aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/cache.c9
-rw-r--r--drivers/infiniband/core/cm.c13
-rw-r--r--drivers/infiniband/core/cma.c81
-rw-r--r--drivers/infiniband/core/cma_configfs.c12
-rw-r--r--drivers/infiniband/core/counters.c78
-rw-r--r--drivers/infiniband/core/device.c23
-rw-r--r--drivers/infiniband/core/iwpm_msg.c16
-rw-r--r--drivers/infiniband/core/iwpm_util.c6
-rw-r--r--drivers/infiniband/core/multicast.c1
-rw-r--r--drivers/infiniband/core/nldev.c4
-rw-r--r--drivers/infiniband/core/restrack.c4
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c2
-rw-r--r--drivers/infiniband/core/rw.c2
-rw-r--r--drivers/infiniband/core/sa_query.c26
-rw-r--r--drivers/infiniband/core/umem.c3
-rw-r--r--drivers/infiniband/core/umem_dmabuf.c174
-rw-r--r--drivers/infiniband/core/user_mad.c17
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c117
-rw-r--r--drivers/infiniband/core/verbs.c4
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c49
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c29
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h2
-rw-r--r--drivers/infiniband/hw/cxgb4/restrack.c2
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h25
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_defs.h4
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c33
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c46
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.c8
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c2
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c16
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.c4
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c4
-rw-r--r--drivers/infiniband/hw/hfi1/msix.c2
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c4
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c1
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c14
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c4
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c7
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c5
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c12
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c47
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c8
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c8
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c10
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h26
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c116
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h82
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c9
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c33
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h43
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c791
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h141
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c30
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c458
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c38
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c331
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c21
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c18
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c13
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c13
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c22
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c19
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.c19
-rw-r--r--drivers/infiniband/hw/mlx4/main.c2
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c4
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c231
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c14
-rw-r--r--drivers/infiniband/hw/mlx5/main.c147
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h60
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c137
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c327
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c160
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c2
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h8
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c18
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c16
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c14
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c16
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_twsi.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c7
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c21
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c34
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c7
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c2
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c86
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h178
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c64
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c34
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c300
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h103
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c79
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c68
-rw-r--r--drivers/infiniband/sw/siw/siw.h2
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c4
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c271
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c26
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c4
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c20
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c15
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c53
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c3
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c10
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c2
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c11
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c127
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.h4
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h9
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c9
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c123
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c32
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c110
143 files changed, 3562 insertions, 2690 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 9325e189a215..04a78d9f8fe3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -41,6 +41,7 @@ config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
depends on MMU
+ select DMA_SHARED_BUFFER
default y
config INFINIBAND_ON_DEMAND_PAGING
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ccf2670ef45e..8ab4eea5a0a5 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -40,5 +40,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_srq.o \
uverbs_std_types_wq.o \
uverbs_std_types_qp.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 7989b7e1d1c0..5c9fac7cf420 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -669,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
* rdma_find_gid_by_port - Returns the GID entry attributes when it finds
* a valid GID entry for given search parameters. It searches for the specified
* GID value in the local software cache.
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
- * @port_num: The port number of the device where the GID value should be
- * searched.
+ * @port: The port number of the device where the GID value should be searched.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
*
* Returns sgid attributes if the GID is found with valid reference or
@@ -719,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
* rdma_find_gid_by_filter - Returns the GID table attribute where a
* specified GID value occurs
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @port: The port number of the device where the GID value could be
* searched.
@@ -728,6 +727,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
+ * @context: Private data to pass into the call-back.
*
* rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
@@ -1253,7 +1253,6 @@ EXPORT_SYMBOL(rdma_get_gid_attr);
* @entries: Entries where GID entries are returned.
* @max_entries: Maximum number of entries that can be returned.
* Entries array must be allocated to hold max_entries number of entries.
- * @num_entries: Updated to the number of entries that were successfully read.
*
* Returns number of entries on success or appropriate error code.
*/
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 98165589c8ab..3d194bb60840 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
struct ib_mad_send_buf *msg;
+ unsigned long flags;
int ret;
lockdep_assert_held(&cm_id_priv->lock);
@@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
return ret;
}
cm_id_priv->id.state = IB_CM_IDLE;
- spin_lock_irq(&cm.lock);
+ spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
}
- spin_unlock_irq(&cm.lock);
+ spin_unlock_irqrestore(&cm.lock, flags);
return 0;
}
@@ -4333,7 +4334,7 @@ static int cm_add_one(struct ib_device *ib_device)
unsigned long flags;
int ret;
int count = 0;
- u8 i;
+ unsigned int i;
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
@@ -4345,7 +4346,7 @@ static int cm_add_one(struct ib_device *ib_device)
cm_dev->going_down = 0;
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4431,7 +4432,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
.clr_port_cap_mask = IB_PORT_CM_SUP
};
unsigned long flags;
- int i;
+ unsigned int i;
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
@@ -4441,7 +4442,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_dev->going_down = 1;
spin_unlock_irq(&cm.lock);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index c51b84b2d2f3..94096511599f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -352,7 +352,13 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
struct cma_multicast {
struct rdma_id_private *id_priv;
- struct ib_sa_multicast *sa_mc;
+ union {
+ struct ib_sa_multicast *sa_mc;
+ struct {
+ struct work_struct work;
+ struct rdma_cm_event event;
+ } iboe_join;
+ };
struct list_head list;
void *context;
struct sockaddr_storage addr;
@@ -1823,6 +1829,8 @@ static void destroy_mc(struct rdma_id_private *id_priv,
cma_igmp_send(ndev, &mgid, false);
dev_put(ndev);
}
+
+ cancel_work_sync(&mc->iboe_join.work);
}
kfree(mc);
}
@@ -2683,6 +2691,28 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv,
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
+static void cma_iboe_join_work_handler(struct work_struct *work)
+{
+ struct cma_multicast *mc =
+ container_of(work, struct cma_multicast, iboe_join.work);
+ struct rdma_cm_event *event = &mc->iboe_join.event;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ int ret;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ ret = cma_cm_event_handler(id_priv, event);
+ WARN_ON(ret);
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
+}
+
static void cma_work_handler(struct work_struct *_work)
{
struct cma_work *work = container_of(_work, struct cma_work, work);
@@ -4478,10 +4508,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
cma_make_mc_event(status, id_priv, multicast, &event, mc);
ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
- if (ret) {
- destroy_id_handler_unlock(id_priv);
- return 0;
- }
+ WARN_ON(ret);
out:
mutex_unlock(&id_priv->handler_mutex);
@@ -4542,17 +4569,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
- (!ib_sa_sendonly_fullmem_support(&sa_client,
- id_priv->id.device,
- id_priv->id.port_num))) {
- dev_warn(
- &id_priv->id.device->dev,
- "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
- id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
-
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
@@ -4604,7 +4620,6 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct cma_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
@@ -4618,10 +4633,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (cma_zero_addr(addr))
return -EINVAL;
- work = kzalloc(sizeof *work, GFP_KERNEL);
- if (!work)
- return -ENOMEM;
-
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
@@ -4632,10 +4643,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (!ndev) {
- err = -ENODEV;
- goto err_free;
- }
+ if (!ndev)
+ return -ENODEV;
+
ib.rec.rate = iboe_get_rate(ndev);
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
@@ -4653,24 +4663,15 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !ib.rec.mtu) {
- if (!err)
- err = -EINVAL;
- goto err_free;
- }
+ if (err || !ib.rec.mtu)
+ return err ?: -EINVAL;
+
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&ib.rec.port_gid);
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- cma_make_mc_event(0, id_priv, &ib, &work->event, mc);
- /* Balances with cma_id_put() in cma_work_handler */
- cma_id_get(id_priv);
- queue_work(cma_wq, &work->work);
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
+ queue_work(cma_wq, &mc->iboe_join.work);
return 0;
-
-err_free:
- kfree(work);
- return err;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 97a77ea8d3c9..e0d5e3bae458 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -204,7 +204,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
unsigned int i;
unsigned int ports_num;
struct cma_dev_port_group *ports;
- int err;
ibdev = cma_get_ib_dev(cma_dev);
@@ -215,10 +214,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
- if (!ports) {
- err = -ENOMEM;
- goto free;
- }
+ if (!ports)
+ return -ENOMEM;
for (i = 0; i < ports_num; i++) {
char port_str[10];
@@ -234,12 +231,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
-
return 0;
-free:
- kfree(ports);
- cma_dev_group->ports = NULL;
- return err;
}
static void release_cma_dev(struct config_item *item)
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 92745522250e..f3a7c1f404af 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -10,30 +10,35 @@
#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
-static int __counter_set_mode(struct rdma_counter_mode *curr,
+static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
enum rdma_nl_counter_mask new_mask)
{
- if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
- ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
- (curr->mode != RDMA_COUNTER_MODE_NONE)))
- return -EINVAL;
+ if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters)
+ if (new_mask & ~ALL_AUTO_MODE_MASKS ||
+ port_counter->mode.mode != RDMA_COUNTER_MODE_NONE)
+ return -EINVAL;
- curr->mode = new_mode;
- curr->mask = new_mask;
+ port_counter->mode.mode = new_mode;
+ port_counter->mode.mask = new_mask;
return 0;
}
-/**
+/*
* rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
*
- * When @on is true, the @mask must be set; When @on is false, it goes
- * into manual mode if there's any counter, so that the user is able to
- * manually access them.
+ * @dev: Device to operate
+ * @port: Port to use
+ * @mask: Mask to configure
+ * @extack: Message to the user
+ *
+ * Return 0 on success.
*/
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
- bool on, enum rdma_nl_counter_mask mask)
+ enum rdma_nl_counter_mask mask,
+ struct netlink_ext_ack *extack)
{
+ enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO;
struct rdma_port_counter *port_counter;
int ret;
@@ -42,23 +47,23 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
return -EOPNOTSUPP;
mutex_lock(&port_counter->lock);
- if (on) {
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_AUTO, mask);
- } else {
- if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
- ret = -EINVAL;
- goto out;
- }
+ if (mask) {
+ ret = __counter_set_mode(port_counter, mode, mask);
+ if (ret)
+ NL_SET_ERR_MSG(
+ extack,
+ "Turning on auto mode is not allowed when there is bound QP");
+ goto out;
+ }
- if (port_counter->num_counters)
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
- else
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_NONE, 0);
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
+ ret = -EINVAL;
+ goto out;
}
+ mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
+ RDMA_COUNTER_MODE_NONE;
+ ret = __counter_set_mode(port_counter, mode, 0);
out:
mutex_unlock(&port_counter->lock);
return ret;
@@ -122,8 +127,8 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port,
mutex_lock(&port_counter->lock);
switch (mode) {
case RDMA_COUNTER_MODE_MANUAL:
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
+ ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
+ 0);
if (ret) {
mutex_unlock(&port_counter->lock);
goto err_mode;
@@ -170,8 +175,7 @@ static void rdma_counter_free(struct rdma_counter *counter)
port_counter->num_counters--;
if (!port_counter->num_counters &&
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
- __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
- 0);
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
mutex_unlock(&port_counter->lock);
@@ -227,7 +231,7 @@ static void counter_history_stat_update(struct rdma_counter *counter)
port_counter->hstats->value[i] += counter->stats->value[i];
}
-/**
+/*
* rdma_get_counter_auto_mode - Find the counter that @qp should be bound
* with in auto mode
*
@@ -274,7 +278,7 @@ static void counter_release(struct kref *kref)
rdma_counter_free(counter);
}
-/**
+/*
* rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
* the auto-mode rule
*/
@@ -311,7 +315,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
return 0;
}
-/**
+/*
* rdma_counter_unbind_qp - Unbind a qp from a counter
* @force:
* true - Decrease the counter ref-count anyway (e.g., qp destroy)
@@ -380,7 +384,7 @@ next:
return sum;
}
-/**
+/*
* rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
* specific port, including the running ones and history data
*/
@@ -436,7 +440,7 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
return counter;
}
-/**
+/*
* rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
*/
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
@@ -485,7 +489,7 @@ err:
return ret;
}
-/**
+/*
* rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
* The id of new counter is returned in @counter_id
*/
@@ -533,7 +537,7 @@ err:
return ret;
}
-/**
+/*
* rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
*/
int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index e96f979e6d52..aac0fe14e1d9 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -848,6 +848,20 @@ static int setup_port_data(struct ib_device *device)
return 0;
}
+/**
+ * ib_port_immutable_read() - Read rdma port's immutable data
+ * @dev: IB device
+ * @port: port number whose immutable data to read. It starts with index 1 and
+ * is valid up to and including rdma_end_port().
+ */
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port)
+{
+ WARN_ON(!rdma_is_port_valid(dev, port));
+ return &dev->port_data[port].immutable;
+}
+EXPORT_SYMBOL(ib_port_immutable_read);
+
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->ops.get_dev_fw_str)
@@ -1887,9 +1901,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev,
/**
* ib_get_client_nl_info - Fetch the nl_info from a client
- * @device - IB device
- * @client_name - Name of the client
- * @res - Result of the query
+ * @ibdev: IB device
+ * @client_name: Name of the client
+ * @res: Result of the query
*/
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
struct ib_client_nl_info *res)
@@ -2317,7 +2331,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
up_read(&devices_rwsem);
}
-/**
+/*
* ib_enum_all_devs - enumerate all ib_devices
* @cb: Callback to call for each found ib_device
*
@@ -2681,6 +2695,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, read_counters);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
+ SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_ncomp_notif);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 46686990a827..30a0ff76b332 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -392,7 +392,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
/**
* iwpm_register_pid_cb - Process the port mapper response to
* iwpm_register_pid query
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* If successful, the function receives the userspace port mapper pid
@@ -468,7 +468,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
/**
* iwpm_add_mapping_cb - Process the port mapper response to
* iwpm_add_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -545,7 +545,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] =
/**
* iwpm_add_and_query_mapping_cb - Process the port mapper response to
* iwpm_add_and_query_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
@@ -627,7 +627,7 @@ query_mapping_response_exit:
/**
* iwpm_remote_info_cb - Process remote connecting peer address info, which
* the port mapper has received from the connecting peer
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Stores the IPv4/IPv6 address info in a hash table
@@ -706,7 +706,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
/**
* iwpm_mapping_info_cb - Process a notification that the userspace
* port mapper daemon is started
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send all the local mapping
@@ -766,7 +766,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
/**
* iwpm_ack_mapping_info_cb - Process the port mapper ack for
* the provided local mapping info records
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -796,7 +796,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
/**
* iwpm_mapping_error_cb - Process port mapper notification for error
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -841,7 +841,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
/**
* iwpm_hello_cb - Process a hello message from iwpmd
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send the kernel's abi_version
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 13495b43dbc1..f80e5550b51f 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -127,8 +127,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
/**
* iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
* info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
+ * @mapped_sockaddr: Mapped local ip/tcp address
* @nl_client: The index of the netlink client
* @map_flags: IWPM mapping flags
*/
@@ -174,7 +174,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
/**
* iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
* info from the hash table
- * @local_addr: Local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
* @mapped_local_addr: Mapped local ip/tcp address
*
* Returns err code if mapping info is not found in the hash table,
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 740f03ecc05d..57519ca6cd2c 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* member record and gid of the device.
* @device: RDMA device
* @port_num: Port of the rdma device to consider
+ * @rec: Multicast member record to use
* @ndev: Optional netdevice, applicable only for RoCE
* @gid_type: GID type to consider
* @ah_attr: AH attribute to fillup on successful completion
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 08366e254b1d..d306049c22a2 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1768,9 +1768,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
mask = nla_get_u32(
tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
-
- ret = rdma_counter_set_auto_mode(device, port,
- mask ? true : false, mask);
+ ret = rdma_counter_set_auto_mode(device, port, mask, extack);
if (ret)
goto err_msg;
} else {
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index ff1551b3cf61..ffabaf327242 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -201,8 +201,8 @@ EXPORT_SYMBOL(rdma_restrack_parent_name);
/**
* rdma_restrack_new() - Initializes new restrack entry to allow _put() interface
* to release memory in fully automatic way.
- * @res - Entry to initialize
- * @type - REstrack type
+ * @res: Entry to initialize
+ * @type: REstrack type
*/
void rdma_restrack_new(struct rdma_restrack_entry *res,
enum rdma_restrack_type type)
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 6b8364bb032d..34fff94eaa38 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -505,7 +505,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
*
- * @device: the rdma device
+ * @ib_dev: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index a96030b784eb..31156e22d3e7 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -410,7 +410,7 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
- ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
+ ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 89a831fa1885..9ef1a355131b 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1434,7 +1434,7 @@ enum opa_pr_supported {
PR_IB_SUPPORTED
};
-/**
+/*
* opa_pr_query_possible - Check if current PR query can be an OPA query.
*
* Retuns PR_NOT_SUPPORTED if a path record query is not
@@ -1951,30 +1951,6 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num)
-{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
- bool ret = false;
- unsigned long flags;
-
- if (!sa_dev)
- return ret;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
-
- spin_lock_irqsave(&port->classport_lock, flags);
- if ((port->classport_info.valid) &&
- (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
- ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
- & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
-
struct ib_classport_info_context {
struct completion done;
struct ib_sa_query *sa_query;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 917338db7ac1..2dde99a9ba07 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -2,6 +2,7 @@
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -278,6 +279,8 @@ void ib_umem_release(struct ib_umem *umem)
{
if (!umem)
return;
+ if (umem->is_dmabuf)
+ return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
if (umem->is_odp)
return ib_umem_odp_release(to_ib_umem_odp(umem));
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
new file mode 100644
index 000000000000..f9b5162d9260
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct sg_table *sgt;
+ struct scatterlist *sg;
+ struct dma_fence *fence;
+ unsigned long start, end, cur = 0;
+ unsigned int nmap = 0;
+ int i;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (umem_dmabuf->sgt)
+ goto wait_fence;
+
+ sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ /* modify the sg list in-place to match umem address and length */
+
+ start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
+ end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
+ PAGE_SIZE);
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ if (start < cur + sg_dma_len(sg) && cur < end)
+ nmap++;
+ if (cur <= start && start < cur + sg_dma_len(sg)) {
+ unsigned long offset = start - cur;
+
+ umem_dmabuf->first_sg = sg;
+ umem_dmabuf->first_sg_offset = offset;
+ sg_dma_address(sg) += offset;
+ sg_dma_len(sg) -= offset;
+ cur += offset;
+ }
+ if (cur < end && end <= cur + sg_dma_len(sg)) {
+ unsigned long trim = cur + sg_dma_len(sg) - end;
+
+ umem_dmabuf->last_sg = sg;
+ umem_dmabuf->last_sg_trim = trim;
+ sg_dma_len(sg) -= trim;
+ break;
+ }
+ cur += sg_dma_len(sg);
+ }
+
+ umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
+ umem_dmabuf->umem.sg_head.nents = nmap;
+ umem_dmabuf->umem.nmap = nmap;
+ umem_dmabuf->sgt = sgt;
+
+wait_fence:
+ /*
+ * Although the sg list is valid now, the content of the pages
+ * may not be up to date. Wait for the exporter to finish
+ * the migration.
+ */
+ fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv);
+ if (fence)
+ return dma_fence_wait(fence, false);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
+
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt)
+ return;
+
+ /* restore the original sg list */
+ if (umem_dmabuf->first_sg) {
+ sg_dma_address(umem_dmabuf->first_sg) -=
+ umem_dmabuf->first_sg_offset;
+ sg_dma_len(umem_dmabuf->first_sg) +=
+ umem_dmabuf->first_sg_offset;
+ umem_dmabuf->first_sg = NULL;
+ umem_dmabuf->first_sg_offset = 0;
+ }
+ if (umem_dmabuf->last_sg) {
+ sg_dma_len(umem_dmabuf->last_sg) +=
+ umem_dmabuf->last_sg_trim;
+ umem_dmabuf->last_sg = NULL;
+ umem_dmabuf->last_sg_trim = 0;
+ }
+
+ dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+ DMA_BIDIRECTIONAL);
+
+ umem_dmabuf->sgt = NULL;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages);
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops)
+{
+ struct dma_buf *dmabuf;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ unsigned long end;
+ struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL);
+
+ if (check_add_overflow(offset, (unsigned long)size, &end))
+ return ret;
+
+ if (unlikely(!ops || !ops->move_notify))
+ return ret;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ if (dmabuf->size < end)
+ goto out_release_dmabuf;
+
+ umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL);
+ if (!umem_dmabuf) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out_release_dmabuf;
+ }
+
+ umem = &umem_dmabuf->umem;
+ umem->ibdev = device;
+ umem->length = size;
+ umem->address = offset;
+ umem->writable = ib_access_writable(access);
+ umem->is_dmabuf = 1;
+
+ if (!ib_umem_num_pages(umem))
+ goto out_free_umem;
+
+ umem_dmabuf->attach = dma_buf_dynamic_attach(
+ dmabuf,
+ device->dma_device,
+ ops,
+ umem_dmabuf);
+ if (IS_ERR(umem_dmabuf->attach)) {
+ ret = ERR_CAST(umem_dmabuf->attach);
+ goto out_free_umem;
+ }
+ return umem_dmabuf;
+
+out_free_umem:
+ kfree(umem_dmabuf);
+
+out_release_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get);
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+ dma_buf_detach(dmabuf, umem_dmabuf->attach);
+ dma_buf_put(dmabuf);
+ kfree(umem_dmabuf);
+}
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 19104a675691..dd7f3b437c6b 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -379,6 +379,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
while (list_empty(&file->recv_list)) {
mutex_unlock(&file->mutex);
@@ -392,6 +397,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
}
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
list_del(&packet->list);
@@ -524,7 +534,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
- ret = -EINVAL;
+ ret = -EIO;
goto err_up;
}
@@ -653,10 +663,14 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
/* we will always be able to post a MAD send */
__poll_t mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_lock(&file->mutex);
poll_wait(filp, &file->recv_wait, wait);
if (!list_empty(&file->recv_list))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (file->agents_dead)
+ mask = EPOLLERR;
+ mutex_unlock(&file->mutex);
return mask;
}
@@ -1336,6 +1350,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
list_for_each_entry(file, &port->file_list, port_list) {
mutex_lock(&file->mutex);
file->agents_dead = 1;
+ wake_up_interruptible(&file->recv_wait);
mutex_unlock(&file->mutex);
for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 98a5d36813ff..f5b8be3bedde 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1382,7 +1382,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (has_sq)
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
cmd->send_cq_handle, attrs);
- if (!ind_tbl)
+ if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
attrs);
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index e47c5949013f..ff047eb024ab 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -91,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
}
/**
- * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
* @bundle: The bundle
* @size: Number of bytes to allocate
* @flags: Allocator flags
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index dd4e76b26c74..f782d5e1aa25 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -182,6 +183,86 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)(
return IS_UVERBS_COPY_ERR(ret) ? ret : 0;
}
+static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
+ u64 offset, length, iova;
+ u32 fd, access_flags;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->ops.reg_user_mr_dmabuf)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&offset, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&length, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&iova, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_IOVA);
+ if (ret)
+ return ret;
+
+ if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+
+ ret = uverbs_copy_from(&fd, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_FD);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&access_flags, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (ret)
+ return ret;
+
+ ret = ib_check_mr_access(ib_dev, access_flags);
+ if (ret)
+ return ret;
+
+ mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd,
+ access_flags,
+ &attrs->driver_udata);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+
+ uobj->object = mr;
+
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ &mr->lkey, sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_ADVISE_MR,
UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
@@ -247,6 +328,37 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MR_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
@@ -257,10 +369,11 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_MR,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
&UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
- &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR));
const struct uapi_definition uverbs_def_obj_mr[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9137a25bb521..28464c58738c 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2248,7 +2248,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
struct ib_qp_init_attr init_attr = {};
struct ib_qp_attr attr = {};
int num_eth_ports = 0;
- int port;
+ unsigned int port;
/* If QP state >= init, it is assigned to a port and we can check this
* port only.
@@ -2263,7 +2263,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
}
/* Can't get a quick answer, iterate over all ports */
- for (port = 0; port < qp->device->phys_port_cnt; port++)
+ rdma_for_each_port(qp->device, port)
if (rdma_port_get_link_layer(qp->device, port) !=
IB_LINK_LAYER_INFINIBAND)
num_eth_ports++;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 401bdc9e931e..ba515efd4fdc 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -469,7 +469,6 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
struct bnxt_re_mr *mr = NULL;
dma_addr_t dma_addr = 0;
struct ib_mw *mw;
- u64 pbl_tbl;
int rc;
dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
@@ -504,9 +503,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->qplib_mr.va = (u64)(unsigned long)fence->va;
mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
- pbl_tbl = dma_addr;
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
- BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL,
+ BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n");
goto fail;
@@ -3589,7 +3587,6 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
- u64 pbl = 0;
int rc;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
@@ -3608,7 +3605,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->qplib_mr.hwq.level = PBL_LVL_MAX;
mr->qplib_mr.total_size = -1; /* Infinte length */
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false,
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0,
PAGE_SIZE);
if (rc)
goto fail_mr;
@@ -3779,19 +3776,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
return rc;
}
-static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
- int page_shift)
-{
- u64 *pbl_tbl = pbl_tbl_orig;
- u64 page_size = BIT_ULL(page_shift);
- struct ib_block_iter biter;
-
- rdma_umem_for_each_dma_block(umem, &biter, page_size)
- *pbl_tbl++ = rdma_block_iter_dma_address(&biter);
-
- return pbl_tbl - pbl_tbl_orig;
-}
-
/* uverbs */
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
@@ -3801,7 +3785,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
struct ib_umem *umem;
- u64 *pbl_tbl = NULL;
unsigned long page_size;
int umem_pgs, rc;
@@ -3846,39 +3829,19 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
}
mr->qplib_mr.total_size = length;
- if (page_size == BNXT_RE_PAGE_SIZE_4K &&
- length > BNXT_RE_MAX_MR_SIZE_LOW) {
- ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu",
- length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
- rc = -EINVAL;
- goto free_umem;
- }
-
umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
- pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL);
- if (!pbl_tbl) {
- rc = -ENOMEM;
- goto free_umem;
- }
-
- /* Map umem buf ptrs to the PBL */
- umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size));
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
- umem_pgs, false, page_size);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
+ umem_pgs, page_size);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register user MR");
- goto fail;
+ goto free_umem;
}
- kfree(pbl_tbl);
-
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->ib_mr.rkey = mr->qplib_mr.lkey;
atomic_inc(&rdev->mr_count);
return &mr->ib_mr;
-fail:
- kfree(pbl_tbl);
free_umem:
ib_umem_release(umem);
free_mrw:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 6316179583a6..049b3576302b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -650,42 +650,32 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
}
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size)
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
struct creq_register_mr_resp resp;
struct cmdq_register_mr req;
- int pg_ptrs, pages, i, rc;
u16 cmd_flags = 0, level;
- dma_addr_t **pbl_ptr;
+ int pages, rc;
u32 pg_size;
if (num_pbls) {
+ pages = roundup_pow_of_two(num_pbls);
/* Allocate memory for the non-leaf pages to store buf ptrs.
* Non-leaf pages always uses system PAGE_SIZE
*/
- pg_ptrs = roundup_pow_of_two(num_pbls);
- pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
- if (!pages)
- pages++;
-
- if (pages > MAX_PBL_LVL_1_PGS) {
- dev_err(&res->pdev->dev,
- "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n",
- pages, MAX_PBL_LVL_1_PGS);
- return -ENOMEM;
- }
/* Free the hwq if it already exist, must be a rereg */
if (mr->hwq.max_elements)
bnxt_qplib_free_hwq(res, &mr->hwq);
/* Use system PAGE_SIZE */
hwq_attr.res = res;
hwq_attr.depth = pages;
- hwq_attr.stride = PAGE_SIZE;
+ hwq_attr.stride = buf_pg_size;
hwq_attr.type = HWQ_TYPE_MR;
hwq_attr.sginfo = &sginfo;
+ hwq_attr.sginfo->umem = umem;
hwq_attr.sginfo->npages = pages;
hwq_attr.sginfo->pgsize = PAGE_SIZE;
hwq_attr.sginfo->pgshft = PAGE_SHIFT;
@@ -695,11 +685,6 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
"SP: Reg MR memory allocation failed\n");
return -ENOMEM;
}
- /* Write to the hwq */
- pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
- for (i = 0; i < num_pbls; i++)
- pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
- (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
}
RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
@@ -711,7 +696,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.pbl = 0;
pg_size = PAGE_SIZE;
} else {
- level = mr->hwq.level + 1;
+ level = mr->hwq.level;
req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
}
pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE;
@@ -728,7 +713,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.mr_size = cpu_to_le64(mr->total_size);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, block);
+ (void *)&resp, NULL, false);
if (rc)
goto fail;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 967890cd81f2..bc228340684f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -254,7 +254,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
bool block);
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size);
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size);
int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mr, int max);
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index b32e6516d65f..ff645b955a08 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -209,7 +209,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data;
if (!epcp)
return 0;
- uep = kcalloc(1, sizeof(*uep), GFP_KERNEL);
+ uep = kzalloc(sizeof(*uep), GFP_KERNEL);
if (!uep)
return 0;
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index b199e4ac6cf9..fa38b34eddb8 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_CMDS_H_
@@ -161,8 +161,8 @@ struct efa_admin_create_qp_resp {
u32 qp_handle;
/*
- * QP number in the given EFA virtual device. Least-significant bits
- * (as needed according to max_qp) carry unique QP ID
+ * QP number in the given EFA virtual device. Least-significant bits (as
+ * needed according to max_qp) carry unique QP ID
*/
u16 qp_num;
@@ -465,7 +465,7 @@ struct efa_admin_create_cq_cmd {
/*
* number of sub cqs - must be equal to sub_cqs_per_cq of queue
- * attributes.
+ * attributes.
*/
u16 num_sub_cqs;
@@ -563,12 +563,8 @@ struct efa_admin_acq_get_stats_resp {
};
struct efa_admin_get_set_feature_common_desc {
- /*
- * 1:0 : select - 0x1 - current value; 0x3 - default
- * value
- * 7:3 : reserved3 - MBZ
- */
- u8 flags;
+ /* MBZ */
+ u8 reserved0;
/* as appears in efa_admin_aq_feature_id */
u8 feature_id;
@@ -823,12 +819,6 @@ enum efa_admin_aenq_group {
EFA_ADMIN_AENQ_GROUPS_NUM = 5,
};
-enum efa_admin_aenq_notification_syndrom {
- EFA_ADMIN_SUSPEND = 0,
- EFA_ADMIN_RESUME = 1,
- EFA_ADMIN_UPDATE_HINTS = 2,
-};
-
struct efa_admin_mmio_req_read_less_resp {
u16 req_id;
@@ -909,9 +899,6 @@ struct efa_admin_host_info {
#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
-/* get_set_feature_common_desc */
-#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
-
/* feature_device_attr_desc */
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
index 29d53ed63b3e..78ff9389ae25 100644
--- a/drivers/infiniband/hw/efa/efa_admin_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_H_
@@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc {
/*
* indicates to the driver which AQ entry has been consumed by the
- * device and could be reused
+ * device and could be reused
*/
u16 sq_head_indx;
};
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 336bc2c57bb1..0d523ad736c7 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -20,9 +20,6 @@
#define EFA_CTRL_MINOR 0
#define EFA_CTRL_SUB_MINOR 1
-#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x)))
-#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32))
-
enum efa_cmd_status {
EFA_CMD_SUBMITTED,
EFA_CMD_COMPLETED,
@@ -33,8 +30,6 @@ struct efa_comp_ctx {
struct efa_admin_acq_entry *user_cqe;
u32 comp_size;
enum efa_cmd_status status;
- /* status from the device */
- u8 comp_status;
u8 cmd_opcode;
u8 occupied;
};
@@ -140,8 +135,8 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev)
sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
- addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
- addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
+ addr_high = upper_32_bits(sq->dma_addr);
+ addr_low = lower_32_bits(sq->dma_addr);
writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
@@ -174,8 +169,8 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev)
cq->cc = 0;
cq->phase = 1;
- addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
- addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
+ addr_high = upper_32_bits(cq->dma_addr);
+ addr_low = lower_32_bits(cq->dma_addr);
writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
@@ -215,8 +210,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
aenq->cc = 0;
aenq->phase = 1;
- addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
- addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+ addr_low = lower_32_bits(aenq->dma_addr);
+ addr_high = upper_32_bits(aenq->dma_addr);
writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
@@ -421,9 +416,7 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
}
comp_ctx->status = EFA_CMD_COMPLETED;
- comp_ctx->comp_status = cqe->acq_common_descriptor.status;
- if (comp_ctx->user_cqe)
- memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
+ memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
complete(&comp_ctx->wait_event);
@@ -521,7 +514,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
msleep(aq->poll_interval);
}
- err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+ err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
out:
efa_com_put_comp_ctx(aq, comp_ctx);
return err;
@@ -569,7 +562,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
goto out;
}
- err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+ err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
out:
efa_com_put_comp_ctx(aq, comp_ctx);
return err;
@@ -641,8 +634,8 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
aq->efa_dev,
"Failed to process command %s (opcode %u) comp_status %d err %d\n",
efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
- err);
+ cmd->aq_common_descriptor.opcode,
+ comp_ctx->user_cqe->acq_common_descriptor.status, err);
atomic64_inc(&aq->stats.cmd_err);
}
@@ -795,7 +788,7 @@ err_destroy_comp_ctxt:
* This method goes over the admin completion queue and wakes up
* all the pending threads that wait on the commands wait event.
*
- * @note: Should be called after MSI-X interrupt.
+ * Note: Should be called after MSI-X interrupt.
*/
void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
{
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index c87b94ea2939..993cbf37e0b9 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1323,8 +1323,8 @@ CNTR_ELEM(#name, \
/**
* hfi_addr_from_offset - return addr for readq/writeq
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
*
* This routine selects the appropriate base address
* based on the indicated offset.
@@ -1340,8 +1340,8 @@ static inline void __iomem *hfi1_addr_from_offset(
/**
* read_csr - read CSR at the indicated offset
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
*
* Return: the value read or all FF's if there
* is no mapping
@@ -1355,9 +1355,9 @@ u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
/**
* write_csr - write CSR at the indicated offset
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
- * @value - value to write
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ * @value: value to write
*/
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{
@@ -1373,8 +1373,8 @@ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
/**
* get_csr_addr - return te iomem address for offset
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
*
* Return: The iomem address to use in subsequent
* writeq/readq operations.
@@ -8433,7 +8433,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
return hfi1_rcd_head(rcd) != tail;
}
-/**
+/*
* Common code for receive contexts interrupt handlers.
* Update traces, increment kernel IRQ counter and
* setup ASPM when needed.
@@ -8447,7 +8447,7 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
aspm_ctx_disable(rcd);
}
-/**
+/*
* __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt
* when there are packets present in the queue. When calling
* with interrupts enabled please use hfi1_rcd_eoi_intr.
@@ -8484,8 +8484,8 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
/**
* hfi1_netdev_rx_napi - napi poll function to move eoi inline
- * @napi - pointer to napi object
- * @budget - netdev budget
+ * @napi: pointer to napi object
+ * @budget: netdev budget
*/
int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
{
@@ -10142,7 +10142,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
/*
* Set Send Length
- * @ppd - per port data
+ * @ppd: per port data
*
* Set the MTU by limiting how many DWs may be sent. The SendLenCheck*
* registers compare against LRH.PktLen, so use the max bytes included
@@ -14200,9 +14200,9 @@ u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
/**
* init_qpmap_table
- * @dd - device data
- * @first_ctxt - first context
- * @last_ctxt - first context
+ * @dd: device data
+ * @first_ctxt: first context
+ * @last_ctxt: last context
*
* This return sets the qpn mapping table that
* is indexed by qpn[8:1].
@@ -14383,8 +14383,8 @@ no_qos:
/**
* init_qos - init RX qos
- * @dd - device data
- * @rmt - RSM map table
+ * @dd: device data
+ * @rmt: RSM map table
*
* This routine initializes Rule 0 and the RSM map table to implement
* quality of service (qos).
@@ -15022,8 +15022,7 @@ err_exit:
/**
* hfi1_init_dd() - Initialize most of the dd structure.
- * @dev: the pci_dev for hfi1_ib device
- * @ent: pci_device_id struct for this dev
+ * @dd: the dd device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
@@ -15378,10 +15377,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
/**
* create_pbc - build a pbc for transmission
+ * @ppd: info of physical Hfi port
* @flags: special case flags or-ed in built pbc
- * @srate: static rate
+ * @srate_mbs: static rate
* @vl: vl
- * @dwlen: dword length (header words + data words + pbc words)
+ * @dw_len: dword length (header words + data words + pbc words)
*
* Create a PBC with the given flags, rate, VL, and length.
*
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index e9d5cc8b771a..91f13140ddf2 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -50,7 +50,7 @@
/**
* hfi1_exp_tid_set_init - initialize exp_tid_set
- * @set - the set
+ * @set: the set
*/
static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
{
@@ -60,7 +60,7 @@ static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
/**
* hfi1_exp_tid_group_init - initialize rcd expected receive
- * @rcd - the rcd
+ * @rcd: the rcd
*/
void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
{
@@ -71,7 +71,7 @@ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
/**
* hfi1_alloc_ctxt_rcv_groups - initialize expected receive groups
- * @rcd - the context to add the groupings to
+ * @rcd: the context to add the groupings to
*/
int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
{
@@ -101,7 +101,7 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
/**
* free_ctxt_rcv_groups - free expected receive groups
- * @rcd - the context to free
+ * @rcd: the context to free
*
* The routine dismantles the expect receive linked
* list and clears any tids associated with the receive
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 329ee4f48d95..3b7bbc7b9d10 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1522,7 +1522,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
* manage_rcvq - manage a context's receive queue
* @uctxt: the context
* @subctxt: the sub-context
- * @start_stop: action to carry out
+ * @arg: start/stop action to carry out
*
* start_stop == 0 disables receive on the context, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 387305b768e9..5ba5c11459e7 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -91,9 +91,9 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
/**
* format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
+ * @msg: message buffer
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
*/
static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
@@ -104,11 +104,11 @@ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
/**
* hfi1_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
*/
void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs,
size_t nhwerrmsgs, char *msg, size_t msgl)
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
index 5836fe7b2817..111489802614 100644
--- a/drivers/infiniband/hw/hfi1/iowait.c
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -26,7 +26,7 @@ inline void iowait_clear_flag(struct iowait *wait, u32 flag)
clear_bit(flag, &wait->flags);
}
-/**
+/*
* iowait_init() - initialize wait structure
* @wait: wait struct to initialize
* @tx_limit: limit for overflow queuing
@@ -88,7 +88,7 @@ void iowait_cancel_work(struct iowait *w)
/**
* iowait_set_work_flag - set work flag based on leg
- * @w - the iowait work struct
+ * @w: the iowait work struct
*/
int iowait_set_work_flag(struct iowait_work *w)
{
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 3222e3acb79c..e2f2f7847aed 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1341,7 +1341,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
return 0;
}
-/**
+/*
* subn_set_opa_portinfo - set port information
* @smp: the incoming SM packet
* @ibdev: the infiniband device
@@ -4902,6 +4902,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
* @in_grh: the global route header for this packet
* @in_mad: the incoming MAD
* @out_mad: any outgoing MAD reply
+ * @out_mad_size: size of the outgoing MAD reply
+ * @out_mad_pkey_index: used to pass back the packet key index
*
* Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
* interested in processing.
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index d61ee853d215..cf3040bb177f 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -103,8 +103,8 @@ int msix_initialize(struct hfi1_devdata *dd)
* @arg: context information for the IRQ
* @handler: IRQ handler
* @thread: IRQ thread handler (could be NULL)
- * @idx: zero base idx if multiple devices are needed
* @type: affinity IRQ type
+ * @name: IRQ name
*
* Allocated an MSIx vector if available, and then create the appropriate
* meta data needed to keep track of the pci IRQ request.
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 6d263c9749b3..1fb6e1a0e4e1 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -467,7 +467,7 @@ void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
* hfi1_netdev_get_first_data - Gets first entry with greater or equal id.
*
* @dd: hfi1 dev data
- * @id: requested integer id up to INT_MAX
+ * @start_id: requested integer id up to INT_MAX
*/
void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
{
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 18d32f053d26..6f06e9920503 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -334,7 +334,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/**
+/*
* Restore command and BARs after a reset has wiped them out
*
* Returns 0 on success, otherwise a negative error value
@@ -393,7 +393,7 @@ error:
return pcibios_err_to_errno(ret);
}
-/**
+/*
* Save BARs and command to rewrite after device reset
*
* Returns 0 on success, otherwise a negative error value
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 4a4ec2397857..14bfd8287f4a 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -55,6 +55,7 @@
/**
* pio_copy - copy data block to MMIO space
+ * @dd: hfi1 dev data
* @pbuf: a number of blocks allocated within a PIO send context
* @pbc: PBC to send
* @from: source, must be 8 byte aligned
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 681bb4e918c9..e037df911512 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -186,7 +186,7 @@ static void flush_iowait(struct rvt_qp *qp)
write_sequnlock_irqrestore(lock, flags);
}
-/**
+/*
* This function is what we would push to the core layer if we wanted to be a
* "first class citizen". Instead we hide this here and rely on Verbs ULPs
* to blindly pass the MTU enum value from the PathRecord to us.
@@ -289,9 +289,9 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
/**
* hfi1_setup_wqe - set up the wqe
- * @qp - The qp
- * @wqe - The built wqe
- * @call_send - Determine if the send should be posted or scheduled.
+ * @qp: The qp
+ * @wqe: The built wqe
+ * @call_send: Determine if the send should be posted or scheduled.
*
* Perform setup of the wqe. This is called
* prior to inserting the wqe into the ring but after
@@ -595,7 +595,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
return sde;
}
-/*
+/**
* qp_to_send_context - map a qp to a send context
* @qp: the QP
* @sc5: the 5 bit sc
@@ -912,8 +912,8 @@ void notify_error_qp(struct rvt_qp *qp)
/**
* hfi1_qp_iter_cb - callback for iterator
- * @qp - the qp
- * @v - the sl in low bits of v
+ * @qp: the qp
+ * @v: the sl in low bits of v
*
* This is called from the iterator callback to work
* on an individual qp.
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 8386c84c2d92..38f311f855b5 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -242,7 +242,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_NOSTART,
+ msgs[1].flags = I2C_M_NOSTART;
msgs[1].len = len;
msgs[1].buf = data;
break;
@@ -290,7 +290,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_RD,
+ msgs[1].flags = I2C_M_RD;
msgs[1].len = len;
msgs[1].buf = data;
break;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1bb5f57152d3..0174b8ee9f00 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -421,6 +421,7 @@ bail:
/**
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
+ * @ps: the current packet state
*
* Assumes s_lock is held.
*
@@ -1375,9 +1376,8 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
[HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
};
-/**
+/*
* hfi1_send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
@@ -1992,7 +1992,7 @@ static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
}
}
-/**
+/*
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
* @psn: the packet sequence number of the ACK
@@ -2541,6 +2541,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* @opcode: the opcode for this packet
* @psn: the packet sequence number for this packet
* @diff: the difference between the PSN and the expected PSN
+ * @rcd: the receive context
*
* This is called from hfi1_rc_rcv() to process an unexpected
* incoming RC packet for the given QP.
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 23ac6057b211..c3fa1814c6a8 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -260,6 +260,7 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
* @qp: the queue pair
* @ohdr: a pointer to the destination header memory
* @bth0: bth0 passed in from the RC/UC builder
+ * @bth1: bth1 passed in from the RC/UC builder
* @bth2: bth2 passed in from the RC/UC builder
* @middle: non zero implies indicates ahg "could" be used
* @ps: the current packet state
@@ -348,6 +349,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
* @qp: the queue pair
* @ohdr: a pointer to the destination header memory
* @bth0: bth0 passed in from the RC/UC builder
+ * @bth1: bth1 passed in from the RC/UC builder
* @bth2: bth2 passed in from the RC/UC builder
* @middle: non zero implies indicates ahg "could" be used
* @ps: the current packet state
@@ -455,11 +457,10 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/**
* hfi1_schedule_send_yield - test for a yield required for QP
* send engine
- * @timeout: Final time for timeout slice for jiffies
* @qp: a pointer to QP
* @ps: a pointer to a structure with commonly lookup values for
* the send engine progress
- * @tid - true if it is the tid leg
+ * @tid: true if it is the tid leg
*
* This routine checks if the time slice for the QP has expired
* for RC QPs, if so an additional work entry is queued. At this
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index a307d4c8b15a..46b5290b2839 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1740,7 +1740,7 @@ retry:
sane = (hwhead == swhead);
if (unlikely(!sane)) {
- dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
+ dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
sde->this_idx,
use_dmahead ? "dma" : "kreg",
hwhead, swhead, swtail, cnt);
@@ -2448,11 +2448,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u16 which, after return will contain the total number of
- * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
- * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
- * which are added to SDMA engine flush list if the SDMA engine state is
- * not running.
+ * @count_out: pointer to a u16 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 92aa2a9b3b5a..0b1f9e4d038b 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -309,7 +309,8 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
/**
* qp_to_rcd - determine the receive context used by a qp
- * @qp - the qp
+ * @rdi: rvt dev struct
+ * @qp: the qp
*
* This routine returns the receive context associated
* with a qp's qpn.
@@ -484,6 +485,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
/**
* kernel_tid_waiters - determine rcd wait
* @rcd: the receive context
+ * @queue: the queue to operate on
* @qp: the head of the qp being processed
*
* This routine will return false IFF
@@ -517,7 +519,9 @@ static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
/**
* dequeue_tid_waiter - dequeue the qp from the list
- * @qp - the qp to remove the wait list
+ * @rcd: the receive context
+ * @queue: the queue to operate on
+ * @qp: the qp to remove the wait list
*
* This routine removes the indicated qp from the
* wait list if it is there.
@@ -549,6 +553,7 @@ static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
/**
* queue_qp_for_tid_wait - suspend QP on tid space
* @rcd: the receive context
+ * @queue: the queue to operate on
* @qp: the qp
*
* The qp is inserted at the tail of the rcd
@@ -593,7 +598,7 @@ static void __trigger_tid_waiter(struct rvt_qp *qp)
/**
* tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
- * @qp - the qp
+ * @qp: the qp
*
* trigger a schedule of a waiting qp in a deadlock
* safe manner. The qp reference is held prior
@@ -630,7 +635,7 @@ static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
/**
* tid_rdma_trigger_resume - field a trigger work request
- * @work - the work item
+ * @work: the work item
*
* Complete the off qp trigger processing by directly
* calling the progress routine.
@@ -654,7 +659,7 @@ static void tid_rdma_trigger_resume(struct work_struct *work)
rvt_put_qp(qp);
}
-/**
+/*
* tid_rdma_flush_wait - unwind any tid space wait
*
* This is called when resetting a qp to
@@ -693,8 +698,8 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
/* Flow functions */
/**
* kern_reserve_flow - allocate a hardware flow
- * @rcd - the context to use for allocation
- * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
+ * @rcd: the context to use for allocation
+ * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
* signify "don't care".
*
* Use a bit mask based allocation to reserve a hardware
@@ -860,9 +865,10 @@ static u8 trdma_pset_order(struct tid_rdma_pageset *s)
/**
* tid_rdma_find_phys_blocks_4k - get groups base on mr info
- * @npages - number of pages
- * @pages - pointer to an array of page structs
- * @list - page set array to return
+ * @flow: overall info for a TID RDMA segment
+ * @pages: pointer to an array of page structs
+ * @npages: number of pages
+ * @list: page set array to return
*
* This routine returns the number of groups associated with
* the current sge information. This implementation is based
@@ -949,10 +955,10 @@ static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
/**
* tid_flush_pages - dump out pages into pagesets
- * @list - list of pagesets
- * @idx - pointer to current page index
- * @pages - number of pages to dump
- * @sets - current number of pagesset
+ * @list: list of pagesets
+ * @idx: pointer to current page index
+ * @pages: number of pages to dump
+ * @sets: current number of pagesets
*
* This routine flushes out accumulated pages.
*
@@ -990,9 +996,10 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list,
/**
* tid_rdma_find_phys_blocks_8k - get groups base on mr info
- * @pages - pointer to an array of page structs
- * @npages - number of pages
- * @list - page set array to return
+ * @flow: overall info for a TID RDMA segment
+ * @pages: pointer to an array of page structs
+ * @npages: number of pages
+ * @list: page set array to return
*
* This routine parses an array of pages to compute pagesets
* in an 8k compatible way.
@@ -1064,7 +1071,7 @@ static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
return sets;
}
-/**
+/*
* Find pages for one segment of a sge array represented by @ss. The function
* does not check the sge, the sge must have been checked for alignment with a
* prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
@@ -1598,7 +1605,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
/**
* hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
- * @req - the tid rdma request to be cleaned
+ * @req: the tid rdma request to be cleaned
*/
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
{
@@ -3435,7 +3442,7 @@ static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
return 0;
}
-/**
+/*
* Central place for resource allocation at TID write responder,
* is called from write_req and write_data interrupt handlers as
* well as the send thread when a queued QP is scheduled for
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 1fb918399da0..5b0f536b34e0 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -55,6 +55,7 @@
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
+ * @ps: the current packet state
*
* Assume s_lock is held.
*
@@ -291,12 +292,7 @@ bail_no_tx:
/**
* hfi1_uc_rcv - handle an incoming UC packet
- * @ibp: the port the packet came in on
- * @hdr: the header of the packet
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
+ * @packet: the packet structure
*
* This is called from qp_rcv() to process an incoming UC packet
* for the given QP.
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index e804af71b629..6ecb984c85fa 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -468,6 +468,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
/**
* hfi1_make_ud_req - construct a UD request packet
* @qp: the QP
+ * @ps: the current packet state
*
* Assume s_lock is held.
*
@@ -840,12 +841,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/**
* hfi1_ud_rcv - receive an incoming UD packet
- * @ibp: the port the packet came in on
- * @hdr: the packet header
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
+ * @packet: the packet structure
*
* This is called from qp_rcv() to process an incoming UD packet
* for the given QP.
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index b94fc7fd75a9..58dcab2679d9 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -154,12 +154,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
fd->entry_to_rb = NULL;
}
-/**
+/*
* Release pinned receive buffer pages.
*
- * @mapped - true if the pages have been DMA mapped. false otherwise.
- * @idx - Index of the first page to unpin.
- * @npages - No of pages to unpin.
+ * @mapped: true if the pages have been DMA mapped. false otherwise.
+ * @idx: Index of the first page to unpin.
+ * @npages: No of pages to unpin.
*
* If the pages have been DMA mapped (indicated by mapped parameter), their
* info will be passed via a struct tid_rb_node. If they haven't been mapped,
@@ -189,7 +189,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
fd->tid_n_pinned -= npages;
}
-/**
+/*
* Pin receive buffer pages.
*/
static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 3591923abebb..0dd4bb0a5a7e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -729,7 +729,7 @@ bail_txadd:
/**
* update_tx_opstats - record stats by opcode
- * @qp; the qp
+ * @qp: the qp
* @ps: transmit packet state
* @plen: the plen in dwords
*
@@ -1145,7 +1145,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
* egress_pkey_check - check P_KEY of a packet
* @ppd: Physical IB port data
* @slid: SLID for packet
- * @bkey: PKEY for header
+ * @pkey: PKEY for header
* @sc5: SC for packet
* @s_pkey_index: It will be used for look up optimization for kernel contexts
* only. If it is negative value, then it means user contexts is calling this
@@ -1206,7 +1206,7 @@ bad:
return 1;
}
-/**
+/*
* get_send_routine - choose an egress routine
*
* Choose an egress routine based on QP type
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 5afee04fb02c..23c438cef40d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -32,6 +32,7 @@
#ifndef _HNS_ROCE_COMMON_H
#define _HNS_ROCE_COMMON_H
+#include <linux/bitfield.h>
#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg))
#define roce_read(dev, reg) readl((dev)->reg_base + (reg))
@@ -65,6 +66,27 @@
#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ *((__le32 *)_ptr + (field_h) / 32) &= \
+ cpu_to_le32( \
+ ~GENMASK((field_h) % 32, (field_l) % 32)) + \
+ BUILD_BUG_ON_ZERO(((field_h) / 32) != \
+ ((field_l) / 32)); \
+ })
+
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
+
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \
+ GENMASK((field_h) % 32, (field_l) % 32), val)); \
+ })
+
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
+
#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
@@ -342,8 +364,8 @@
#define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000
#define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004
#define ROCEE_TX_CMQ_DEPTH_REG 0x07008
-#define ROCEE_TX_CMQ_TAIL_REG 0x07010
-#define ROCEE_TX_CMQ_HEAD_REG 0x07014
+#define ROCEE_TX_CMQ_HEAD_REG 0x07010
+#define ROCEE_TX_CMQ_TAIL_REG 0x07014
#define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018
#define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 8533fc2d8df2..74fc4940b03a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -38,11 +38,74 @@
#include "hns_roce_hem.h"
#include "hns_roce_common.h"
+static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
+
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+ u8 bankid;
+ int id;
+
+ mutex_lock(&cq_table->bank_mutex);
+ bankid = get_least_load_bankid_for_cq(cq_table->bank);
+ bank = &cq_table->bank[bankid];
+
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ mutex_unlock(&cq_table->bank_mutex);
+ return id;
+ }
+
+ /* the lower 2 bits is bankid */
+ hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid;
+ bank->inuse++;
+ mutex_unlock(&cq_table->bank_mutex);
+
+ return 0;
+}
+
+static inline u8 get_cq_bankid(unsigned long cqn)
+{
+ /* The lower 2 bits of CQN are used to hash to different banks */
+ return (u8)(cqn & GENMASK(1, 0));
+}
+
+static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+
+ bank = &cq_table->bank[get_cq_bankid(cqn)];
+
+ ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT);
+
+ mutex_lock(&cq_table->bank_mutex);
+ bank->inuse--;
+ mutex_unlock(&cq_table->bank_mutex);
+}
+
static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_cq_table *cq_table;
u64 mtts[MTT_MIN_COUNT] = { 0 };
dma_addr_t dma_handle;
int ret;
@@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return -EINVAL;
}
- cq_table = &hr_dev->cq_table;
- ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret);
- return ret;
- }
-
/* Get CQC memory HEM(Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
@@ -110,7 +166,6 @@ err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
@@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
wait_for_completion(&hr_cq->free);
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -152,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT,
@@ -298,11 +351,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
goto err_cq_buf;
}
+ ret = alloc_cqn(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+ goto err_cq_db;
+ }
+
ret = alloc_cqc(hr_dev, hr_cq);
if (ret) {
ibdev_err(ibdev,
"failed to alloc CQ context, ret = %d.\n", ret);
- goto err_cq_db;
+ goto err_cqn;
}
/*
@@ -326,6 +385,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
err_cqc:
free_cqc(hr_dev, hr_cq);
+err_cqn:
+ free_cqn(hr_dev, hr_cq->cqn);
err_cq_db:
free_cq_db(hr_dev, hr_cq, udata);
err_cq_buf:
@@ -341,9 +402,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
if (hr_dev->hw->destroy_cq)
hr_dev->hw->destroy_cq(ib_cq, udata);
- free_cq_buf(hr_dev, hr_cq);
- free_cq_db(hr_dev, hr_cq, udata);
free_cqc(hr_dev, hr_cq);
+ free_cqn(hr_dev, hr_cq->cqn);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cq_buf(hr_dev, hr_cq);
+
return 0;
}
@@ -402,18 +465,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
complete(&hr_cq->free);
}
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
+ mutex_init(&cq_table->bank_mutex);
xa_init(&cq_table->array);
- return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs,
- hr_dev->caps.num_cqs - 1,
- hr_dev->caps.reserved_cqs, 0);
+ reserved_from_bot = hr_dev->caps.reserved_cqs;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ cq_table->bank[get_cq_bankid(i)].inuse++;
+ cq_table->bank[get_cq_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ ida_init(&cq_table->bank[i].ida);
+ cq_table->bank[i].max = hr_dev->caps.num_cqs /
+ HNS_ROCE_CQ_BANK_NUM - 1;
+ }
}
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
+ ida_destroy(&hr_dev->cq_table.bank[i].ida);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index ad8253245a85..3d6b7a2db496 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -54,6 +54,7 @@
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_MIN_WQE_NUM 0x20
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
@@ -65,6 +66,8 @@
#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
#define HNS_ROCE_MIN_CQE_CNT 16
+#define HNS_ROCE_RESERVED_SGE 1
+
#define HNS_ROCE_MAX_IRQ_NUM 128
#define HNS_ROCE_SGE_IN_WQE 2
@@ -90,6 +93,7 @@
#define HNS_ROCE_MAX_PORTS 6
#define HNS_ROCE_GID_SIZE 16
#define HNS_ROCE_SGE_SIZE 16
+#define HNS_ROCE_DWQE_SIZE 65536
#define HNS_ROCE_HOP_NUM_0 0xff
@@ -119,6 +123,9 @@
#define SRQ_DB_REG 0x230
#define HNS_ROCE_QP_BANK_NUM 8
+#define HNS_ROCE_CQ_BANK_NUM 4
+
+#define CQ_BANKID_SHIFT 2
/* The chip implementation of the consumer index is calculated
* according to twice the actual EQ depth
@@ -163,44 +170,6 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_FLR = 0x15,
};
-/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
-enum {
- HNS_ROCE_LWQCE_QPC_ERROR = 1,
- HNS_ROCE_LWQCE_MTU_ERROR = 2,
- HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3,
- HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4,
- HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5,
- HNS_ROCE_LWQCE_SL_ERROR = 6,
- HNS_ROCE_LWQCE_PORT_ERROR = 7,
-};
-
-/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
-enum {
- HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
- HNS_ROCE_LAVWQE_LENGTH_ERROR = 2,
- HNS_ROCE_LAVWQE_VA_ERROR = 3,
- HNS_ROCE_LAVWQE_PD_ERROR = 4,
- HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5,
- HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6,
- HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7,
-};
-
-/* DOORBELL overflow subtype */
-enum {
- HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2,
- HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6,
-};
-
-enum {
- /* RQ&SRQ related operations */
- HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
- HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07,
-};
-
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
enum {
@@ -253,9 +222,6 @@ enum {
#define HNS_ROCE_CMD_SUCCESS 1
-#define HNS_ROCE_PORT_DOWN 0
-#define HNS_ROCE_PORT_UP 1
-
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -332,7 +298,6 @@ struct hns_roce_buf_attr {
} region[HNS_ROCE_MAX_BT_REGION];
unsigned int region_count; /* valid region count */
unsigned int page_shift; /* buffer page shift */
- bool fixed_page; /* decide page shift is fixed-size or maximum size */
unsigned int user_access; /* umem access flag */
bool mtt_only; /* only alloc buffer-required MTT memory */
};
@@ -393,6 +358,7 @@ struct hns_roce_wq {
spinlock_t lock;
u32 wqe_cnt; /* WQE num */
u32 max_gs;
+ u32 rsv_sge;
int offset;
int wqe_shift; /* WQE size */
u32 head;
@@ -489,6 +455,8 @@ struct hns_roce_idx_que {
struct hns_roce_mtr mtr;
int entry_shift;
unsigned long *bitmap;
+ u32 head;
+ u32 tail;
};
struct hns_roce_srq {
@@ -496,7 +464,9 @@ struct hns_roce_srq {
unsigned long srqn;
u32 wqe_cnt;
int max_gs;
+ u32 rsv_sge;
int wqe_shift;
+ u32 cqn;
void __iomem *db_reg_l;
atomic_t refcount;
@@ -507,8 +477,6 @@ struct hns_roce_srq {
u64 *wrid;
struct hns_roce_idx_que idx_que;
spinlock_t lock;
- u16 head;
- u16 tail;
struct mutex mutex;
void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
};
@@ -536,9 +504,10 @@ struct hns_roce_qp_table {
};
struct hns_roce_cq_table {
- struct hns_roce_bitmap bitmap;
struct xarray array;
struct hns_roce_hem_table table;
+ struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
+ struct mutex bank_mutex;
};
struct hns_roce_srq_table {
@@ -640,6 +609,10 @@ struct hns_roce_work {
u32 queue_num;
};
+enum {
+ HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
+};
+
struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_wq rq;
@@ -647,7 +620,7 @@ struct hns_roce_qp {
struct hns_roce_db sdb;
unsigned long en_flags;
u32 doorbell_qpn;
- u32 sq_signal_bits;
+ enum ib_sig_type sq_signal_bits;
struct hns_roce_wq sq;
struct hns_roce_mtr mtr;
@@ -779,7 +752,7 @@ struct hns_roce_caps {
u32 max_cqes;
u32 min_cqes;
u32 min_wqes;
- int reserved_cqs;
+ u32 reserved_cqs;
int reserved_srqs;
int num_aeq_vectors;
int num_comp_vectors;
@@ -911,8 +884,7 @@ struct hns_roce_hw {
int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
struct hns_roce_mr *mr, unsigned long mtpt_idx);
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, int flags, u32 pdn,
- int mr_access_flags, u64 iova, u64 size,
+ struct hns_roce_mr *mr, int flags,
void *mb_buf);
int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
struct hns_roce_mr *mr);
@@ -945,11 +917,7 @@ struct hns_roce_hw {
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
- void (*write_srqc)(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
- void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
- dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx);
+ int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
@@ -982,6 +950,7 @@ struct hns_roce_dev {
struct mutex pgdir_mutex;
int irq[HNS_ROCE_MAX_IRQ_NUM];
u8 __iomem *reg_base;
+ void __iomem *mem_base;
struct hns_roce_caps caps;
struct xarray qp_table_xa;
@@ -1067,7 +1036,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
{
- __raw_writeq(*(u64 *) val, dest);
+ writeq(*(u64 *)val, dest);
}
static inline struct hns_roce_qp
@@ -1164,7 +1133,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
@@ -1281,7 +1250,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
-
int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
struct ib_cq *ib_cq);
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index edc9d6b98d95..cfd2e1b60c7f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev,
return NULL;
if (exist_bt) {
- hem->addr = dma_alloc_coherent(hr_dev->dev,
- count * BA_BYTE_LEN,
- &hem->dma_addr, GFP_KERNEL);
+ hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN,
+ &hem->dma_addr, GFP_KERNEL);
if (!hem->addr) {
kfree(hem);
return NULL;
@@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
if (ba_num < 1)
return -ENOMEM;
+ if (ba_num > unit)
+ return -ENOBUFS;
+
+ ba_num = min_t(int, ba_num, unit);
INIT_LIST_HEAD(&temp_root);
offset = r->offset;
/* indicate to last region */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index f68585ff8e8a..5346fdca9473 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -43,6 +43,22 @@
#include "hns_roce_hem.h"
#include "hns_roce_hw_v1.h"
+/**
+ * hns_get_gid_index - Get gid index.
+ * @hr_dev: pointer to structure hns_roce_dev.
+ * @port: port, value range: 0 ~ MAX
+ * @gid_index: gid_index, value range: 0 ~ MAX
+ * Description:
+ * N ports shared gids, allocation method as follow:
+ * GID[0][0], GID[1][0],.....GID[N - 1][0],
+ * GID[0][1], GID[1][1],.....GID[N - 1][1],
+ * And so on
+ */
+u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
+{
+ return gid_index * hr_dev->caps.num_ports + port;
+}
+
static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg)
{
dseg->lkey = cpu_to_le32(sg->lkey);
@@ -314,8 +330,6 @@ out:
/* Set DB return */
if (likely(nreq)) {
qp->sq.head += nreq;
- /* Memory barrier */
- wmb();
roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
SQ_DOORBELL_U32_4_SQ_HEAD_S,
@@ -395,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
out:
if (likely(nreq)) {
hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
if (ibqp->qp_type == IB_QPT_GSI) {
__le32 tmp;
@@ -1391,7 +1403,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
/**
* hns_roce_v1_reset - reset RoCE
* @hr_dev: RoCE device struct pointer
- * @enable: true -- drop reset, false -- reset
+ * @dereset: true -- drop reset, false -- reset
* return 0 - success , negative --fail
*/
static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
@@ -1968,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if (nfreed) {
hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
-
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
}
}
@@ -2314,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
*hr_cq->tptr_addr = hr_cq->cons_index &
((hr_cq->cq_depth << 1) - 1);
- /* Memroy barrier */
- wmb();
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
}
@@ -3204,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
* need to hw to flash RQ HEAD by DB again
*/
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- /* Memory barrier */
- wmb();
-
roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 46ab0a321d21..84383236e47d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -193,6 +193,49 @@
#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0)
+/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
+enum {
+ HNS_ROCE_LWQCE_QPC_ERROR = 1,
+ HNS_ROCE_LWQCE_MTU_ERROR,
+ HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR,
+ HNS_ROCE_LWQCE_WQE_ADDR_ERROR,
+ HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR,
+ HNS_ROCE_LWQCE_SL_ERROR,
+ HNS_ROCE_LWQCE_PORT_ERROR,
+};
+
+/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
+enum {
+ HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
+ HNS_ROCE_LAVWQE_LENGTH_ERROR,
+ HNS_ROCE_LAVWQE_VA_ERROR,
+ HNS_ROCE_LAVWQE_PD_ERROR,
+ HNS_ROCE_LAVWQE_RW_ACC_ERROR,
+ HNS_ROCE_LAVWQE_KEY_STATE_ERROR,
+ HNS_ROCE_LAVWQE_MR_OPERATION_ERROR,
+};
+
+/* DOORBELL overflow subtype */
+enum {
+ HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
+ HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF,
+ HNS_ROCE_DB_SUBTYPE_ODB_OVF,
+ HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF,
+ HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP,
+ HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP,
+};
+
+enum {
+ /* RQ&SRQ related operations */
+ HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
+ HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE,
+};
+
+enum {
+ HNS_ROCE_PORT_DOWN = 0,
+ HNS_ROCE_PORT_UP,
+};
+
struct hns_roce_cq_context {
__le32 cqc_byte_4;
__le32 cq_bt_l;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 833e1f259936..c3934abeb260 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -48,8 +48,8 @@
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
-static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
- struct ib_sge *sg)
+static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct ib_sge *sg)
{
dseg->lkey = cpu_to_le32(sg->lkey);
dseg->addr = cpu_to_le64(sg->addr);
@@ -99,16 +99,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
u64 pbl_ba;
/* use ib_access_flags */
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
- wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
- wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S,
- wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S,
- wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S,
- wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S,
+ !!(wr->access & IB_ACCESS_MW_BIND));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S,
+ !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RR_S,
+ !!(wr->access & IB_ACCESS_REMOTE_READ));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RW_S,
+ !!(wr->access & IB_ACCESS_REMOTE_WRITE));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_LW_S,
+ !!(wr->access & IB_ACCESS_LOCAL_WRITE));
/* Data structure reuse may lead to confusion */
pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
@@ -121,12 +121,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
fseg->pbl_size = cpu_to_le32(mr->npages);
- roce_set_field(fseg->mode_buf_pg_sz,
- V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+ roce_set_field(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
- roce_set_bit(fseg->mode_buf_pg_sz,
- V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
static void set_atomic_seg(const struct ib_send_wr *wr,
@@ -361,7 +359,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
} else if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
hr_qp->state == IB_QPS_RTR)) {
- ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n",
+ ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
hr_qp->state);
return -EINVAL;
} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
@@ -469,7 +467,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
- memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
ret = set_ud_opcode(ud_sq_wqe, wr);
if (WARN_ON(ret))
@@ -503,6 +500,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
if (ret)
return ret;
+ qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl;
+
set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
/*
@@ -521,10 +520,12 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
return 0;
}
-static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
const struct ib_send_wr *wr)
{
u32 ib_op = wr->opcode;
+ int ret = 0;
rc_sq_wqe->immtdata = get_immtdata(wr);
@@ -544,7 +545,10 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_REG_MR:
- set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ else
+ ret = -EOPNOTSUPP;
break;
case IB_WR_LOCAL_INV:
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
@@ -553,19 +557,23 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}
+ if (unlikely(ret))
+ return ret;
+
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
- return 0;
+ return ret;
}
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
unsigned int owner_bit)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
unsigned int curr_idx = *sge_idx;
unsigned int valid_num_sge;
@@ -573,11 +581,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
- memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
- ret = set_rc_opcode(rc_sq_wqe, wr);
+ ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr);
if (WARN_ON(ret))
return ret;
@@ -635,6 +642,8 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
+ /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */
+ roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0);
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
V2_DB_PARAMETER_IDX_S, qp->sq.head);
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
@@ -644,6 +653,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
}
}
+static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ u64 __iomem *dest)
+{
+#define HNS_ROCE_WRITE_TIMES 8
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int i;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++)
+ writeq_relaxed(*(val + i), dest + i);
+}
+
+static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+{
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ /* All kinds of DirectWQE have the same header field layout */
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
+ V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
+ V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
+ V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, hr_dev->mem_base +
+ HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num);
+}
+
static int hns_roce_v2_post_send(struct ib_qp *ibqp,
const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
@@ -708,9 +749,12 @@ out:
if (likely(nreq)) {
qp->sq.head += nreq;
qp->next_sge = sge_idx;
- /* Memory barrier */
- wmb();
- update_sq_db(hr_dev, qp);
+
+ if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -721,14 +765,74 @@ out:
static int check_recv_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+ ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_UD)) {
+ ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n",
+ ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
+
if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
return -EIO;
- else if (hr_qp->state == IB_QPS_RESET)
+
+ if (hr_qp->state == IB_QPS_RESET)
return -EINVAL;
return 0;
}
+static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
+ u32 max_sge, bool rsv)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ u32 i, cnt;
+
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
+ if (!wr->sg_list[i].length)
+ continue;
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
+ }
+
+ /* Fill a reserved sge to make hw stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
+ }
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+ u32 wqe_idx, u32 max_sge)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ void *wqe = NULL;
+ u32 i;
+
+ wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ /* RQ supports inline data */
+ if (hr_qp->rq_inl_buf.wqe_cnt) {
+ sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
+ sge_list[i].len = wr->sg_list[i].length;
+ }
+ }
+}
+
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
@@ -736,14 +840,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_rinl_sge *sge_list;
+ u32 wqe_idx, nreq, max_sge;
unsigned long flags;
- void *wqe = NULL;
- u32 wqe_idx;
- int nreq;
int ret;
- int i;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
@@ -754,6 +853,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
+ max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
hr_qp->ibqp.recv_cq))) {
@@ -762,50 +862,22 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
- wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
-
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
+ if (unlikely(wr->num_sge > max_sge)) {
ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
- wr->num_sge, hr_qp->rq.max_gs);
+ wr->num_sge, max_sge);
ret = -EINVAL;
*bad_wr = wr;
goto out;
}
- wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
- for (i = 0; i < wr->num_sge; i++) {
- if (!wr->sg_list[i].length)
- continue;
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
-
- if (wr->num_sge < hr_qp->rq.max_gs) {
- dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
- dseg->addr = 0;
- }
-
- /* rq support inline data */
- if (hr_qp->rq_inl_buf.wqe_cnt) {
- sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
- hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
- (u32)wr->num_sge;
- for (i = 0; i < wr->num_sge; i++) {
- sge_list[i].addr =
- (void *)(u64)wr->sg_list[i].addr;
- sge_list[i].len = wr->sg_list[i].length;
- }
- }
-
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+ fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
out:
if (likely(nreq)) {
hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
/*
* Hip08 hardware cannot flush the WQEs in RQ if the QP state
@@ -829,41 +901,82 @@ out:
return ret;
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
{
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
}
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
{
return hns_roce_buf_offset(idx_que->mtr.kmem,
n << idx_que->entry_shift);
}
-static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
{
/* always called with interrupts disabled. */
spin_lock(&srq->lock);
bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
- srq->tail++;
+ srq->idx_que.tail++;
spin_unlock(&srq->lock);
}
-static int find_empty_entry(struct hns_roce_idx_que *idx_que,
- unsigned long size)
+static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq)
{
- int wqe_idx;
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
- if (unlikely(bitmap_full(idx_que->bitmap, size)))
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
+}
+
+static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge,
+ const struct ib_recv_wr *wr)
+{
+ struct ib_device *ib_dev = srq->ibsrq.device;
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ib_dev,
+ "failed to check sge, wr->num_sge = %d, max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ return -EINVAL;
+ }
+
+ if (unlikely(hns_roce_srqwq_overflow(srq))) {
+ ibdev_err(ib_dev,
+ "failed to check srqwq status, srqwq is full.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 pos;
+
+ pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+ if (unlikely(pos == srq->wqe_cnt))
return -ENOSPC;
- wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
+ bitmap_set(idx_que->bitmap, pos, 1);
+ *wqe_idx = pos;
+ return 0;
+}
- bitmap_set(idx_que->bitmap, wqe_idx, 1);
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *buf;
- return wqe_idx;
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ buf = get_idx_buf(idx_que, head);
+ *buf = cpu_to_le32(wqe_idx);
+
+ idx_que->head++;
}
static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
@@ -872,77 +985,42 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_v2_db srq_db;
unsigned long flags;
- unsigned int ind;
- __le32 *srq_idx;
int ret = 0;
- int wqe_idx;
+ u32 max_sge;
+ u32 wqe_idx;
void *wqe;
- int nreq;
- int i;
+ u32 nreq;
spin_lock_irqsave(&srq->lock, flags);
- ind = srq->head & (srq->wqe_cnt - 1);
-
+ max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(wr->num_sge >= srq->max_gs)) {
- ret = -EINVAL;
- *bad_wr = wr;
- break;
- }
-
- if (unlikely(srq->head == srq->tail)) {
- ret = -ENOMEM;
+ ret = check_post_srq_valid(srq, max_sge, wr);
+ if (ret) {
*bad_wr = wr;
break;
}
- wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
- if (unlikely(wqe_idx < 0)) {
- ret = -ENOMEM;
+ ret = get_srq_wqe_idx(srq, &wqe_idx);
+ if (unlikely(ret)) {
*bad_wr = wr;
break;
}
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
- dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
- dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
- }
-
- if (wr->num_sge < srq->max_gs) {
- dseg[i].len = 0;
- dseg[i].lkey = cpu_to_le32(0x100);
- dseg[i].addr = 0;
- }
-
- srq_idx = get_idx_buf(&srq->idx_que, ind);
- *srq_idx = cpu_to_le32(wqe_idx);
-
+ wqe = get_srq_wqe_buf(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
+ fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->wqe_cnt - 1);
}
if (likely(nreq)) {
- srq->head += nreq;
-
- /*
- * Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
-
srq_db.byte_4 =
cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
(srq->srqn & V2_DB_BYTE_4_TAG_M));
srq_db.parameter =
- cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M);
+ cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M);
hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
}
@@ -1059,15 +1137,6 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
return 0;
}
-static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring)
-{
- int ntu = ring->next_to_use;
- int ntc = ring->next_to_clean;
- int used = (ntu - ntc + ring->desc_num) % ring->desc_num;
-
- return ring->desc_num - used - 1;
-}
-
static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_cmq_ring *ring)
{
@@ -1107,8 +1176,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type)
&priv->cmq.csq : &priv->cmq.crq;
ring->flag = ring_type;
- ring->next_to_clean = 0;
- ring->next_to_use = 0;
+ ring->head = 0;
return hns_roce_alloc_cmq_desc(hr_dev, ring);
}
@@ -1207,34 +1275,10 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
{
- u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG);
+ u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG);
struct hns_roce_v2_priv *priv = hr_dev->priv;
- return head == priv->cmq.csq.next_to_use;
-}
-
-static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v2_priv *priv = hr_dev->priv;
- struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- struct hns_roce_cmq_desc *desc;
- u16 ntc = csq->next_to_clean;
- u32 head;
- int clean = 0;
-
- desc = &csq->desc[ntc];
- head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG);
- while (head != ntc) {
- memset(desc, 0, sizeof(*desc));
- ntc++;
- if (ntc == csq->desc_num)
- ntc = 0;
- desc = &csq->desc[ntc];
- clean++;
- }
- csq->next_to_clean = ntc;
-
- return clean;
+ return tail == priv->cmq.csq.head;
}
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
@@ -1242,42 +1286,26 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- struct hns_roce_cmq_desc *desc_to_use;
- bool complete = false;
u32 timeout = 0;
- int handle = 0;
u16 desc_ret;
- int ret = 0;
- int ntc;
+ u32 tail;
+ int ret;
+ int i;
spin_lock_bh(&csq->lock);
- if (num > hns_roce_cmq_space(csq)) {
- spin_unlock_bh(&csq->lock);
- return -EBUSY;
- }
-
- /*
- * Record the location of desc in the cmq for this time
- * which will be use for hardware to write back
- */
- ntc = csq->next_to_use;
+ tail = csq->head;
- while (handle < num) {
- desc_to_use = &csq->desc[csq->next_to_use];
- *desc_to_use = desc[handle];
- dev_dbg(hr_dev->dev, "set cmq desc:\n");
- csq->next_to_use++;
- if (csq->next_to_use == csq->desc_num)
- csq->next_to_use = 0;
- handle++;
+ for (i = 0; i < num; i++) {
+ csq->desc[csq->head++] = desc[i];
+ if (csq->head == csq->desc_num)
+ csq->head = 0;
}
/* Write to hardware */
- roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use);
+ roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head);
- /*
- * If the command is sync, wait for the firmware to write back,
+ /* If the command is sync, wait for the firmware to write back,
* if multi descriptors to be sent, use the first one to check
*/
if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
@@ -1285,39 +1313,34 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
if (hns_roce_cmq_csq_done(hr_dev))
break;
udelay(1);
- timeout++;
- } while (timeout < priv->cmq.tx_timeout);
+ } while (++timeout < priv->cmq.tx_timeout);
}
if (hns_roce_cmq_csq_done(hr_dev)) {
- complete = true;
- handle = 0;
- while (handle < num) {
- /* get the result of hardware write back */
- desc_to_use = &csq->desc[ntc];
- desc[handle] = *desc_to_use;
- dev_dbg(hr_dev->dev, "Get cmq desc:\n");
- desc_ret = le16_to_cpu(desc[handle].retval);
- if (desc_ret == CMD_EXEC_SUCCESS)
- ret = 0;
- else
- ret = -EIO;
- priv->cmq.last_status = desc_ret;
- ntc++;
- handle++;
- if (ntc == csq->desc_num)
- ntc = 0;
+ for (ret = 0, i = 0; i < num; i++) {
+ /* check the result of hardware write back */
+ desc[i] = csq->desc[tail++];
+ if (tail == csq->desc_num)
+ tail = 0;
+
+ desc_ret = le16_to_cpu(desc[i].retval);
+ if (likely(desc_ret == CMD_EXEC_SUCCESS))
+ continue;
+
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = %x, return = %x\n",
+ desc->opcode, desc_ret);
+ ret = -EIO;
}
- }
+ } else {
+ /* FW/HW reset or incorrect number of desc */
+ tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG);
+ dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n",
+ csq->head, tail);
+ csq->head = tail;
- if (!complete)
ret = -EAGAIN;
-
- /* clean the command send queue */
- handle = hns_roce_cmq_csq_clean(hr_dev);
- if (handle != num)
- dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n",
- handle, num);
+ }
spin_unlock_bh(&csq->lock);
@@ -1530,7 +1553,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8);
roce_set_field(req->time_cfg_udp_port,
CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M,
- CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7);
+ CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S,
+ ROCE_V2_UDP_DPORT);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -1541,17 +1565,13 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
struct hns_roce_pf_res_a *req_a;
struct hns_roce_pf_res_b *req_b;
int ret;
- int i;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_QUERY_PF_RES, true);
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_QUERY_PF_RES,
+ true);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_QUERY_PF_RES,
+ true);
ret = hns_roce_cmq_send(hr_dev, desc, 2);
if (ret)
@@ -1644,19 +1664,16 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
struct hns_roce_cmq_desc desc[2];
struct hns_roce_vf_res_a *req_a;
struct hns_roce_vf_res_b *req_b;
- int i;
req_a = (struct hns_roce_vf_res_a *)desc[0].data;
req_b = (struct hns_roce_vf_res_b *)desc[1].data;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_ALLOC_VF_RES, false);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_ALLOC_VF_RES,
+ false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_ALLOC_VF_RES,
+ false);
roce_set_field(req_a->vf_qpc_bt_idx_num,
VF_RES_A_DATA_1_VF_QPC_BT_IDX_M,
@@ -1866,7 +1883,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
- HNS_ROCE_CAP_FLAG_RQ_INLINE |
HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
@@ -1999,10 +2015,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
caps->num_other_vectors = resp_a->num_other_vectors;
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
@@ -2336,7 +2354,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
struct hns_roce_link_table_entry *entry;
enum hns_roce_opcode_type opcode;
u32 page_num;
- int i;
switch (type) {
case TSQ_LINK_TABLE:
@@ -2354,14 +2371,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
page_num = link_tbl->npages;
entry = link_tbl->table.buf;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff);
req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32);
@@ -2880,36 +2893,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num ==
- HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mr->pd);
-
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
- (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
- mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
- (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
- (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
- (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
-
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
- mr->type == MR_TYPE_MR ? 0 : 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
- 1);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+ hr_reg_enable(mpt_entry, MPT_L_INV_EN);
+
+ hr_reg_write(mpt_entry, MPT_BIND_EN,
+ !!(mr->access & IB_ACCESS_MW_BIND));
+ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
+ !!(mr->access & IB_ACCESS_REMOTE_ATOMIC));
+ hr_reg_write(mpt_entry, MPT_RR_EN,
+ !!(mr->access & IB_ACCESS_REMOTE_READ));
+ hr_reg_write(mpt_entry, MPT_RW_EN,
+ !!(mr->access & IB_ACCESS_REMOTE_WRITE));
+ hr_reg_write(mpt_entry, MPT_LW_EN,
+ !!((mr->access & IB_ACCESS_LOCAL_WRITE)));
mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
@@ -2917,9 +2914,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ if (mr->type != MR_TYPE_MR)
+ hr_reg_enable(mpt_entry, MPT_PA);
+
if (mr->type == MR_TYPE_DMA)
return 0;
+ if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0)
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num);
+
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
+
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
return ret;
@@ -2927,20 +2934,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags,
- u32 pdn, int mr_access_flags, u64 iova,
- u64 size, void *mb_buf)
+ void *mb_buf)
{
struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
+ u32 mr_access_flags = mr->access;
int ret = 0;
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
- if (flags & IB_MR_REREG_PD) {
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, pdn);
- mr->pd = pdn;
- }
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+ V2_MPT_BYTE_4_PD_S, mr->pd);
if (flags & IB_MR_REREG_ACCESS) {
roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
@@ -2958,13 +2962,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
}
if (flags & IB_MR_REREG_TRANS) {
- mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova));
- mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova));
- mpt_entry->len_l = cpu_to_le32(lower_32_bits(size));
- mpt_entry->len_h = cpu_to_le32(upper_32_bits(size));
-
- mr->iova = iova;
- mr->size = size;
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
@@ -3126,11 +3127,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if (nfreed) {
hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
}
}
@@ -3639,11 +3635,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
break;
}
- if (npolled) {
- /* Memory barrier */
- wmb();
+ if (npolled)
hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
out:
spin_unlock_irqrestore(&hr_cq->lock, flags);
@@ -4235,7 +4228,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
- const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct ib_device *ibdev = &hr_dev->ib_dev;
@@ -4243,7 +4235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
dma_addr_t irrl_ba;
enum ib_mtu mtu;
u8 lp_pktn_ini;
- u8 port_num;
u64 *mtts;
u8 *dmac;
u8 *smac;
@@ -4324,15 +4315,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
}
- /* Configure GID index */
- port_num = rdma_ah_get_port_num(&attr->ah_attr);
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
- hns_get_gid_index(hr_dev, port_num - 1,
- grh->sgid_index));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
-
memcpy(&(context->dmac), dmac, sizeof(u32));
roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4])));
@@ -5083,7 +5065,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
if (!ibqp->uobject) {
qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
@@ -5174,6 +5156,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc desc;
int ret, i;
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
mutex_lock(&hr_dev->qp_table.scc_mutex);
/* set scc ctx clear done flag */
@@ -5220,98 +5205,96 @@ out:
return ret;
}
-static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
- u32 cqn, void *mb_buf, u64 *mtts_wqe,
- u64 *mtts_idx, dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx)
+#define DMA_IDX_SHIFT 3
+#define DMA_WQE_SHIFT 3
+
+static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
+ struct hns_roce_srq_context *ctx)
{
- struct hns_roce_srq_context *srq_context;
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ u64 mtts_idx[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_idx = 0;
+ int ret;
+
+ /* Get physical address of idx que buf */
+ ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
+ ARRAY_SIZE(mtts_idx), &dma_handle_idx);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+ ret);
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT);
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_H,
+ upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT));
+
+ hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift));
+
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[0]));
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[1]));
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+
+ return 0;
+}
- srq_context = mb_buf;
- memset(srq_context, 0, sizeof(*srq_context));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
- SRQC_BYTE_4_SRQ_ST_S, 1);
-
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
- to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
- srq->wqe_cnt));
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
- ilog2(srq->wqe_cnt));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
- SRQC_BYTE_4_SRQN_S, srq->srqn);
-
- roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
-
- roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
- SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
-
- srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
-
- roce_set_field(srq_context->byte_24_wqe_bt_ba,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
- dma_handle_wqe >> 35);
-
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
- SRQC_BYTE_28_PD_S, pdn);
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
- SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
- fls(srq->max_gs - 1));
-
- srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
- roce_set_field(srq_context->rsv_idx_bt_ba,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
- dma_handle_idx >> 35);
-
- srq_context->idx_cur_blk_addr =
- cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0]));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
- upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
- to_hr_hem_hopnum(hr_dev->caps.idx_hop_num,
- srq->wqe_cnt));
-
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
- to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
- to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift));
-
- srq_context->idx_nxt_blk_addr =
- cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1]));
- roce_set_field(srq_context->rsv_idxnxtblkaddr,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
- upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
- cqn);
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
- to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
- to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
-
- roce_set_bit(srq_context->db_record_addr_record_en,
- SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_srq_context *ctx = mb_buf;
+ u64 mtts_wqe[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_wqe = 0;
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ /* Get the physical address of srq buf */
+ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+ ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+ ret);
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(ctx, SRQC_SRQ_ST, 1);
+ hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
+ hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
+ hr_reg_write(ctx, SRQC_XRCD, 0);
+ hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
+ hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
+ hr_reg_write(ctx, SRQC_RQWS,
+ srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1));
+
+ hr_reg_write(ctx, SRQC_WQE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+ srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT);
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_H,
+ upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT));
+
+ hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+ return hns_roce_v2_write_srqc_index_queue(srq, ctx);
}
static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
@@ -5331,7 +5314,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
return -EINVAL;
if (srq_attr_mask & IB_SRQ_LIMIT) {
- if (srq_attr->srq_limit >= srq->wqe_cnt)
+ if (srq_attr->srq_limit > srq->wqe_cnt)
return -EINVAL;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -5394,8 +5377,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
SRQC_BYTE_8_SRQ_LIMIT_WL_S);
attr->srq_limit = limit_wl;
- attr->max_wr = srq->wqe_cnt - 1;
- attr->max_sge = srq->max_gs;
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -5626,9 +5609,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
aeqe_found = 1;
- if (eq->cons_index > (2 * eq->entries - 1))
- eq->cons_index = 0;
-
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
@@ -5671,9 +5651,6 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
ceqe_found = 1;
- if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1))
- eq->cons_index = 0;
-
ceqe = next_ceqe_sw_v2(eq);
}
@@ -5948,7 +5925,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
buf_attr.region[0].size = eq->entries * eq->eqe_size;
buf_attr.region[0].hopnum = eq->hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
hr_dev->caps.eqe_ba_pg_sz +
@@ -6286,6 +6262,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
/* Get info from NIC driver. */
hr_dev->reg_base = handle->rinfo.roce_io_base;
+ hr_dev->mem_base = handle->rinfo.roce_mem_base;
hr_dev->caps.num_ports = 1;
hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
hr_dev->iboe.phy_port[0] = 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index bdaccf86460d..39621fb6ec16 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -96,7 +96,8 @@
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
-#define HNS_ROCE_INVALID_LKEY 0x100
+#define HNS_ROCE_INVALID_LKEY 0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
@@ -366,24 +367,61 @@ struct hns_roce_v2_cq_context {
#define CQC_STASH CQC_FIELD_LOC(63, 63)
struct hns_roce_srq_context {
- __le32 byte_4_srqn_srqst;
- __le32 byte_8_limit_wl;
- __le32 byte_12_xrcd;
- __le32 byte_16_pi_ci;
- __le32 wqe_bt_ba;
- __le32 byte_24_wqe_bt_ba;
- __le32 byte_28_rqws_pd;
- __le32 idx_bt_ba;
- __le32 rsv_idx_bt_ba;
- __le32 idx_cur_blk_addr;
- __le32 byte_44_idxbufpgsz_addr;
- __le32 idx_nxt_blk_addr;
- __le32 rsv_idxnxtblkaddr;
- __le32 byte_56_xrc_cqn;
- __le32 db_record_addr_record_en;
- __le32 db_record_addr;
+ __le32 byte_4_srqn_srqst;
+ __le32 byte_8_limit_wl;
+ __le32 byte_12_xrcd;
+ __le32 byte_16_pi_ci;
+ __le32 wqe_bt_ba;
+ __le32 byte_24_wqe_bt_ba;
+ __le32 byte_28_rqws_pd;
+ __le32 idx_bt_ba;
+ __le32 rsv_idx_bt_ba;
+ __le32 idx_cur_blk_addr;
+ __le32 byte_44_idxbufpgsz_addr;
+ __le32 idx_nxt_blk_addr;
+ __le32 rsv_idxnxtblkaddr;
+ __le32 byte_56_xrc_cqn;
+ __le32 db_record_addr_record_en;
+ __le32 db_record_addr;
};
+#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l)
+
+#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0)
+#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2)
+#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4)
+#define SRQC_SRQN SRQC_FIELD_LOC(31, 8)
+#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32)
+#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48)
+#define SRQC_XRCD SRQC_FIELD_LOC(87, 64)
+#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88)
+#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96)
+#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112)
+#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128)
+#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160)
+#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189)
+#define SRQC_PD SRQC_FIELD_LOC(215, 192)
+#define SRQC_RQWS SRQC_FIELD_LOC(219, 216)
+#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220)
+#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224)
+#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256)
+#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285)
+#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288)
+#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320)
+#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340)
+#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342)
+#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344)
+#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348)
+#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352)
+#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384)
+#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404)
+#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416)
+#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440)
+#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444)
+#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448)
+#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449)
+#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480)
+
#define SRQC_BYTE_4_SRQ_ST_S 0
#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
@@ -993,6 +1031,45 @@ struct hns_roce_v2_mpt_entry {
__le32 byte_64_buf_pa1;
};
+#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
+
+#define MPT_ST MPT_FIELD_LOC(1, 0)
+#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2)
+#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4)
+#define MPT_PD MPT_FIELD_LOC(31, 8)
+#define MPT_RA_EN MPT_FIELD_LOC(32, 32)
+#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33)
+#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34)
+#define MPT_BIND_EN MPT_FIELD_LOC(35, 35)
+#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36)
+#define MPT_RR_EN MPT_FIELD_LOC(37, 37)
+#define MPT_RW_EN MPT_FIELD_LOC(38, 38)
+#define MPT_LW_EN MPT_FIELD_LOC(39, 39)
+#define MPT_MW_CNT MPT_FIELD_LOC(63, 40)
+#define MPT_FRE MPT_FIELD_LOC(64, 64)
+#define MPT_PA MPT_FIELD_LOC(65, 65)
+#define MPT_ZBVA MPT_FIELD_LOC(66, 66)
+#define MPT_SHARE MPT_FIELD_LOC(67, 67)
+#define MPT_MR_MW MPT_FIELD_LOC(68, 68)
+#define MPT_BPD MPT_FIELD_LOC(69, 69)
+#define MPT_BQP MPT_FIELD_LOC(70, 70)
+#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71)
+#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72)
+#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96)
+#define MPT_LEN MPT_FIELD_LOC(191, 128)
+#define MPT_LKEY MPT_FIELD_LOC(223, 192)
+#define MPT_VA MPT_FIELD_LOC(287, 224)
+#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
+#define MPT_PBL_BA MPT_FIELD_LOC(380, 320)
+#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
+#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
+#define MPT_PA0 MPT_FIELD_LOC(441, 384)
+#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
+#define MPT_PA1 MPT_FIELD_LOC(505, 448)
+#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
+#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
+#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
+
#define V2_MPT_BYTE_4_MPT_ST_S 0
#define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0)
@@ -1059,6 +1136,8 @@ struct hns_roce_v2_mpt_entry {
#define V2_DB_BYTE_4_CMD_S 24
#define V2_DB_BYTE_4_CMD_M GENMASK(27, 24)
+#define V2_DB_FLAG_S 31
+
#define V2_DB_PARAMETER_IDX_S 0
#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0)
@@ -1155,6 +1234,15 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
+
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
+
+#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
+#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
+
#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
@@ -1167,15 +1255,17 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
-#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+#define V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S 10
+
+#define V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S 11
-#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+#define V2_RC_FRMR_WQE_BYTE_40_RR_S 12
-#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+#define V2_RC_FRMR_WQE_BYTE_40_RW_S 13
-#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+#define V2_RC_FRMR_WQE_BYTE_40_LW_S 14
-#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
@@ -1190,7 +1280,7 @@ struct hns_roce_v2_rc_send_wqe {
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
- __le32 mode_buf_pg_sz;
+ __le32 byte_40;
};
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
@@ -1786,12 +1876,8 @@ struct hns_roce_v2_cmq_ring {
dma_addr_t desc_dma_addr;
struct hns_roce_cmq_desc *desc;
u32 head;
- u32 tail;
-
u16 buf_size;
u16 desc_num;
- int next_to_use;
- int next_to_clean;
u8 flag;
spinlock_t lock; /* command queue lock */
};
@@ -1800,7 +1886,6 @@ struct hns_roce_v2_cmq {
struct hns_roce_v2_cmq_ring csq;
struct hns_roce_v2_cmq_ring crq;
u16 tx_timeout;
- u16 last_status;
};
enum hns_roce_link_table_type {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index d9179bae4989..c9c0836394a2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -42,22 +42,6 @@
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-/**
- * hns_get_gid_index - Get gid index.
- * @hr_dev: pointer to structure hns_roce_dev.
- * @port: port, value range: 0 ~ MAX
- * @gid_index: gid_index, value range: 0 ~ MAX
- * Description:
- * N ports shared gids, allocation method as follow:
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * And so on
- */
-u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
-{
- return gid_index * hr_dev->caps.num_ports + port;
-}
-
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
{
u8 phy_port;
@@ -217,7 +201,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_srq_sge = hr_dev->caps.max_srq_sges;
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
+ hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
}
@@ -748,11 +733,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_pd_table_free;
}
- ret = hns_roce_init_cq_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init completion queue table.\n");
- goto err_mr_table_free;
- }
+ hns_roce_init_cq_table(hr_dev);
ret = hns_roce_init_qp_table(hr_dev);
if (ret) {
@@ -772,13 +753,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
return 0;
err_qp_table_free:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
- hns_roce_cleanup_qp_table(hr_dev);
+ hns_roce_cleanup_qp_table(hr_dev);
err_cq_table_free:
hns_roce_cleanup_cq_table(hr_dev);
-
-err_mr_table_free:
hns_roce_cleanup_mr_table(hr_dev);
err_pd_table_free:
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 1bcffd93ff3e..79b3c3023fe7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
- u32 pd, u64 iova, u64 size, u32 access)
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
unsigned long obj = 0;
@@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
return -ENOMEM;
}
- mr->iova = iova; /* MR va starting addr */
- mr->size = size; /* MR addr range */
- mr->pd = pd; /* MR num */
- mr->access = access; /* MR access permit */
- mr->enabled = 0; /* MR active status */
mr->key = hw_index_to_key(obj); /* MR key */
err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
@@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
}
static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
- size_t length, struct ib_udata *udata, u64 start,
- int access)
+ struct ib_udata *udata, u64 start)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
bool is_fast = mr->type == MR_TYPE_FRMR;
@@ -121,11 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
buf_attr.page_shift = is_fast ? PAGE_SHIFT :
hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
- buf_attr.region[0].size = length;
+ buf_attr.region[0].size = mr->size;
buf_attr.region[0].hopnum = mr->pbl_hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
- buf_attr.user_access = access;
+ buf_attr.user_access = mr->access;
/* fast MR's buffer is alloced before mapping, not at creation */
buf_attr.mtt_only = is_fast;
@@ -197,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
}
mr->enabled = 1;
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return 0;
err_page:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -237,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = acc;
/* Allocate memory region key */
hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
- ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_free;
- ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
+ ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
goto err_mr;
@@ -271,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->iova = virt_addr;
+ mr->size = length;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = access_flags;
mr->type = MR_TYPE_MR;
- ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length,
- access_flags);
+
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_alloc_mr;
- ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
if (ret)
goto err_alloc_key;
@@ -299,35 +294,6 @@ err_alloc_mr:
return ERR_PTR(ret);
}
-static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
- u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct hns_roce_cmd_mailbox *mailbox,
- u32 pdn, struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_mr *mr = to_hr_mr(ibmr);
- int ret;
-
- free_mr_pbl(hr_dev, mr);
- ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags);
- if (ret) {
- ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret);
- return ret;
- }
-
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
- if (ret) {
- ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret);
- free_mr_pbl(hr_dev, mr);
- }
-
- return ret;
-}
-
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
u64 length, u64 virt_addr,
int mr_access_flags, struct ib_pd *pd,
@@ -338,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_cmd_mailbox *mailbox;
unsigned long mtpt_idx;
- u32 pdn = 0;
int ret;
if (!mr->enabled)
@@ -360,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
mr->enabled = 0;
+ mr->iova = virt_addr;
+ mr->size = length;
if (flags & IB_MR_REREG_PD)
- pdn = to_hr_pd(pd)->pdn;
+ mr->pd = to_hr_pd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_ACCESS)
+ mr->access = mr_access_flags;
if (flags & IB_MR_REREG_TRANS) {
- ret = rereg_mr_trans(ibmr, flags,
- start, length,
- virt_addr, mr_access_flags,
- mailbox, pdn, udata);
- if (ret)
- goto free_cmd_mbox;
- } else {
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
- if (ret)
+ free_mr_pbl(hr_dev, mr);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
+ ret);
goto free_cmd_mbox;
+ }
+ }
+
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto free_cmd_mbox;
}
ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
@@ -386,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
}
mr->enabled = 1;
- if (flags & IB_MR_REREG_ACCESS)
- mr->access = mr_access_flags;
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return NULL;
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -421,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
- u64 length;
int ret;
if (mr_type != IB_MR_TYPE_MEM_REG)
@@ -438,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_FRMR;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->size = max_num_sg * (1 << PAGE_SHIFT);
/* Allocate memory region key */
- length = max_num_sg * (1 << PAGE_SHIFT);
- ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_free;
- ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0);
+ ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
if (ret)
goto err_key;
@@ -454,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
goto err_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->ibmr.length = length;
+ mr->ibmr.length = mr->size;
return &mr->ibmr;
@@ -631,30 +596,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw)
}
static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t *pages, struct hns_roce_buf_region *region)
+ struct hns_roce_buf_region *region, dma_addr_t *pages,
+ int max_count)
{
+ int count, npage;
+ int offset, end;
__le64 *mtts;
- int offset;
- int count;
- int npage;
u64 addr;
- int end;
int i;
- /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */
- if (!region->hopnum)
- return 0;
-
offset = region->offset;
end = offset + region->count;
npage = 0;
- while (offset < end) {
+ while (offset < end && npage < max_count) {
+ count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
offset, &count, NULL);
if (!mtts)
return -ENOBUFS;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < count && npage < max_count; i++) {
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
addr = to_hr_hw_page_addr(pages[npage]);
else
@@ -666,7 +627,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
offset += count;
}
- return 0;
+ return npage;
}
static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
@@ -729,25 +690,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
}
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- struct hns_roce_buf_attr *buf_attr, bool is_direct,
+ struct hns_roce_buf_attr *buf_attr,
struct ib_udata *udata, unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int best_pg_shift;
- int all_pg_count = 0;
size_t total_size;
- int ret;
total_size = mtr_bufs_size(buf_attr);
- if (total_size < 1) {
- ibdev_err(ibdev, "failed to check mtr size\n.");
- return -EINVAL;
- }
if (udata) {
- unsigned long pgsz_bitmap;
- unsigned long page_size;
-
mtr->kmem = NULL;
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
@@ -756,76 +707,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
PTR_ERR(mtr->umem));
return -ENOMEM;
}
- if (buf_attr->fixed_page)
- pgsz_bitmap = 1 << buf_attr->page_shift;
- else
- pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
-
- page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
- user_addr);
- if (!page_size)
- return -EINVAL;
- best_pg_shift = order_base_2(page_size);
- all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
- ret = 0;
} else {
mtr->umem = NULL;
- mtr->kmem =
- hns_roce_buf_alloc(hr_dev, total_size,
- buf_attr->page_shift,
- is_direct ? HNS_ROCE_BUF_DIRECT : 0);
+ mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
+ buf_attr->page_shift,
+ mtr->hem_cfg.is_direct ?
+ HNS_ROCE_BUF_DIRECT : 0);
if (IS_ERR(mtr->kmem)) {
ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
PTR_ERR(mtr->kmem));
return PTR_ERR(mtr->kmem);
}
-
- best_pg_shift = buf_attr->page_shift;
- all_pg_count = mtr->kmem->npages;
- }
-
- /* must bigger than minimum hardware page shift */
- if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
- ret = -EINVAL;
- ibdev_err(ibdev,
- "failed to check mtr, page shift = %u count = %d.\n",
- best_pg_shift, all_pg_count);
- goto err_alloc_mem;
}
- mtr->hem_cfg.buf_pg_shift = best_pg_shift;
- mtr->hem_cfg.buf_pg_count = all_pg_count;
-
return 0;
-err_alloc_mem:
- mtr_free_bufs(hr_dev, mtr);
- return ret;
}
-static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t *pages, int count, unsigned int page_shift)
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ int page_count, unsigned int page_shift)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t *pages;
int npage;
- int err;
+ int ret;
+
+ /* alloc a tmp array to store buffer's dma address */
+ pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
if (mtr->umem)
- npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
+ npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0,
mtr->umem, page_shift);
else
- npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0,
mtr->kmem);
+ if (npage != page_count) {
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
+ page_count);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
+
if (mtr->hem_cfg.is_direct && npage > 1) {
- err = mtr_check_direct_pages(pages, npage, page_shift);
- if (err) {
- ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
- mtr->umem ? "user" : "kernel", err);
- npage = err;
+ ret = mtr_check_direct_pages(pages, npage, page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n",
+ mtr->umem ? "user" : "kernel", ret);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
}
}
- return npage;
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+
+err_alloc_list:
+ kvfree(pages);
+
+ return ret;
}
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
@@ -833,8 +775,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_region *r;
- unsigned int i;
- int err;
+ unsigned int i, mapped_cnt;
+ int ret;
/*
* Only use the first page address as root ba when hopnum is 0, this
@@ -845,26 +787,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return 0;
}
- for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
+ mapped_cnt < page_cnt; i++) {
r = &mtr->hem_cfg.region[i];
+ /* if hopnum is 0, no need to map pages in this region */
+ if (!r->hopnum) {
+ mapped_cnt += r->count;
+ continue;
+ }
+
if (r->offset + r->count > page_cnt) {
- err = -EINVAL;
+ ret = -EINVAL;
ibdev_err(ibdev,
"failed to check mtr%u end %u + %u, max %u.\n",
i, r->offset, r->count, page_cnt);
- return err;
+ return ret;
}
- err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
- if (err) {
+ ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
+ page_cnt - mapped_cnt);
+ if (ret < 0) {
ibdev_err(ibdev,
"failed to map mtr%u offset %u, ret = %d.\n",
- i, r->offset, err);
- return err;
+ i, r->offset, ret);
+ return ret;
}
+ mapped_cnt += ret;
+ ret = 0;
}
- return 0;
+ if (mapped_cnt < page_cnt) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
+ mapped_cnt, page_cnt);
+ }
+
+ return ret;
}
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
@@ -928,68 +886,92 @@ done:
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
struct hns_roce_buf_attr *attr,
struct hns_roce_hem_cfg *cfg,
- unsigned int *buf_page_shift)
+ unsigned int *buf_page_shift, int unalinged_size)
{
struct hns_roce_buf_region *r;
+ int first_region_padding;
+ int page_cnt, region_cnt;
unsigned int page_shift;
- int page_cnt = 0;
size_t buf_size;
- int region_cnt;
+ /* If mtt is disabled, all pages must be within a continuous range */
+ cfg->is_direct = !mtr_has_mtt(attr);
+ buf_size = mtr_bufs_size(attr);
if (cfg->is_direct) {
- buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
- page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
- /*
- * When HEM buffer use level-0 addressing, the page size equals
- * the buffer size, and the the page size = 4K * 2^N.
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size, and we split the buffer into
+ * small pages which is used to check whether the adjacent
+ * units are in the continuous space and its size is fixed to
+ * 4K based on hns ROCEE's requirement.
*/
- cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
- if (attr->region_count > 1) {
- cfg->buf_pg_count = page_cnt;
- page_shift = HNS_HW_PAGE_SHIFT;
- } else {
- cfg->buf_pg_count = 1;
- page_shift = cfg->buf_pg_shift;
- if (buf_size != 1 << page_shift) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to check direct size %zu shift %d.\n",
- buf_size, page_shift);
- return -EINVAL;
- }
- }
+ page_shift = HNS_HW_PAGE_SHIFT;
+
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ cfg->buf_pg_count = 1;
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
+ order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
+ first_region_padding = 0;
} else {
- page_shift = cfg->buf_pg_shift;
+ page_shift = attr->page_shift;
+ cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size,
+ 1 << page_shift);
+ cfg->buf_pg_shift = page_shift;
+ first_region_padding = unalinged_size;
}
- /* convert buffer size to page index and page count */
- for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
- region_cnt < attr->region_count &&
+ /* Convert buffer size to page index and page count for each region and
+ * the buffer's offset needs to be appended to the first region.
+ */
+ for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
r = &cfg->region[region_cnt];
r->offset = page_cnt;
- buf_size = hr_hw_page_align(attr->region[region_cnt].size);
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size +
+ first_region_padding);
r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+ first_region_padding = 0;
page_cnt += r->count;
r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
r->count);
}
- if (region_cnt < 1) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to check mtr region count, pages = %d.\n",
- cfg->buf_pg_count);
- return -ENOBUFS;
- }
-
cfg->region_count = region_cnt;
*buf_page_shift = page_shift;
return page_cnt;
}
+static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ unsigned int ba_page_shift)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int ret;
+
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret)
+ return ret;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
+ }
+
+ return 0;
+}
+
+static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+}
+
/**
* hns_roce_mtr_create - Create hns memory translate region.
*
+ * @hr_dev: RoCE device struct pointer
* @mtr: memory translate region
* @buf_attr: buffer attribute for creating mtr
* @ba_page_shift: page shift for multi-hop base address table
@@ -1001,95 +983,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned int ba_page_shift, struct ib_udata *udata,
unsigned long user_addr)
{
- struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct ib_device *ibdev = &hr_dev->ib_dev;
unsigned int buf_page_shift = 0;
- dma_addr_t *pages = NULL;
- int all_pg_cnt;
- int get_pg_cnt;
- int ret = 0;
-
- /* if disable mtt, all pages must in a continuous address range */
- cfg->is_direct = !mtr_has_mtt(buf_attr);
-
- /* if buffer only need mtt, just init the hem cfg */
- if (buf_attr->mtt_only) {
- cfg->buf_pg_shift = buf_attr->page_shift;
- cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
- buf_attr->page_shift;
- mtr->umem = NULL;
- mtr->kmem = NULL;
- } else {
- ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
- udata, user_addr);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc mtr bufs, ret = %d.\n", ret);
- return ret;
- }
- }
+ int buf_page_cnt;
+ int ret;
- all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
- if (all_pg_cnt < 1) {
- ret = -ENOBUFS;
- ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
- goto err_alloc_bufs;
+ buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
+ &buf_page_shift,
+ udata ? user_addr & ~PAGE_MASK : 0);
+ if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n",
+ buf_page_cnt, buf_page_shift);
+ return -EINVAL;
}
- hns_roce_hem_list_init(&mtr->hem_list);
- if (!cfg->is_direct) {
- ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
- cfg->region, cfg->region_count,
- ba_page_shift);
- if (ret) {
- ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
- ret);
- goto err_alloc_bufs;
- }
- cfg->root_ba = mtr->hem_list.root_ba;
- cfg->ba_pg_shift = ba_page_shift;
- } else {
- cfg->ba_pg_shift = cfg->buf_pg_shift;
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ return ret;
}
- /* no buffer to map */
- if (buf_attr->mtt_only)
+ /* The caller has its own buffer list and invokes the hns_roce_mtr_map()
+ * to finish the MTT configuration.
+ */
+ if (buf_attr->mtt_only) {
+ mtr->umem = NULL;
+ mtr->kmem = NULL;
return 0;
-
- /* alloc a tmp array to store buffer's dma address */
- pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
- all_pg_cnt);
- goto err_alloc_hem_list;
- }
-
- get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
- buf_page_shift);
- if (get_pg_cnt != all_pg_cnt) {
- ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
- get_pg_cnt, all_pg_cnt);
- ret = -ENOBUFS;
- goto err_alloc_page_list;
}
- /* write buffer's dma address to BA table */
- ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
if (ret) {
- ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
- goto err_alloc_page_list;
+ ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
+ goto err_alloc_mtt;
}
- /* drop tmp array */
- kvfree(pages);
- return 0;
-err_alloc_page_list:
- kvfree(pages);
-err_alloc_hem_list:
- hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
-err_alloc_bufs:
+ /* Write buffer's dma address to MTT */
+ ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
+ else
+ return 0;
+
mtr_free_bufs(hr_dev, mtr);
+err_alloc_mtt:
+ mtr_free_mtt(hr_dev, mtr);
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 1116371adf74..004aca9086ab 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
mutex_unlock(&hr_dev->qp_table.bank_mutex);
}
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_rq_sg;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_qp->rq.rsv_sge = 1;
+
+ return max_sge;
+}
+
static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
- struct hns_roce_qp *hr_qp, int has_rq)
+ struct hns_roce_qp *hr_qp, int has_rq, bool user)
{
+ u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
u32 cnt;
/* If srq exist, set zero for relative number of rq */
@@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
/* Check the validity of QP support capacity */
if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
- cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
- ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
+ cap->max_recv_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "RQ config error, depth = %u, sge = %u\n",
cap->max_recv_wr, cap->max_recv_sge);
return -EINVAL;
}
@@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return -EINVAL;
}
- hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = cnt;
- cap->max_recv_sge = hr_qp->rq.max_gs;
+ cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0;
}
@@ -599,7 +624,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
return -EINVAL;
buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
- buf_attr->fixed_page = true;
buf_attr->region_count = idx;
return 0;
@@ -919,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
- hns_roce_qp_has_rq(init_attr));
+ hns_roce_qp_has_rq(init_attr), !!udata);
if (ret) {
ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index c4ae57e4173a..d5a6de0e7095 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -3,6 +3,7 @@
* Copyright (c) 2018 Hisilicon Limited.
*/
+#include <linux/pci.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
@@ -76,40 +77,16 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
- u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr)
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- u64 mtts_wqe[MTT_MIN_COUNT] = { 0 };
- u64 mtts_idx[MTT_MIN_COUNT] = { 0 };
- dma_addr_t dma_handle_wqe = 0;
- dma_addr_t dma_handle_idx = 0;
int ret;
- /* Get the physical address of srq buf */
- ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
- ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
- if (ret < 1) {
- ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
- ret);
- return -ENOBUFS;
- }
-
- /* Get physical address of idx que buf */
- ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
- ARRAY_SIZE(mtts_idx), &dma_handle_idx);
- if (ret < 1) {
- ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
- ret);
- return -ENOBUFS;
- }
-
ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
if (ret) {
- ibdev_err(ibdev,
- "failed to alloc SRQ number, ret = %d.\n", ret);
+ ibdev_err(ibdev, "failed to alloc SRQ number.\n");
return -ENOMEM;
}
@@ -127,34 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR_OR_NULL(mailbox)) {
- ret = -ENOMEM;
ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
+ ret = -ENOMEM;
goto err_xa;
}
- hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
- mtts_wqe, mtts_idx, dma_handle_wqe,
- dma_handle_idx);
+ ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
+ if (ret) {
+ ibdev_err(ibdev, "failed to write SRQC.\n");
+ goto err_mbox;
+ }
ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
- goto err_xa;
+ goto err_mbox;
}
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
- return ret;
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+err_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
err_xa:
xa_erase(&srq_table->xa, srq->srqn);
-
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
-
err_out:
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+
return ret;
}
@@ -178,46 +157,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
}
-static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
- struct ib_udata *udata, unsigned long addr)
-{
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_buf_attr buf_attr = {};
- int err;
-
- srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
- HNS_ROCE_SGE_SIZE *
- srq->max_gs)));
-
- buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
- buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
- srq->wqe_shift);
- buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
- buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
-
- err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
- hr_dev->caps.srqwqe_ba_pg_sz +
- HNS_HW_PAGE_SHIFT, udata, addr);
- if (err)
- ibdev_err(ibdev,
- "failed to alloc SRQ buf mtr, ret = %d.\n", err);
-
- return err;
-}
-
-static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
-{
- hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
-}
-
static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
struct ib_udata *udata, unsigned long addr)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_attr buf_attr = {};
- int err;
+ int ret;
srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
@@ -226,31 +172,33 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
srq->idx_que.entry_shift);
buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
- err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+ ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
udata, addr);
- if (err) {
+ if (ret) {
ibdev_err(ibdev,
- "failed to alloc SRQ idx mtr, ret = %d.\n", err);
- return err;
+ "failed to alloc SRQ idx mtr, ret = %d.\n", ret);
+ return ret;
}
if (!udata) {
idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
if (!idx_que->bitmap) {
ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
- err = -ENOMEM;
+ ret = -ENOMEM;
goto err_idx_mtr;
}
}
+ idx_que->head = 0;
+ idx_que->tail = 0;
+
return 0;
err_idx_mtr:
hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
- return err;
+ return ret;
}
static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
@@ -262,10 +210,42 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
}
+static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+ HNS_ROCE_SGE_SIZE *
+ srq->max_gs)));
+
+ buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->wqe_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz +
+ HNS_HW_PAGE_SHIFT, udata, addr);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
+
static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- srq->head = 0;
- srq->tail = srq->wqe_cnt - 1;
srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
if (!srq->wrid)
return -ENOMEM;
@@ -279,96 +259,171 @@ static void free_srq_wrid(struct hns_roce_srq *srq)
srq->wrid = NULL;
}
-int hns_roce_create_srq(struct ib_srq *ib_srq,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_srq_sges;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_srq->rsv_sge = 1;
+
+ return max_sge;
+}
+
+static int set_srq_basic_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq_attr *attr = &init_attr->attr;
+ u32 max_sge;
+
+ max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+ if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
+ attr->max_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid SRQ attr, depth = %u, sge = %u.\n",
+ attr->max_wr, attr->max_sge);
+ return -EINVAL;
+ }
+
+ attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM);
+ srq->wqe_cnt = roundup_pow_of_two(attr->max_wr);
+ srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
+
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+ attr->srq_limit = 0;
+
+ return 0;
+}
+
+static void set_srq_ext_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr)
+{
+ srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_hr_cq(init_attr->ext.cq)->cqn : 0;
+}
+
+static int set_srq_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
- struct hns_roce_ib_create_srq_resp resp = {};
- struct hns_roce_srq *srq = to_hr_srq(ib_srq);
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_ib_create_srq ucmd = {};
int ret;
- u32 cqn;
- if (init_attr->srq_type != IB_SRQT_BASIC &&
- init_attr->srq_type != IB_SRQT_XRC)
- return -EOPNOTSUPP;
+ ret = set_srq_basic_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
- /* Check the actual SRQ wqe and SRQ sge num */
- if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
- init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
- return -EINVAL;
+ set_srq_ext_param(srq, init_attr);
- mutex_init(&srq->mutex);
- spin_lock_init(&srq->lock);
+ return 0;
+}
- srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
- srq->max_gs = init_attr->attr.max_sge;
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ int ret;
if (udata) {
ret = ib_copy_from_udata(&ucmd, udata,
min(udata->inlen, sizeof(ucmd)));
if (ret) {
- ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n",
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to copy SRQ udata, ret = %d.\n",
ret);
return ret;
}
}
- ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc SRQ buffer, ret = %d.\n", ret);
+ ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+ if (ret)
return ret;
- }
- ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret);
- goto err_buf_alloc;
- }
+ ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr);
+ if (ret)
+ goto err_idx;
if (!udata) {
ret = alloc_srq_wrid(hr_dev, srq);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n",
- ret);
- goto err_idx_alloc;
- }
+ if (ret)
+ goto err_wqe_buf;
}
- cqn = ib_srq_has_cq(init_attr->srq_type) ?
- to_hr_cq(init_attr->ext.cq)->cqn : 0;
- srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+ return 0;
- ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc SRQ context, ret = %d.\n", ret);
- goto err_wrid_alloc;
- }
+err_wqe_buf:
+ free_srq_wqe_buf(hr_dev, srq);
+err_idx:
+ free_srq_idx(hr_dev, srq);
- srq->event = hns_roce_ib_srq_event;
- resp.srqn = srq->srqn;
+ return ret;
+}
+
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ free_srq_wrid(srq);
+ free_srq_wqe_buf(hr_dev, srq);
+ free_srq_idx(hr_dev, srq);
+}
+
+int hns_roce_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_ib_create_srq_resp resp = {};
+ struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+ int ret;
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ ret = set_srq_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_buf(hr_dev, srq, udata);
+ if (ret)
+ return ret;
+
+ ret = alloc_srqc(hr_dev, srq);
+ if (ret)
+ goto err_srq_buf;
if (udata) {
- ret = ib_copy_to_udata(udata, &resp,
- min(udata->outlen, sizeof(resp)));
- if (ret)
- goto err_srqc_alloc;
+ resp.srqn = srq->srqn;
+ if (ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)))) {
+ ret = -EFAULT;
+ goto err_srqc;
+ }
}
+ srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+ srq->event = hns_roce_ib_srq_event;
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
return 0;
-err_srqc_alloc:
+err_srqc:
free_srqc(hr_dev, srq);
-err_wrid_alloc:
- free_srq_wrid(srq);
-err_idx_alloc:
- free_srq_idx(hr_dev, srq);
-err_buf_alloc:
+err_srq_buf:
free_srq_buf(hr_dev, srq);
+
return ret;
}
@@ -378,8 +433,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
free_srqc(hr_dev, srq);
- free_srq_idx(hr_dev, srq);
- free_srq_wrid(srq);
free_srq_buf(hr_dev, srq);
return 0;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 9acc0ecc9a43..ac65c8237b2e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -70,7 +70,7 @@ static void i40iw_disconnect_worker(struct work_struct *work);
/**
* i40iw_free_sqbuf - put back puda buffer if refcount = 0
* @vsi: pointer to vsi structure
- * @buf: puda buffer to free
+ * @bufp: puda buffer to free
*/
void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
{
@@ -729,6 +729,7 @@ static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node,
/**
* i40iw_build_mpa_v1 - build a MPA V1 frame
* @cm_node: connection's node
+ * @start_addr: MPA frame start address
* @mpa_key: to do read0 or write0
*/
static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node,
@@ -1040,7 +1041,7 @@ negotiate_done:
/**
* i40iw_schedule_cm_timer
- * @@cm_node: connection's node
+ * @cm_node: connection's node
* @sqbuf: buffer to send
* @type: if it is send or close
* @send_retrans: if rexmits to be done
@@ -1205,7 +1206,7 @@ static void i40iw_build_timer_list(struct list_head *timer_list,
/**
* i40iw_cm_timer_tick - system's timer expired callback
- * @pass: Pointing to cm_core
+ * @t: Timer instance to fetch the cm_core pointer from
*/
static void i40iw_cm_timer_tick(struct timer_list *t)
{
@@ -1463,6 +1464,7 @@ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
* @cm_core: cm's core
* @dst_port: listener tcp port num
* @dst_addr: listener ip addr
+ * @vlan_id: vlan id for the given address
* @listener_state: state to match with listen node's
*/
static struct i40iw_cm_listener *i40iw_find_listener(
@@ -1521,7 +1523,7 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
/**
* i40iw_find_port - find port that matches reference port
* @hte: ptr to accelerated or non-accelerated list
- * @accelerated_list: flag for accelerated vs non-accelerated list
+ * @port: port number to locate
*/
static bool i40iw_find_port(struct list_head *hte, u16 port)
{
@@ -1834,6 +1836,7 @@ exit:
/**
* i40iw_dec_refcnt_listen - delete listener and associated cm nodes
* @cm_core: cm's core
+ * @listener: passive connection's listener
* @free_hanging_nodes: to free associated cm_nodes
* @apbvt_del: flag to delete the apbvt
*/
@@ -2029,7 +2032,7 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
return rc;
}
-/**
+/*
* i40iw_get_dst_ipv6
*/
static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
@@ -2051,7 +2054,8 @@ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
/**
* i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
* @iwdev: iwarp device structure
- * @dst_ip: remote ip address
+ * @src: source ip address
+ * @dest: remote ip address
* @arpindex: if there is an arp entry
*/
static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
@@ -3004,7 +3008,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
/**
* i40iw_cm_reject - reject and teardown a connection
* @cm_node: connection's node
- * @pdate: ptr to private data for reject
+ * @pdata: ptr to private data for reject
* @plen: size of private data
*/
static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen)
@@ -4302,7 +4306,7 @@ set_qhash:
* i40iw_cm_teardown_connections - teardown QPs
* @iwdev: device pointer
* @ipaddr: Pointer to IPv4 or IPv6 address
- * @ipv4: flag indicating IPv4 when true
+ * @nfo: cm info node
* @disconnect_all: flag indicating disconnect all QPs
* teardown QPs where source or destination addr matches ip addr
*/
@@ -4358,6 +4362,7 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
/**
* i40iw_ifdown_notify - process an ifdown on an interface
* @iwdev: device pointer
+ * @netdev: network interface device structure
* @ipaddr: Pointer to IPv4 or IPv6 address
* @ipv4: flag indicating IPv4 when true
* @ifup: flag indicating interface up when true
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index c943d491b72b..eaea5d545eb8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -181,7 +181,7 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
* i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size
* @buf: ptr to fpm query buffer
* @buf_idx: index into buf
- * @info: ptr to i40iw_hmc_obj_info struct
+ * @obj_info: ptr to i40iw_hmc_obj_info struct
* @rsrc_idx: resource index into info
*
* Decode a 64 bit value from fpm query buffer into max count and size
@@ -205,7 +205,7 @@ static u64 i40iw_sc_decode_fpm_query(u64 *buf,
/**
* i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer
* @buf: ptr to fpm query buffer
- * @info: ptr to i40iw_hmc_obj_info struct
+ * @hmc_info: ptr to i40iw_hmc_obj_info struct
* @hmc_fpm_misc: ptr to fpm data
*
* parses fpm query buffer and copy max_cnt and
@@ -775,7 +775,7 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
* i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
* @cqp: struct for cqp hw
* @op_code: cqp opcode for completion
- * @info: completion q entry to return
+ * @compl_info: completion q entry to return
*/
static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
struct i40iw_sc_cqp *cqp,
@@ -933,7 +933,7 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cq
* @cqp: struct for cqp hw
* @scratch: u64 saved to be used during cqp completion
* @hmc_fn_id: hmc function id
- * @commit_fpm_mem; Memory for fpm values
+ * @commit_fpm_mem: Memory for fpm values
* @post_sq: flag for cqp db to ring
* @wait_type: poll ccq or cqp registers for cqp completion
*/
@@ -1026,7 +1026,7 @@ i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp,
/**
* i40iw_get_rdma_features - get RDMA features
- * @dev - sc device struct
+ * @dev: sc device struct
*/
enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev)
{
@@ -1456,7 +1456,7 @@ static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry(
* @cqp: struct for cqp hw
* @scratch: u64 saved to be used during cqp completion
* @entry_idx: index of mac entry
- * @ ignore_ref_count: to force mac adde delete
+ * @ignore_ref_count: to force mac address delete
* @post_sq: flag for cqp db to ring
*/
static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry(
@@ -2304,7 +2304,7 @@ static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq,
* i40iw_sc_cq_modify - modify a Completion Queue
* @cq: cq struct
* @info: modification info struct
- * @scratch:
+ * @scratch: u64 saved to be used during cqp completion
* @post_sq: flag to post to sq
*/
static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq,
@@ -3673,7 +3673,7 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev
/**
* cqp_sds_wqe_fill - fill cqp wqe doe sd
* @cqp: struct for cqp hw
- * @info; sd info for wqe
+ * @info: sd info for wqe
* @scratch: u64 saved to be used during cqp completion
*/
static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
@@ -4884,7 +4884,7 @@ void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf)
/**
* i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs.
- * @stat: pestat struct
+ * @stats: pestat struct
* @index: index in HW stats table which contains offset reg-addr
* @value: hw stats value
*/
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
index 5484cbf55f0f..8bd72af9e099 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
@@ -46,7 +46,7 @@
* i40iw_find_sd_index_limit - finds segment descriptor index limit
* @hmc_info: pointer to the HMC configuration information structure
* @type: type of HMC resources we're searching
- * @index: starting index for the object
+ * @idx: starting index for the object
* @cnt: number of objects we're trying to create
* @sd_idx: pointer to return index of the segment descriptor in question
* @sd_limit: pointer to return the maximum number of segment descriptors
@@ -78,7 +78,7 @@ static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info,
* @type: HMC resource type we're examining
* @idx: starting index for the object
* @cnt: number of objects we're trying to create
- * @pd_index: pointer to return page descriptor index
+ * @pd_idx: pointer to return page descriptor index
* @pd_limit: pointer to return page descriptor index limit
*
* Calculates the page descriptor index and index limit for the resource
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 56fdc161f6f8..d167ac10c751 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -165,7 +165,7 @@ static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq
/**
* i40iw_iwarp_ce_handler - handle iwarp completions
* @iwdev: iwarp device
- * @iwcp: iwarp cq receiving event
+ * @iwcq: iwarp cq receiving event
*/
static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev,
struct i40iw_sc_cq *iwcq)
@@ -519,6 +519,7 @@ enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev,
* @iwdev: iwarp device
* @mac_addr: mac address ptr
* @ip_addr: ip addr for arp cache
+ * @ipv4: flag indicating IPv4 when true
* @action: add, delete or modify
*/
void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
@@ -581,7 +582,6 @@ static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u
* @mtype: type of qhash
* @cmnode: cmnode associated with connection
* @wait: wait for completion
- * @user_pri:user pri of the connection
*/
enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
struct i40iw_cm_info *cminfo,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 584932d3cc44..ab4cb11950dc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -186,7 +186,7 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
/**
* i40iw_dpc - tasklet for aeq and ceq 0
- * @data: iwarp device
+ * @t: Tasklet context to fetch pointer to iwarp device
*/
static void i40iw_dpc(struct tasklet_struct *t)
{
@@ -200,7 +200,7 @@ static void i40iw_dpc(struct tasklet_struct *t)
/**
* i40iw_ceq_dpc - dpc handler for CEQ
- * @data: data points to CEQ
+ * @t: Tasklet context to fetch pointer to CEQ data
*/
static void i40iw_ceq_dpc(struct tasklet_struct *t)
{
@@ -227,7 +227,7 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data)
/**
* i40iw_destroy_cqp - destroy control qp
* @iwdev: iwarp device
- * @create_done: 1 if cqp create poll was success
+ * @free_hwcqp: 1 if CQP should be destroyed
*
* Issue destroy cqp request and
* free the resources associated with the cqp
@@ -253,7 +253,7 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
/**
* i40iw_disable_irqs - disable device interrupts
* @dev: hardware control device structure
- * @msic_vec: msix vector to disable irq
+ * @msix_vec: msix vector to disable irq
* @dev_id: parameter to pass to free_irq (used during irq setup)
*
* The function is called when destroying aeq/ceq
@@ -394,8 +394,9 @@ static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = {
/**
* i40iw_close_hmc_objects_type - delete hmc objects of a given type
- * @iwdev: iwarp device
+ * @dev: iwarp device
* @obj_type: the hmc object type to be deleted
+ * @hmc_info: pointer to the HMC configuration information
* @is_pf: true if the function is PF otherwise false
* @reset: true if called before reset
*/
@@ -437,6 +438,7 @@ static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev,
/**
* i40iw_ceq_handler - interrupt handler for ceq
+ * @irq: interrupt request number
* @data: ceq pointer
*/
static irqreturn_t i40iw_ceq_handler(int irq, void *data)
@@ -1777,6 +1779,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli
/**
* i40iw_close - client interface operation close for iwarp/uda device
* @ldev: lan device information
+ * @reset: true if called before reset
* @client: client to close
*
* Called by the lan driver during the processing of client unregister
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index 5f97643e22e5..53e5cd1a2bd6 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -54,6 +54,7 @@ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chun
/**
* i40iw_destroy_pble_pool - destroy pool during module unload
+ * @dev: i40iw_sc_dev struct
* @pble_rsrc: pble resources
*/
void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc)
@@ -112,8 +113,8 @@ enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
/**
* get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
- * @ pble_rsrc: structure containing fpm address
- * @ idx: where to return indexes
+ * @pble_rsrc: structure containing fpm address
+ * @idx: where to return indexes
*/
static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc,
struct sd_pd_idx *idx)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 924be4b03c9a..d1c8cc0a6236 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -511,7 +511,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
/**
* i40iw_puda_qp_wqe - setup wqe for qp create
- * @rsrc: resource for qp
+ * @dev: iwarp device
+ * @qp: resource for qp
*/
static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
{
@@ -623,7 +624,8 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
/**
* i40iw_puda_cq_wqe - setup wqe for cq create
- * @rsrc: resource for cq
+ * @dev: iwarp device
+ * @cq: cq to setup
*/
static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
{
@@ -782,7 +784,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
/**
* i40iw_puda_dele_resources - delete all resources during close
- * @dev: iwarp device
+ * @vsi: pointer to vsi structure
* @type: type of resource to dele
* @reset: true if reset chip
*/
@@ -876,7 +878,7 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
/**
* i40iw_puda_create_rsrc - create resouce (ilq or ieq)
- * @dev: iwarp device
+ * @vsi: pointer to vsi structure
* @info: resource information
*/
enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
@@ -1121,6 +1123,7 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
/**
* i40iw_ieq_create_pbufl - create buffer list for single fpdu
+ * @pfpdu: partial management per user qp
* @rxlist: resource list for receive ieq buffes
* @pbufl: temp. list for buffers for fpddu
* @buf: first receive buffer
@@ -1434,7 +1437,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
/**
* i40iw_ieq_receive - received exception buffer
- * @dev: iwarp device
+ * @vsi: pointer to vsi structure
* @buf: exception buffer received
*/
static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index c3633c9944db..f521be16bf31 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -119,6 +119,8 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
* @qp: hw qp ptr
* @wqe_idx: return wqe index
* @wqe_size: size of sq wqe
+ * @total_size: work request length
+ * @wr_id: work request id
*/
u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
u32 *wqe_idx,
@@ -717,7 +719,6 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
* i40iw_cq_poll_completion - get cq completion info
* @cq: hw cq
* @info: cq poll information returned
- * @post_cq: update cq tail
*/
static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
struct i40iw_cq_poll_info *info)
@@ -1051,7 +1052,7 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev)
/**
* i40iw_clean_cq - clean cq entries
- * @ queue completion context
+ * @queue: completion context
* @cq: cq to clean
*/
void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 644f8c641aa0..76f052b12c14 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -55,6 +55,7 @@
* i40iw_arp_table - manage arp table
* @iwdev: iwarp device
* @ip_addr: ip address for device
+ * @ipv4: flag indicating IPv4 when true
* @mac_addr: mac address ptr
* @action: modify, delete or add
*/
@@ -138,7 +139,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
/**
* i40iw_inetaddr_event - system notifier for ipv4 addr events
- * @notfier: not used
+ * @notifier: not used
* @event: event for notifier
* @ptr: if address
*/
@@ -214,7 +215,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
/**
* i40iw_inet6addr_event - system notifier for ipv6 addr events
- * @notfier: not used
+ * @notifier: not used
* @event: event for notifier
* @ptr: if address
*/
@@ -265,7 +266,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
/**
* i40iw_net_event - system notifier for netevents
- * @notfier: not used
+ * @notifier: not used
* @event: event for notifier
* @ptr: neighbor
*/
@@ -310,7 +311,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
/**
* i40iw_netdevice_event - system notifier for netdev events
- * @notfier: not used
+ * @notifier: not used
* @event: event for notifier
* @ptr: netdev
*/
@@ -652,6 +653,7 @@ struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn)
* i40iw_debug_buf - print debug msg and buffer is mask set
* @dev: hardware control device structure
* @mask: mask to compare if to print debug buffer
+ * @desc: identifying string
* @buf: points buffer addr
* @size: saize of buffer to print
*/
@@ -784,7 +786,7 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
/**
* i40iw_cqp_sds_cmd - create cqp command for sd
* @dev: hardware control device structure
- * @sd_info: information for sd cqp
+ * @sdinfo: information for sd cqp
*
*/
enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
@@ -889,7 +891,7 @@ void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred)
/**
* i40iw_terminate_imeout - timeout happened
- * @context: points to iwarp qp
+ * @t: Timer context to fetch pointer to iwarp qp
*/
static void i40iw_terminate_timeout(struct timer_list *t)
{
@@ -943,7 +945,7 @@ static void i40iw_cqp_generic_worker(struct work_struct *work)
/**
* i40iw_cqp_spawn_worker - spawn worket thread
- * @iwdev: device struct pointer
+ * @dev: device struct pointer
* @work_info: work request info
* @iw_vf_idx: virtual function index
*/
@@ -1048,7 +1050,7 @@ enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
/**
* i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm
- * @iwdev: function device struct
+ * @dev: function device struct
* @values_mem: buffer for fpm
* @hmc_fn_id: function id for fpm
*/
@@ -1114,7 +1116,7 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
/**
* i40iw_vf_wait_vchnl_resp - wait for channel msg
- * @iwdev: function's device struct
+ * @dev: function's device struct
*/
enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
{
@@ -1461,7 +1463,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in
/**
* i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
- * @vsi: pointer to the vsi structure
+ * @t: Timer context containing pointer to the vsi structure
*/
static void i40iw_hw_stats_timeout(struct timer_list *t)
{
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 65aedfe57e77..f18d146a6079 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -265,9 +265,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
/**
* i40iw_free_qp_resources - free up memory resources for qp
- * @iwdev: iwarp device
* @iwqp: qp ptr (user or kernel)
- * @qp_num: qp number assigned
*/
void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
{
@@ -302,6 +300,7 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
/**
* i40iw_destroy_qp - destroy qp
* @ibqp: qp's ib pointer also to get to device's qp address
+ * @udata: user data
*/
static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
@@ -338,8 +337,8 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
/**
* i40iw_setup_virt_qp - setup for allocation of virtual qp
- * @dev: iwarp device
- * @qp: qp ptr
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr
* @init_info: initialize info to return
*/
static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
@@ -1241,7 +1240,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
* i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
* @arr: lvl1 pbl array
* @npages: page count
- * pg_size: page size
+ * @pg_size: page size
*
*/
static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
@@ -1258,7 +1257,7 @@ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
/**
* i40iw_check_mr_contiguous - check if MR is physically contiguous
* @palloc: pbl allocation struct
- * pg_size: page size
+ * @pg_size: page size
*/
static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
{
@@ -1533,6 +1532,7 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
* @ibmr: ib mem to access iwarp mr pointer
* @sg: scatter gather list for fmr
* @sg_nents: number of sg pages
+ * @sg_offset: scatter gather offset
*/
static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset)
@@ -1881,6 +1881,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr,
/**
* i40iw_dereg_mr - deregister mr
* @ib_mr: mr ptr for dereg
+ * @udata: user data
*/
static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
@@ -1945,7 +1946,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
return 0;
}
-/**
+/*
* hw_rev_show
*/
static ssize_t hw_rev_show(struct device *dev,
@@ -1959,7 +1960,7 @@ static ssize_t hw_rev_show(struct device *dev,
}
static DEVICE_ATTR_RO(hw_rev);
-/**
+/*
* hca_type_show
*/
static ssize_t hca_type_show(struct device *dev,
@@ -1969,7 +1970,7 @@ static ssize_t hca_type_show(struct device *dev,
}
static DEVICE_ATTR_RO(hca_type);
-/**
+/*
* board_id_show
*/
static ssize_t board_id_show(struct device *dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index 48fd327f876b..aca9061688ae 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -119,7 +119,7 @@ static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev
return ret_code;
}
-/**
+/*
* vchnl_vf_send_add_hmc_objs_req - Add HMC objects
* @dev: IWARP device pointer
* @vchnl_req: Virtual channel message request pointer
@@ -158,9 +158,9 @@ static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev
* vchnl_vf_send_del_hmc_objs_req - del HMC objects
* @dev: IWARP device pointer
* @vchnl_req: Virtual channel message request pointer
- * @ rsrc_type - resource type to delete
- * @ start_index - starting index for resource
- * @ rsrc_count - number of resource type to delete
+ * @rsrc_type: resource type to delete
+ * @start_index: starting index for resource
+ * @rsrc_count: number of resource type to delete
*/
static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_req *vchnl_req,
@@ -222,6 +222,7 @@ static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev,
* @dev: IWARP device pointer
* @vf_id: Virtual function ID associated with the message
* @vchnl_msg: Virtual channel message buffer pointer
+ * @hmc_fcn: HMC function index pointer
*/
static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
u32 vf_id,
@@ -276,6 +277,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
* @dev: IWARP device pointer
* @vf_id: Virtual function ID associated with the message
* @vchnl_msg: Virtual channel message buffer pointer
+ * @op_ret_code: I40IW_ERR_* status code
*/
static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
struct i40iw_virtchnl_op_buf *vchnl_msg,
@@ -297,8 +299,9 @@ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
/**
* pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn
- * @cqp_req_param: CQP Request param value
- * @not_used: unused CQP callback parameter
+ * @dev: IWARP device pointer
+ * @callback_param: unused CQP callback parameter
+ * @cqe_info: CQE information pointer
*/
static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param,
struct i40iw_ccq_cqe_info *cqe_info)
@@ -331,7 +334,7 @@ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback
/**
* pf_add_hmc_obj - Callback for Add HMC Object
- * @vf_dev: pointer to the VF Device
+ * @work_vf_dev: pointer to the VF Device
*/
static void pf_add_hmc_obj_callback(void *work_vf_dev)
{
@@ -404,7 +407,7 @@ del_out:
/**
* i40iw_vf_init_pestat - Initialize stats for VF
- * @devL pointer to the VF Device
+ * @dev: pointer to the VF Device
* @stats: Statistics structure pointer
* @index: Stats index
*/
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index e3cd402c079a..f26a0d920842 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1699,7 +1699,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
int is_bonded = mlx4_is_bonded(dev);
- if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
+ if (!rdma_is_port_valid(qp->device, flow_attr->port))
return ERR_PTR(-EINVAL);
if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 1b5891130aab..24ee79aa2122 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -798,7 +798,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
{
- int i;
+ unsigned int i;
int ret = 0;
if (!mlx4_is_master(dev->dev))
@@ -817,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
goto err_ports;
}
- for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
+ rdma_for_each_port(&dev->ib_dev, i) {
ret = add_port_entries(dev, i);
if (ret)
goto err_add_entries;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 819c142857d6..de3c2fc6f361 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -94,13 +94,13 @@ struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
u32 dinlen;
- u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+ u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)];
};
struct devx_umem_reg_cmd {
void *in;
u32 inlen;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 out[MLX5_ST_SZ_DW(create_umem_out)];
};
static struct mlx5_ib_ucontext *
@@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
- u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
void *uctx;
int err;
u16 uid;
@@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
if (err)
return err;
- uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ uid = MLX5_GET(create_uctx_out, out, uid);
return uid;
}
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
MLX5_SET(destroy_uctx_in, in, uid, uid);
@@ -288,6 +288,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
return ((u64)opcode << 32) | obj_id;
}
+static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ case MLX5_CMD_OP_CREATE_UMEM:
+ return MLX5_GET(create_umem_out, out, umem_id);
+ case MLX5_CMD_OP_CREATE_MKEY:
+ return MLX5_GET(create_mkey_out, out, mkey_index);
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_GET(create_cq_out, out, cqn);
+ case MLX5_CMD_OP_ALLOC_PD:
+ return MLX5_GET(alloc_pd_out, out, pd);
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ return MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_GET(create_rmp_out, out, rmpn);
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_GET(create_sq_out, out, sqn);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_GET(create_rq_out, out, rqn);
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_GET(create_rqt_out, out, rqtn);
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_GET(create_tir_out, out, tirn);
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_GET(create_tis_out, out, tisn);
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ return MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ return MLX5_GET(create_flow_table_out, out, table_id);
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ return MLX5_GET(create_flow_group_out, out, group_id);
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ return MLX5_GET(set_fte_in, in, flow_index);
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ return MLX5_GET(alloc_packet_reformat_context_out, out,
+ packet_reformat_id);
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ return MLX5_GET(alloc_modify_header_context_out, out,
+ modify_header_id);
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ return MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ return MLX5_GET(set_l2_table_entry_in, in, table_index);
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_GET(create_qp_out, out, qpn);
+ case MLX5_CMD_OP_CREATE_SRQ:
+ return MLX5_GET(create_srq_out, out, srqn);
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_GET(create_dct_out, out, dctn);
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_GET(create_xrq_out, out, xrqn);
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ return MLX5_GET(attach_to_mcg_in, in, qpn);
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return MLX5_GET(alloc_xrcd_out, out, xrcd);
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_GET(create_psv_out, out, psv0_index);
+ default:
+ /* Opcode must match one of those in devx_is_obj_create_cmd() */
+ WARN_ON(true);
+ return 0;
+ }
+}
+
static u64 devx_get_obj_id(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
@@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in)
break;
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
- MLX5_GET(general_obj_in_cmd_hdr, in,
- obj_id));
+ MLX5_GET(query_modify_header_context_in,
+ in, modify_header_id));
break;
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
@@ -1019,63 +1093,76 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
u32 *dinlen,
u32 *obj_id)
{
- u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ *obj_id = devx_get_created_obj_id(in, out, opcode);
*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
-
- MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
- switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+ switch (opcode) {
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_type,
+ MLX5_GET(general_obj_in_cmd_hdr, in, obj_type));
break;
case MLX5_CMD_OP_CREATE_UMEM:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_umem_in, din, opcode,
MLX5_CMD_OP_DESTROY_UMEM);
+ MLX5_SET(destroy_umem_in, din, umem_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_MKEY:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, din, mkey_index, *obj_id);
break;
case MLX5_CMD_OP_CREATE_CQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_PD:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, pd, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_transport_domain_in, din, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, din, transport_domain,
+ *obj_id);
break;
case MLX5_CMD_OP_CREATE_RMP:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, sqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_RQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, rqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_RQT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_TIR:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, tirn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_TIS:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, tisn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_q_counter_in, din, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
- *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
MLX5_SET(destroy_flow_table_in, din, other_vport,
MLX5_GET(create_flow_table_in, in, other_vport));
MLX5_SET(destroy_flow_table_in, din, vport_number,
@@ -1083,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(destroy_flow_table_in, din, table_type,
MLX5_GET(create_flow_table_in, in, table_type));
MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_flow_table_in, din, opcode,
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
break;
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
- *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
MLX5_SET(destroy_flow_group_in, din, other_vport,
MLX5_GET(create_flow_group_in, in, other_vport));
MLX5_SET(destroy_flow_group_in, din, vport_number,
@@ -1098,12 +1184,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(destroy_flow_group_in, din, table_id,
MLX5_GET(create_flow_group_in, in, table_id));
MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_flow_group_in, din, opcode,
MLX5_CMD_OP_DESTROY_FLOW_GROUP);
break;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
- *obj_id = MLX5_GET(set_fte_in, in, flow_index);
MLX5_SET(delete_fte_in, din, other_vport,
MLX5_GET(set_fte_in, in, other_vport));
MLX5_SET(delete_fte_in, din, vport_number,
@@ -1113,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(delete_fte_in, din, table_id,
MLX5_GET(set_fte_in, in, table_id));
MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_fte_in, din, opcode,
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
break;
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_flow_counter_in, din, opcode,
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id,
+ *obj_id);
break;
case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_packet_reformat_context_in, din, opcode,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, din,
+ packet_reformat_id, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_modify_header_context_in, din, opcode,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, din,
+ modify_header_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
- *obj_id = MLX5_GET(create_scheduling_element_out, out,
- scheduling_element_id);
MLX5_SET(destroy_scheduling_element_in, din,
scheduling_hierarchy,
MLX5_GET(create_scheduling_element_in, in,
scheduling_hierarchy));
MLX5_SET(destroy_scheduling_element_in, din,
scheduling_element_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_scheduling_element_in, din, opcode,
MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
break;
case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
- *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_vxlan_udp_dport_in, din, opcode,
MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
break;
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
- *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_l2_table_entry_in, din, opcode,
MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
break;
case MLX5_CMD_OP_CREATE_QP:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, srqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_XRC_SRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_xrc_srq_in, din, opcode,
MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_DCT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_XRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id);
break;
case MLX5_CMD_OP_ATTACH_TO_MCG:
*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
@@ -1178,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, opcode,
+ MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_XRCD:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id);
break;
case MLX5_CMD_OP_CREATE_PSV:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_psv_in, din, opcode,
MLX5_CMD_OP_DESTROY_PSV);
- MLX5_SET(destroy_psv_in, din, psvn,
- MLX5_GET(create_psv_out, out, psv0_index));
+ MLX5_SET(destroy_psv_in, din, psvn, *obj_id);
break;
default:
/* The entry must match to one of the devx_is_obj_create_cmd */
@@ -1215,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->size = MLX5_GET64(mkc, mkc, len);
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+ init_waitqueue_head(&mkey->wait);
- return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
- GFP_KERNEL));
+ return mlx5r_store_odp_mkey(dev, mkey);
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1290,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
int ret;
dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY &&
+ xa_erase(&obj->ib_dev->odp_mkeys,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key)))
/*
* The pagefault_single_data_segment() does commands against
* the mmkey, we must wait for that to stop before freeing the
* mkey, as another allocation could get the same mkey #.
*/
- xa_erase(&obj->ib_dev->odp_mkeys,
- mlx5_base_mkey(obj->devx_mr.mmkey.key));
- synchronize_srcu(&dev->odp_srcu);
- }
+ mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey);
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
@@ -1345,6 +1439,16 @@ out:
rcu_read_unlock();
}
+static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, apu) ||
+ !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ apu_thread_cq))
+ return false;
+
+ return true;
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -1398,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
obj->flags |= DEVX_OBJ_FLAGS_DCT;
err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
cmd_in_len, cmd_out, cmd_out_len);
- } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
+ !is_apu_thread_cq(dev, cmd_in)) {
obj->flags |= DEVX_OBJ_FLAGS_CQ;
obj->core_cq.comp = devx_cq_comp;
err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
@@ -1968,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
num_alloc_xa_entries++;
event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
- if (!event_sub)
+ if (!event_sub) {
+ err = -ENOMEM;
goto err;
+ }
list_add_tail(&event_sub->event_list, &sub_list);
uverbs_uobject_get(&ev_file->uobj);
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 9bb9bb058932..652c6ccf1881 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
return true;
- return dev->mdev->port_caps[port_num - 1].has_smi;
+ return dev->port_caps[port_num - 1].has_smi;
}
static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
@@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
@@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
packet_error = be16_to_cpu(out_mad->status);
- dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
+ dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
out:
@@ -308,8 +308,8 @@ out:
return err;
}
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad)
+static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
+ struct ib_smp *out_mad)
{
struct ib_smp *in_mad = NULL;
int err = -ENOMEM;
@@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
props->gid_tbl_len = out_mad->data[50];
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
- props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len;
+ props->pkey_tbl_len = dev->pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
@@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
- if (mdev->port_caps[port - 1].ext_port_cap &
+ if (dev->port_caps[port - 1].ext_port_cap &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index aabdc07e4753..0d69a697d75f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -461,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
bool put_mdev = true;
- u16 qkey_viol_cntr;
u32 eth_prot_oper;
u8 mdev_port_num;
bool ext;
@@ -499,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width, ext);
- props->port_cap_flags |= IB_PORT_CM_SUP;
- props->ip_gids = true;
+ if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) {
+ u16 qkey_viol_cntr;
- props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
- roce_address_table_size);
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+ }
props->max_mtu = IB_MTU_4096;
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
- mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
- props->qkey_viol_cntr = qkey_viol_cntr;
-
/* If this is a stub query for an unaffiliated port stop here */
if (!put_mdev)
goto out;
@@ -815,9 +817,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (err)
return err;
- err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
- if (err)
- return err;
+ props->max_pkeys = dev->pkey_table_len;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
@@ -1384,19 +1384,17 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
- int ret;
+ return mlx5_query_port_roce(ibdev, port, props);
+}
- /* Only link layer == ethernet is valid for representors
- * and we always use port 1
+static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ /* Default special Pkey for representor device port as per the
+ * IB specification 1.3 section 10.9.1.2.
*/
- ret = mlx5_query_port_roce(ibdev, port, props);
- if (ret || !props)
- return ret;
-
- /* We don't support GIDS */
- props->gid_tbl_len = 0;
-
- return ret;
+ *pkey = 0xffff;
+ return 0;
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -2935,8 +2933,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
int err;
int port;
- for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) {
- dev->mdev->port_caps[port - 1].has_smi = false;
+ for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) {
+ dev->port_caps[port - 1].has_smi = false;
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB) {
if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
@@ -2948,10 +2946,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
port, err);
return err;
}
- dev->mdev->port_caps[port - 1].has_smi =
+ dev->port_caps[port - 1].has_smi =
vport_ctx.has_smi;
} else {
- dev->mdev->port_caps[port - 1].has_smi = true;
+ dev->port_caps[port - 1].has_smi = true;
}
}
}
@@ -2960,63 +2958,12 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
- int port;
+ unsigned int port;
- for (port = 1; port <= dev->num_ports; port++)
+ rdma_for_each_port (&dev->ib_dev, port)
mlx5_query_ext_port_caps(dev, port);
}
-static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
-{
- struct ib_device_attr *dprops = NULL;
- struct ib_port_attr *pprops = NULL;
- int err = -ENOMEM;
-
- pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
- if (!pprops)
- goto out;
-
- dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
- if (!dprops)
- goto out;
-
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL);
- if (err) {
- mlx5_ib_warn(dev, "query_device failed %d\n", err);
- goto out;
- }
-
- err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
- if (err) {
- mlx5_ib_warn(dev, "query_port %d failed %d\n",
- port, err);
- goto out;
- }
-
- dev->mdev->port_caps[port - 1].pkey_table_len =
- dprops->max_pkeys;
- dev->mdev->port_caps[port - 1].gid_table_len =
- pprops->gid_tbl_len;
- mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n",
- port, dprops->max_pkeys, pprops->gid_tbl_len);
-
-out:
- kfree(pprops);
- kfree(dprops);
-
- return err;
-}
-
-static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
-{
- /* For representors use port 1, is this is the only native
- * port
- */
- if (dev->is_rep)
- return __get_port_caps(dev, 1);
- return __get_port_caps(dev, port);
-}
-
static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
switch (umr_fence_cap) {
@@ -3488,10 +3435,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
if (err)
goto unbind;
- err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
- if (err)
- goto unbind;
-
err = mlx5_add_netdev_notifier(ibdev, port_num);
if (err) {
mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
@@ -3569,11 +3512,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
break;
}
}
- if (!bound) {
- get_port_caps(dev, i + 1);
+ if (!bound)
mlx5_ib_dbg(dev, "no free port found for port %d\n",
i + 1);
- }
}
list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
@@ -3926,8 +3867,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
WARN_ON(!xa_empty(&dev->odp_mkeys));
- cleanup_srcu_struct(&dev->odp_srcu);
-
+ mutex_destroy(&dev->cap_mask_mutex);
WARN_ON(!xa_empty(&dev->sig_mrs));
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
}
@@ -3938,6 +3878,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
int err;
int i;
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
+ dev->ib_dev.phys_port_cnt = dev->num_ports;
+ dev->ib_dev.dev.parent = mdev->device;
+ dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
+
for (i = 0; i < dev->num_ports; i++) {
spin_lock_init(&dev->port[i].mp.mpi_lock);
rwlock_init(&dev->port[i].roce.netdev_lock);
@@ -3956,27 +3902,14 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (err)
goto err_mp;
- if (!mlx5_core_mp_enabled(mdev)) {
- for (i = 1; i <= dev->num_ports; i++) {
- err = get_port_caps(dev, i);
- if (err)
- break;
- }
- } else {
- err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
- }
+ err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len);
if (err)
goto err_mp;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.node_type = RDMA_NODE_IB_CA;
- dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
- dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
- dev->ib_dev.dev.parent = mdev->device;
- dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
@@ -3987,17 +3920,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
-
- err = init_srcu_struct(&dev->odp_srcu);
- if (err)
- goto err_mp;
-
return 0;
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
-
- return -ENOMEM;
+ return err;
}
static int mlx5_ib_enable_driver(struct ib_device *dev)
@@ -4067,6 +3994,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_srq = mlx5_ib_query_srq,
.query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
@@ -4207,6 +4135,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
.get_port_immutable = mlx5_port_rep_immutable,
.query_port = mlx5_ib_rep_query_port,
+ .query_pkey = mlx5_ib_rep_query_pkey,
};
static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b0fdc1b08e06..88cc26e008fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -683,11 +684,8 @@ struct mlx5_ib_mr {
u64 pi_iova;
/* For ODP and implicit */
- atomic_t num_deferred_work;
- wait_queue_head_t q_deferred_work;
struct xarray implicit_children;
union {
- struct rcu_head rcu;
struct list_head elm;
struct work_struct work;
} odp_destroy;
@@ -703,6 +701,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
mr->umem->is_odp;
}
+static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_dmabuf;
+}
+
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
@@ -1029,6 +1033,11 @@ struct mlx5_var_table {
u64 num_var_hw_entries;
};
+struct mlx5_port_caps {
+ bool has_smi;
+ u8 ext_port_cap;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -1056,11 +1065,6 @@ struct mlx5_ib_dev {
u64 odp_max_size;
struct mlx5_ib_pf_eq odp_pf_eq;
- /*
- * Sleepable RCU that prevents destruction of MRs while they are still
- * being used by a page fault handler.
- */
- struct srcu_struct odp_srcu;
struct xarray odp_mkeys;
u32 null_mkey;
@@ -1089,6 +1093,8 @@ struct mlx5_ib_dev {
struct mlx5_var_table var_table;
struct xarray sig_mrs;
+ struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
+ u16 pkey_table_len;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1243,6 +1249,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_udata *udata);
int mlx5_ib_advise_mr(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags,
@@ -1253,11 +1263,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
+int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct ib_udata *udata,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
+void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr);
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -1279,9 +1291,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port);
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid);
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
@@ -1345,6 +1355,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1370,6 +1381,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
return -EOPNOTSUPP;
}
+static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
@@ -1576,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
return true;
}
+static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_core_mkey *mmkey)
+{
+ refcount_set(&mmkey->usecount, 1);
+
+ return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key),
+ mmkey, GFP_KERNEL));
+}
+
+/* deref an mkey that can participate in ODP flow */
+static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey)
+{
+ if (refcount_dec_and_test(&mmkey->usecount))
+ wake_up(&mmkey->wait);
+}
+
+/* deref an mkey that can participate in ODP flow and wait for release */
+static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey)
+{
+ mlx5r_deref_odp_mkey(mmkey);
+ wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
+}
+
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 24f8d59a42ea..db05b0e0a8d7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,6 +37,8 @@
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
@@ -155,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
mr->mmkey.type = MLX5_MKEY_MR;
mr->mmkey.key |= mlx5_idx_to_mkey(
MLX5_GET(create_mkey_out, mr->out, mkey_index));
+ init_waitqueue_head(&mr->mmkey.wait);
WRITE_ONCE(dev->cache.last_add, jiffies);
@@ -935,6 +939,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
mr->access_flags = access_flags;
}
+static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
+ u64 iova)
+{
+ /*
+ * The alignment of iova has already been checked upon entering
+ * UVERBS_METHOD_REG_DMABUF_MR
+ */
+ umem->iova = iova;
+ return PAGE_SIZE;
+}
+
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags)
@@ -944,7 +959,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct mlx5_ib_mr *mr;
unsigned int page_size;
- page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (umem->is_dmabuf)
+ page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
+ else
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
+ 0, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
ent = mr_cache_ent_from_order(
@@ -980,7 +999,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
mr->page_shift = order_base_2(page_size);
- mr->umem = umem;
set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
@@ -1201,8 +1219,10 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
/*
* Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
*/
-static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
@@ -1244,6 +1264,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
+
+ if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+ cur_mtt->ptag = 0;
+
cur_mtt++;
}
@@ -1528,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
}
odp->private = mr;
- init_waitqueue_head(&mr->q_deferred_work);
- atomic_set(&mr->num_deferred_work, 0);
- err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_KERNEL));
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
if (err)
goto err_dereg_mr;
@@ -1567,6 +1588,81 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return create_real_mr(pd, umem, iova, access_flags);
}
+static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+ struct mlx5_ib_mr *mr = umem_dmabuf->private;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt)
+ return;
+
+ mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+}
+
+static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
+ .allow_peer2peer = 1,
+ .move_notify = mlx5_ib_dmabuf_invalidate_cb,
+};
+
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr = NULL;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev,
+ "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
+ offset, virt_addr, length, fd, access_flags);
+
+ /* dmabuf requires xlt update via umr to work. */
+ if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
+ access_flags,
+ &mlx5_ib_dmabuf_attach_ops);
+ if (IS_ERR(umem_dmabuf)) {
+ mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
+ PTR_ERR(umem_dmabuf));
+ return ERR_CAST(umem_dmabuf);
+ }
+
+ mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
+ access_flags);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_CAST(mr);
+ }
+
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
+
+ atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
+ umem_dmabuf->private = mr;
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
+
+ err = mlx5_ib_init_dmabuf_mr(mr);
+ if (err)
+ goto err_dereg_mr;
+ return &mr->ibmr;
+
+err_dereg_mr:
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+}
+
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1740,8 +1836,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
return ERR_PTR(err);
return NULL;
}
- /* DM or ODP MR's don't have a umem so we can't re-use it */
- if (!mr->umem || is_odp_mr(mr))
+ /* DM, ODP and dmabuf MRs don't have a normal umem so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
goto recreate;
/*
@@ -1760,10 +1856,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
/*
- * DM doesn't have a PAS list so we can't re-use it, odp does but the
- * logic around releasing the umem is different
+ * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf do
+ * but the logic around releasing the umem is different
*/
- if (!mr->umem || is_odp_mr(mr))
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
goto recreate;
if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
@@ -1876,6 +1972,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
/* Stop all DMA */
if (is_odp_mr(mr))
mlx5_ib_fence_odp_mr(mr);
+ else if (is_dmabuf_mr(mr))
+ mlx5_ib_fence_dmabuf_mr(mr);
else
clean_mr(dev, mr);
@@ -2227,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
}
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- err = xa_err(xa_store(&dev->odp_mkeys,
- mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
- GFP_KERNEL));
+ err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
if (err)
goto free_mkey;
}
@@ -2249,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
/*
- * pagefault_single_data_segment() may be accessing mmw under
- * SRCU if the user bound an ODP MR to this MW.
+ * pagefault_single_data_segment() may be accessing mmw
+ * if the user bound an ODP MR to this MW.
*/
- synchronize_srcu(&dev->odp_srcu);
- }
+ mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa2413b50adc..b103555b1f5d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -33,6 +33,8 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <linux/kernel.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include "mlx5_ib.h"
#include "cmd.h"
@@ -113,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
* xarray would be protected by the umem_mutex, however that is not
* possible. Instead this uses a weaker update-then-lock pattern:
*
- * srcu_read_lock()
* xa_store()
* mutex_lock(umem_mutex)
* mlx5_ib_update_xlt()
@@ -124,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
* before destroying.
*
* The umem_mutex provides the acquire/release semantic needed to make
- * the xa_store() visible to a racing thread. While SRCU is not
- * technically required, using it gives consistent use of the SRCU
- * locking around the xarray.
+ * the xa_store() visible to a racing thread.
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
- lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu);
for (; pklm != end; pklm++, idx++) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
@@ -205,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
}
/*
- * This must be called after the mr has been removed from implicit_children
- * and the SRCU synchronized. NOTE: The MR does not necessarily have to be
+ * This must be called after the mr has been removed from implicit_children.
+ * NOTE: The MR does not necessarily have to be
* empty here, parallel page faults could have raced with the free process and
* added pages to it.
*/
@@ -216,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
- int srcu_key;
- /* implicit_child_mr's are not allowed to have deferred work */
- WARN_ON(atomic_read(&mr->num_deferred_work));
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
if (need_imr_xlt) {
- srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
- srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key);
}
dma_fence_odp_mr(mr);
@@ -236,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
mr->parent = NULL;
mlx5_mr_cache_free(mr_to_mdev(mr), mr);
ib_umem_odp_release(odp);
- if (atomic_dec_and_test(&imr->num_deferred_work))
- wake_up(&imr->q_deferred_work);
}
static void free_implicit_child_mr_work(struct work_struct *work)
{
struct mlx5_ib_mr *mr =
container_of(work, struct mlx5_ib_mr, odp_destroy.work);
+ struct mlx5_ib_mr *imr = mr->parent;
free_implicit_child_mr(mr, true);
-}
-
-static void free_implicit_child_mr_rcu(struct rcu_head *head)
-{
- struct mlx5_ib_mr *mr =
- container_of(head, struct mlx5_ib_mr, odp_destroy.rcu);
-
- /* Freeing a MR is a sleeping operation, so bounce to a work queue */
- INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
- queue_work(system_unbound_wq, &mr->odp_destroy.work);
+ mlx5r_deref_odp_mkey(&imr->mmkey);
}
static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
@@ -264,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *imr = mr->parent;
- xa_lock(&imr->implicit_children);
- /*
- * This can race with mlx5_ib_free_implicit_mr(), the first one to
- * reach the xa lock wins the race and destroys the MR.
- */
- if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) !=
- mr)
- goto out_unlock;
+ if (!refcount_inc_not_zero(&imr->mmkey.usecount))
+ return;
- atomic_inc(&imr->num_deferred_work);
- call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu,
- free_implicit_child_mr_rcu);
+ xa_erase(&imr->implicit_children, idx);
-out_unlock:
- xa_unlock(&imr->implicit_children);
+ /* Freeing a MR is a sleeping operation, so bounce to a work queue */
+ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
+ queue_work(system_unbound_wq, &mr->odp_destroy.work);
}
static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
@@ -490,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
mr->parent = imr;
odp->private = mr;
+ /*
+ * The first refcount is owned by the xarray and the second refcount
+ * is returned to the caller.
+ */
+ refcount_set(&mr->mmkey.usecount, 2);
+
err = mlx5_ib_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
PAGE_SHIFT,
@@ -500,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
goto out_mr;
}
- /*
- * Once the store to either xarray completes any error unwind has to
- * use synchronize_srcu(). Avoid this with xa_reserve()
- */
- ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
- GFP_KERNEL);
+ xa_lock(&imr->implicit_children);
+ ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
+ GFP_KERNEL);
if (unlikely(ret)) {
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
- goto out_mr;
+ goto out_lock;
}
/*
* Another thread beat us to creating the child mr, use
* theirs.
*/
- goto out_mr;
+ refcount_inc(&ret->mmkey.usecount);
+ goto out_lock;
}
+ xa_unlock(&imr->implicit_children);
mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;
+out_lock:
+ xa_unlock(&imr->implicit_children);
out_mr:
mlx5_mr_cache_free(mr_to_mdev(imr), mr);
out_umem:
@@ -559,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->ibmr.device = &dev->ib_dev;
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
- atomic_set(&imr->num_deferred_work, 0);
- init_waitqueue_head(&imr->q_deferred_work);
xa_init(&imr->implicit_children);
err = mlx5_ib_update_xlt(imr, 0,
@@ -572,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
if (err)
goto out_mr;
- err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key),
- &imr->mmkey, GFP_KERNEL));
+ err = mlx5r_store_odp_mkey(dev, &imr->mmkey);
if (err)
goto out_mr;
@@ -591,60 +572,35 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
struct mlx5_ib_dev *dev = mr_to_mdev(imr);
- struct list_head destroy_list;
struct mlx5_ib_mr *mtt;
- struct mlx5_ib_mr *tmp;
unsigned long idx;
- INIT_LIST_HEAD(&destroy_list);
-
xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key));
/*
- * This stops the SRCU protected page fault path from touching either
- * the imr or any children. The page fault path can only reach the
- * children xarray via the imr.
- */
- synchronize_srcu(&dev->odp_srcu);
-
- /*
* All work on the prefetch list must be completed, xa_erase() prevented
* new work from being created.
*/
- wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
-
+ mlx5r_deref_wait_odp_mkey(&imr->mmkey);
/*
* At this point it is forbidden for any other thread to enter
* pagefault_mr() on this imr. It is already forbidden to call
* pagefault_mr() on an implicit child. Due to this additions to
* implicit_children are prevented.
+ * In addition, any new call to destroy_unused_implicit_child_mr()
+ * may return immediately.
*/
/*
- * Block destroy_unused_implicit_child_mr() from incrementing
- * num_deferred_work.
- */
- xa_lock(&imr->implicit_children);
- xa_for_each (&imr->implicit_children, idx, mtt) {
- __xa_erase(&imr->implicit_children, idx);
- list_add(&mtt->odp_destroy.elm, &destroy_list);
- }
- xa_unlock(&imr->implicit_children);
-
- /*
- * Wait for any concurrent destroy_unused_implicit_child_mr() to
- * complete.
- */
- wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
-
- /*
* Fence the imr before we destroy the children. This allows us to
* skip updating the XLT of the imr during destroy of the child mkey
* the imr points to.
*/
mlx5_mr_cache_invalidate(imr);
- list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm)
+ xa_for_each(&imr->implicit_children, idx, mtt) {
+ xa_erase(&imr->implicit_children, idx);
free_implicit_child_mr(mtt, false);
+ }
mlx5_mr_cache_free(dev, imr);
ib_umem_odp_release(odp_imr);
@@ -663,13 +619,39 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
/* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&mr_to_mdev(mr)->odp_srcu);
-
- wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
dma_fence_odp_mr(mr);
}
+/**
+ * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR
+ * @mr: to fence
+ *
+ * On return no parallel threads will be touching this MR and no DMA will be
+ * active.
+ */
+void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+
+ /* Prevent new page faults and prefetch requests from succeeding */
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
+
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ mlx5_mr_cache_invalidate(mr);
+ umem_dmabuf->private = NULL;
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!mr->cache_ent) {
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey);
+ WARN_ON(mr->descs);
+ }
+}
+
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
#define MLX5_PF_FLAGS_ENABLE BIT(3)
@@ -747,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
struct mlx5_ib_mr *mtt;
u64 len;
+ xa_lock(&imr->implicit_children);
mtt = xa_load(&imr->implicit_children, idx);
if (unlikely(!mtt)) {
+ xa_unlock(&imr->implicit_children);
mtt = implicit_get_child_mr(imr, idx);
if (IS_ERR(mtt)) {
ret = PTR_ERR(mtt);
@@ -756,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
}
upd_start_idx = min(upd_start_idx, idx);
upd_len = idx - upd_start_idx + 1;
+ } else {
+ refcount_inc(&mtt->mmkey.usecount);
+ xa_unlock(&imr->implicit_children);
}
umem_odp = to_ib_umem_odp(mtt->umem);
@@ -764,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
bytes_mapped, flags);
+
+ mlx5r_deref_odp_mkey(&mtt->mmkey);
+
if (ret < 0)
goto out;
user_va += len;
@@ -803,6 +793,44 @@ out:
return ret;
}
+static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
+ u32 *bytes_mapped, u32 flags)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ u32 xlt_flags = 0;
+ int err;
+ unsigned int page_size;
+
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err) {
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ return err;
+ }
+
+ page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc,
+ log_page_size, 0,
+ umem_dmabuf->umem.iova);
+ if (unlikely(page_size < PAGE_SIZE)) {
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ err = -EINVAL;
+ } else {
+ err = mlx5_ib_update_mr_pas(mr, xlt_flags);
+ }
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ if (err)
+ return err;
+
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+
+ return ib_umem_num_pages(mr->umem);
+}
+
/*
* Returns:
* -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
@@ -817,10 +845,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
+ if (mr->umem->is_dmabuf)
+ return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
+
if (!odp->is_implicit_odp) {
u64 user_va;
@@ -847,6 +877,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
return ret >= 0 ? 0 : ret;
}
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ int ret;
+
+ ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL,
+ MLX5_PF_FLAGS_ENABLE);
+
+ return ret >= 0 ? 0 : ret;
+}
+
struct pf_frame {
struct pf_frame *next;
u32 key;
@@ -896,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 *bytes_committed,
u32 *bytes_mapped)
{
- int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
@@ -905,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
size_t offset;
int ndescs;
- srcu_key = srcu_read_lock(&dev->odp_srcu);
-
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
next_mr:
+ xa_lock(&dev->odp_mkeys);
mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
if (!mmkey) {
+ xa_unlock(&dev->odp_mkeys);
mlx5_ib_dbg(
dev,
"skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
@@ -925,12 +965,15 @@ next_mr:
* faulted.
*/
ret = 0;
- goto srcu_unlock;
+ goto end;
}
+ refcount_inc(&mmkey->usecount);
+ xa_unlock(&dev->odp_mkeys);
+
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
switch (mmkey->type) {
@@ -939,7 +982,7 @@ next_mr:
ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
if (ret < 0)
- goto srcu_unlock;
+ goto end;
mlx5_update_odp_stats(mr, faults, ret);
@@ -954,7 +997,7 @@ next_mr:
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
mlx5_ib_dbg(dev, "indirection level exceeded\n");
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
@@ -965,7 +1008,7 @@ next_mr:
out = kzalloc(outlen, GFP_KERNEL);
if (!out) {
ret = -ENOMEM;
- goto srcu_unlock;
+ goto end;
}
cur_outlen = outlen;
}
@@ -975,7 +1018,7 @@ next_mr:
ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
if (ret)
- goto srcu_unlock;
+ goto end;
offset = io_virt - MLX5_GET64(query_mkey_out, out,
memory_key_mkey_entry.start_addr);
@@ -989,7 +1032,7 @@ next_mr:
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
if (!frame) {
ret = -ENOMEM;
- goto srcu_unlock;
+ goto end;
}
frame->key = be32_to_cpu(pklm->key);
@@ -1008,7 +1051,7 @@ next_mr:
default:
mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
if (head) {
@@ -1021,10 +1064,13 @@ next_mr:
depth = frame->depth;
kfree(frame);
+ mlx5r_deref_odp_mkey(mmkey);
goto next_mr;
}
-srcu_unlock:
+end:
+ if (mmkey)
+ mlx5r_deref_odp_mkey(mmkey);
while (head) {
frame = head;
head = frame->next;
@@ -1032,24 +1078,25 @@ srcu_unlock:
}
kfree(out);
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
}
-/**
+/*
* Parse a series of data segments for page fault handling.
*
- * @pfault contains page fault information.
- * @wqe points at the first data segment in the WQE.
- * @wqe_end points after the end of the WQE.
- * @bytes_mapped receives the number of bytes that the function was able to
- * map. This allows the caller to decide intelligently whether
- * enough memory was mapped to resolve the page fault
- * successfully (e.g. enough for the next MTU, or the entire
- * WQE).
- * @total_wqe_bytes receives the total data size of this WQE in bytes (minus
- * the committed bytes).
+ * @dev: Pointer to mlx5 IB device
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ * map. This allows the caller to decide intelligently whether
+ * enough memory was mapped to resolve the page fault
+ * successfully (e.g. enough for the next MTU, or the entire
+ * WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ * the committed bytes).
+ * @receive_queue: receive WQE end of sg list
*
* Returns the number of pages loaded if positive, zero for an empty WQE, or a
* negative error code.
@@ -1738,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work)
u32 i;
for (i = 0; i < work->num_sge; ++i)
- if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
- wake_up(&work->frags[i].mr->q_deferred_work);
+ mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey);
+
kvfree(work);
}
@@ -1749,27 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_core_mkey *mmkey;
- struct ib_umem_odp *odp;
- struct mlx5_ib_mr *mr;
-
- lockdep_assert_held(&dev->odp_srcu);
+ struct mlx5_ib_mr *mr = NULL;
+ xa_lock(&dev->odp_mkeys);
mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
- return NULL;
+ goto end;
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (mr->ibmr.pd != pd)
- return NULL;
-
- odp = to_ib_umem_odp(mr->umem);
+ if (mr->ibmr.pd != pd) {
+ mr = NULL;
+ goto end;
+ }
/* prefetch with write-access must be supported by the MR */
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
- !odp->umem.writable)
- return NULL;
+ !mr->umem->writable) {
+ mr = NULL;
+ goto end;
+ }
+ refcount_inc(&mmkey->usecount);
+end:
+ xa_unlock(&dev->odp_mkeys);
return mr;
}
@@ -1777,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
{
struct prefetch_mr_work *work =
container_of(w, struct prefetch_mr_work, work);
- struct mlx5_ib_dev *dev;
u32 bytes_mapped = 0;
- int srcu_key;
int ret;
u32 i;
/* We rely on IB/core that work is executed if we have num_sge != 0 only. */
WARN_ON(!work->num_sge);
- dev = mr_to_mdev(work->frags[0].mr);
- /* SRCU should be held when calling to mlx5_odp_populate_xlt() */
- srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < work->num_sge; ++i) {
ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
work->frags[i].length, &bytes_mapped,
@@ -1796,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
continue;
mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
}
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
}
@@ -1820,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd,
work->num_sge = i;
return false;
}
-
- /* Keep the MR pointer will valid outside the SRCU */
- atomic_inc(&work->frags[i].mr->num_deferred_work);
}
work->num_sge = num_sge;
return true;
@@ -1833,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
u32 pf_flags, struct ib_sge *sg_list,
u32 num_sge)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
u32 bytes_mapped = 0;
- int srcu_key;
int ret = 0;
u32 i;
- srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < num_sge; ++i) {
struct mlx5_ib_mr *mr;
mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
- if (!mr) {
- ret = -ENOENT;
- goto out;
- }
+ if (!mr)
+ return -ENOENT;
ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
&bytes_mapped, pf_flags);
- if (ret < 0)
- goto out;
+ if (ret < 0) {
+ mlx5r_deref_odp_mkey(&mr->mmkey);
+ return ret;
+ }
mlx5_update_odp_stats(mr, prefetch, ret);
+ mlx5r_deref_odp_mkey(&mr->mmkey);
}
- ret = 0;
-out:
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
- return ret;
+ return 0;
}
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
u32 pf_flags = 0;
struct prefetch_mr_work *work;
- int srcu_key;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
@@ -1884,13 +1918,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
if (!work)
return -ENOMEM;
- srcu_key = srcu_read_lock(&dev->odp_srcu);
if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
return -EINVAL;
}
queue_work(system_unbound_wq, &work->work);
- srcu_read_unlock(&dev->odp_srcu, srcu_key);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0cb7cc642d87..ec4b3f6a8222 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, uar_page, uar_index);
+ MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT);
MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
/* Set "fast registration enabled" for all kernel QPs */
@@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
sq->flow_rule = NULL;
}
+static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
+{
+ bool fr_supported =
+ MLX5_CAP_GEN(dev->mdev, rq_ts_format) ==
+ MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ MLX5_CAP_GEN(dev->mdev, rq_ts_format) ==
+ MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+
+ if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
+ if (!fr_supported) {
+ mlx5_ib_dbg(dev, "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING;
+ }
+ return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT;
+}
+
+static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
+{
+ bool fr_supported =
+ MLX5_CAP_GEN(dev->mdev, sq_ts_format) ==
+ MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ MLX5_CAP_GEN(dev->mdev, sq_ts_format) ==
+ MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+
+ if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
+ if (!fr_supported) {
+ mlx5_ib_dbg(dev, "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING;
+ }
+ return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT;
+}
+
+static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq)
+{
+ bool fr_supported =
+ MLX5_CAP_ROCE(dev->mdev, qp_ts_format) ==
+ MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ MLX5_CAP_ROCE(dev->mdev, qp_ts_format) ==
+ MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+ int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
+
+ if (recv_cq &&
+ recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)
+ ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING;
+
+ if (send_cq &&
+ send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)
+ ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING;
+
+ if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING &&
+ !fr_supported) {
+ mlx5_ib_dbg(dev, "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return ts_format;
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct ib_udata *udata,
struct mlx5_ib_sq *sq, void *qpin,
- struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
__be64 *pas;
@@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
int err;
unsigned int page_offset_quantized;
unsigned long page_size;
+ int ts_format;
+
+ ts_format = get_sq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
ubuffer->buf_size, 0);
@@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
@@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
@@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct ib_umem *umem = rq->base.ubuffer.umem;
unsigned int page_offset_quantized;
unsigned long page_size = 0;
+ int ts_format;
size_t inlen;
int err;
+ ts_format = get_rq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
+
page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
MLX5_ADAPTER_PAGE_SHIFT,
page_offset, 64,
@@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
@@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- u32 *in, size_t inlen,
- struct ib_pd *pd,
+ u32 *in, size_t inlen, struct ib_pd *pd,
struct ib_udata *udata,
- struct mlx5_ib_create_qp_resp *resp)
+ struct mlx5_ib_create_qp_resp *resp,
+ struct ib_qp_init_attr *init_attr)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (err)
return err;
- err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
+ err = create_raw_packet_qp_sq(dev, udata, sq, in, pd,
+ to_mcq(init_attr->send_cq));
if (err)
goto err_destroy_tis;
@@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, pd);
+ err = create_raw_packet_qp_rq(dev, rq, in, pd,
+ to_mcq(init_attr->recv_cq));
if (err)
goto err_destroy_sq;
@@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
struct mlx5_ib_qp_base *base;
+ int ts_format;
int mlx5_st;
void *qpc;
u32 *in;
@@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
return -EINVAL;
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ if (ts_format < 0)
+ return ts_format;
+ }
+
err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
&inlen, base, ucmd);
if (err)
@@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ MLX5_SET(qpc, qpc, ts_format, ts_format);
+
MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
if (qp->sq.wqe_cnt) {
@@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
- &params->resp);
+ &params->resp, init_attr);
} else
err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
@@ -2432,9 +2520,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
case MLX5_IB_QPT_HW_GSI:
case IB_QPT_DRIVER:
case IB_QPT_GSI:
- if (dev->profile == &raw_eth_profile)
- goto out;
- fallthrough;
case IB_QPT_RAW_PACKET:
case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
@@ -2629,10 +2714,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
int create_flags = attr->create_flags;
bool cond;
- if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile)
- if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST)
- return -EINVAL;
-
if (qp_type == MLX5_IB_QPT_DCT)
return (create_flags) ? -EINVAL : 0;
@@ -3076,6 +3157,8 @@ static int ib_to_mlx5_rate_map(u8 rate)
return 4;
case IB_RATE_50_GBPS:
return 5;
+ case IB_RATE_400_GBPS:
+ return 6;
default:
return rate + MLX5_STAT_RATE_OFFSET;
}
@@ -3183,11 +3266,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
alt ? attr->alt_pkey_index : attr->pkey_index);
if (ah_flags & IB_AH_GRH) {
- if (grh->sgid_index >=
- dev->mdev->port_caps[port - 1].gid_table_len) {
+ const struct ib_port_immutable *immutable;
+
+ immutable = ib_port_immutable_read(&dev->ib_dev, port);
+ if (grh->sgid_index >= immutable->gid_tbl_len) {
pr_err("sgid_index (%u) too large. max is %d\n",
grh->sgid_index,
- dev->mdev->port_caps[port - 1].gid_table_len);
+ immutable->gid_tbl_len);
return -EINVAL;
}
}
@@ -4211,6 +4296,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
return 0;
}
+static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ enum ib_qp_type qp_type)
+{
+ if (dev->profile != &raw_eth_profile)
+ return true;
+
+ if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR)
+ return true;
+
+ /* Internal QP used for wc testing, with NOPs in wq */
+ if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
+ return true;
+
+ return false;
+}
+
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -4221,7 +4323,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
- int port;
+
+ if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type))
+ return -EOPNOTSUPP;
if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
return -EOPNOTSUPP;
@@ -4263,10 +4367,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- }
-
if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
@@ -4295,14 +4395,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- if (attr_mask & IB_QP_PKEY_INDEX) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >=
- dev->mdev->port_caps[port - 1].pkey_table_len) {
- mlx5_ib_dbg(dev, "invalid pkey index %d\n",
- attr->pkey_index);
- goto out;
- }
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->pkey_table_len) {
+ mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index);
+ goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
@@ -5376,7 +5472,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
-/**
+/*
* Bind a qp to a counter. If @counter is NULL then bind the qp to
* the default counter
*/
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index d6038fb6c50c..cf2852cba45c 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
handle_qpt_uc(wr, &seg, &size);
break;
case IB_QPT_SMI:
- if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+ if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
err = -EPERM;
*bad_wr = wr;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 9dde70373a55..3cb4febaad0f 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -617,18 +617,18 @@ static inline bool qedr_qp_has_srq(struct qedr_qp *qp)
static inline bool qedr_qp_has_sq(struct qedr_qp *qp)
{
if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT)
- return 0;
+ return false;
- return 1;
+ return true;
}
static inline bool qedr_qp_has_rq(struct qedr_qp *qp)
{
if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI ||
qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp))
- return 0;
+ return false;
- return 1;
+ return true;
}
static inline struct qedr_user_mmap_entry *
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index f5542d703ef9..13e5e6bbec99 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -586,8 +586,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
qedr_inc_sw_prod(&qp->sq);
DP_DEBUG(qp->dev, QEDR_MSG_GSI,
- "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n",
- wr->opcode, in_irq(), irqs_disabled(), wr->wr_id);
+ "gsi post send: opcode=%d, wr_id=%llx\n", wr->opcode,
+ wr->wr_id);
} else {
DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc);
rc = -EAGAIN;
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 92eeea5679e2..84fc4dcc5399 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -151,7 +151,7 @@ int qib_count_units(int *npresentp, int *nupp)
/**
* qib_wait_linkstate - wait for an IB link state change to occur
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib device
* @state: the state to wait for
* @msecs: the number of milliseconds to wait
*
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 5838b3bf34b9..bf660c001b6d 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -47,7 +47,7 @@
* qib_eeprom_read - receives bytes from the eeprom via I2C
* @dd: the qlogic_ib device
* @eeprom_offset: address to read from
- * @buffer: where to store result
+ * @buff: where to store result
* @len: number of bytes to receive
*/
int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset,
@@ -94,7 +94,7 @@ static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset,
* qib_eeprom_write - writes data to the eeprom via I2C
* @dd: the qlogic_ib device
* @eeprom_offset: where to place data
- * @buffer: data to write
+ * @buff: data to write
* @len: number of bytes to write
*/
int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset,
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 44150be215bf..b35e1174be22 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1223,7 +1223,7 @@ static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd,
/**
* qib_6120_bringup_serdes - bring up the serdes
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib pport
*/
static int qib_6120_bringup_serdes(struct qib_pportdata *ppd)
{
@@ -1412,7 +1412,7 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd)
/**
* qib_6120_setup_setextled - set the state of the two external LEDs
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib device
* @on: whether the link is up or not
*
* The exact combo of LEDs if on is true is determined by looking
@@ -1823,7 +1823,7 @@ bail:
* qib_6120_put_tid - write a TID in chip
* @dd: the qlogic_ib device
* @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
+ * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
* for expected
* @pa: physical address of in memory buffer; tidinvalid if freeing
*
@@ -1890,7 +1890,7 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
* qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher
* @dd: the qlogic_ib device
* @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
+ * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
* for expected
* @pa: physical address of in memory buffer; tidinvalid if freeing
*
@@ -1932,7 +1932,7 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr,
/**
* qib_6120_clear_tids - clear all TID entries for a context, expected and eager
* @dd: the qlogic_ib device
- * @ctxt: the context
+ * @rcd: the context
*
* clear all TID entries for a context, expected and eager.
* Used from qib_close(). On this chip, TIDs are only 32 bits,
@@ -2008,7 +2008,7 @@ int __attribute__((weak)) qib_unordered_wc(void)
/**
* qib_6120_get_base_info - set chip-specific flags for user code
* @rcd: the qlogic_ib ctxt
- * @kbase: qib_base_info pointer
+ * @kinfo: qib_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
* HyperTransport can affect some user packet algorithms.
@@ -2270,8 +2270,8 @@ static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op)
/**
* qib_portcntr_6120 - read a per-port counter
- * @dd: the qlogic_ib device
- * @creg: the counter to snapshot
+ * @ppd: the qlogic_ib pport
+ * @reg: the counter to snapshot
*/
static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg)
{
@@ -2610,7 +2610,7 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd)
/**
* qib_get_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the qlogic_ib device qib_devdata
+ * @t: contains a pointer to the qlogic_ib device qib_devdata
*
* This needs more work; in particular, decision on whether we really
* need traffic_wds done the way it is
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 0a6f26d4cb31..229dcd6ead95 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1701,7 +1701,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd)
/**
* qib_setup_7220_setextled - set the state of the two external LEDs
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib device
* @on: whether the link is up or not
*
* The exact combo of LEDs if on is true is determined by looking
@@ -2146,7 +2146,7 @@ bail:
* qib_7220_put_tid - write a TID to the chip
* @dd: the qlogic_ib device
* @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: 0 for eager, 1 for expected
+ * @type: 0 for eager, 1 for expected
* @pa: physical address of in memory buffer; tidinvalid if freeing
*/
static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
@@ -2180,7 +2180,7 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
/**
* qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager
* @dd: the qlogic_ib device
- * @ctxt: the ctxt
+ * @rcd: the ctxt
*
* clear all TID entries for a ctxt, expected and eager.
* Used from qib_close(). On this chip, TIDs are only 32 bits,
@@ -2238,7 +2238,7 @@ static void qib_7220_tidtemplate(struct qib_devdata *dd)
/**
* qib_init_7220_get_base_info - set chip-specific flags for user code
* @rcd: the qlogic_ib ctxt
- * @kbase: qib_base_info pointer
+ * @kinfo: qib_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
* HyperTransport can affect some user packet algorithims.
@@ -2896,8 +2896,8 @@ static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op)
/**
* qib_portcntr_7220 - read a per-port counter
- * @dd: the qlogic_ib device
- * @creg: the counter to snapshot
+ * @ppd: the qlogic_ib pport
+ * @reg: the counter to snapshot
*/
static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg)
{
@@ -3232,7 +3232,7 @@ done:
/**
* qib_get_7220_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the qlogic_ib device qib_devdata
+ * @t: contains a pointer to the qlogic_ib device qib_devdata
*
* This needs more work; in particular, decision on whether we really
* need traffic_wds done the way it is
@@ -4468,7 +4468,7 @@ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
/**
* qib_init_iba7220_funcs - set up the chip-specific function pointers
- * @dev: the pci_dev for qlogic_ib device
+ * @pdev: the pci_dev for qlogic_ib device
* @ent: pci_device_id struct for this dev
*
* This is global, and is called directly at init to set up the
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 189a0ce6056a..9fe6ea75b45e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2514,7 +2514,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
/**
* qib_7322_quiet_serdes - set serdes to txidle
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib pport
* Called when driver is being unloaded
*/
static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd)
@@ -3760,7 +3760,7 @@ bail:
* qib_7322_put_tid - write a TID to the chip
* @dd: the qlogic_ib device
* @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: 0 for eager, 1 for expected
+ * @type: 0 for eager, 1 for expected
* @pa: physical address of in memory buffer; tidinvalid if freeing
*/
static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
@@ -3796,7 +3796,7 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
/**
* qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager
* @dd: the qlogic_ib device
- * @ctxt: the ctxt
+ * @rcd: the ctxt
*
* clear all TID entries for a ctxt, expected and eager.
* Used from qib_close().
@@ -3861,7 +3861,7 @@ static void qib_7322_tidtemplate(struct qib_devdata *dd)
/**
* qib_init_7322_get_base_info - set chip-specific flags for user code
* @rcd: the qlogic_ib ctxt
- * @kbase: qib_base_info pointer
+ * @kinfo: qib_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
* HyperTransport can affect some user packet algorithims.
@@ -4724,7 +4724,7 @@ static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op)
/**
* qib_portcntr_7322 - read a per-port chip counter
* @ppd: the qlogic_ib pport
- * @creg: the counter to read (not a chip offset)
+ * @reg: the counter to read (not a chip offset)
*/
static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg)
{
@@ -5096,7 +5096,7 @@ done:
/**
* qib_get_7322_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the qlogic_ib device qib_devdata
+ * @t: contains a pointer to the qlogic_ib device qib_devdata
*
* VESTIGIAL IBA7322 has no "small fast counters", so the only
* real purpose of this function is to maintain the notion of
@@ -7175,7 +7175,7 @@ static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum)
/**
* qib_init_iba7322_funcs - set up the chip-specific function pointers
- * @dev: the pci_dev for qlogic_ib device
+ * @pdev: the pci_dev for qlogic_ib device
* @ent: pci_device_id struct for this dev
*
* Also allocates, inits, and returns the devdata struct for this
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 65c3b964ad1b..85c3187d796d 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -40,9 +40,9 @@
/**
* qib_format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
+ * @msg: message buffer
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
*/
static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
@@ -53,11 +53,11 @@ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
/**
* qib_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
*/
void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs,
size_t nhwerrmsgs, char *msg, size_t msgl)
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index f83e331977f8..44e2f813024a 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -886,7 +886,7 @@ done:
/**
* rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib device
* @key: the PKEY index
*
* Return true if this was the last reference and the hardware table entry
@@ -916,7 +916,7 @@ bail:
/**
* add_pkey - add the given PKEY to the hardware table
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib device
* @key: the PKEY
*
* Return an error code if unable to add the entry, zero if no change,
@@ -2346,8 +2346,10 @@ static int process_cc(struct ib_device *ibdev, int mad_flags,
* @port: the port number this packet came in on
* @in_wc: the work completion entry for this packet
* @in_grh: the global route header for this packet
- * @in_mad: the incoming MAD
- * @out_mad: any outgoing MAD reply
+ * @in: the incoming MAD
+ * @out: any outgoing MAD reply
+ * @out_mad_size: size of the outgoing MAD reply
+ * @out_mad_pkey_index: unused
*
* Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
* interested in processing.
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 2e07b3749b88..cb2a02d671e2 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -181,7 +181,7 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd)
pci_set_drvdata(dd->pcidev, NULL);
}
-/**
+/*
* We save the msi lo and hi values, so we can restore them after
* chip reset (the kernel PCI infrastructure doesn't yet handle that
* correctly.
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 8d0563ef5be1..ca39a029e4af 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -207,7 +207,7 @@ bail:
return ret;
}
-/**
+/*
* qib_free_all_qps - check for QPs still in use
*/
unsigned qib_free_all_qps(struct rvt_dev_info *rdi)
@@ -376,9 +376,9 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
/**
* qib_check_send_wqe - validate wr/wqe
- * @qp - The qp
- * @wqe - The built wqe
- * @call_send - Determine if the send should be posted or scheduled
+ * @qp: The qp
+ * @wqe: The built wqe
+ * @call_send: Determine if the send should be posted or scheduled
*
* Returns 0 on success, -EINVAL on failure
*/
@@ -418,8 +418,8 @@ static const char * const qp_type_str[] = {
/**
* qib_qp_iter_print - print information to seq_file
- * @s - the seq_file
- * @iter - the iterator
+ * @s: the seq_file
+ * @iter: the iterator
*/
void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 3915e5b4a9bc..a1c20ffb4490 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -207,6 +207,7 @@ bail:
/**
* qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
+ * @flags: unused
*
* Assumes the s_lock is held.
*
@@ -992,7 +993,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
return wqe;
}
-/**
+/*
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
* @psn: the packet sequence number of the ACK
@@ -1259,6 +1260,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
* @psn: the packet sequence number for this packet
* @hdrsize: the header length
* @pmtu: the path MTU
+ * @rcd: the context pointer
*
* This is called from qib_rc_rcv() to process an incoming RC response
* packet for the given QP.
@@ -1480,6 +1482,7 @@ bail:
* @opcode: the opcode for this packet
* @psn: the packet sequence number for this packet
* @diff: the difference between the PSN and the expected PSN
+ * @rcd: the context pointer
*
* This is called from qib_rc_rcv() to process an unexpected
* incoming RC packet for the given QP.
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
index f5698664419b..97b8a2bf5c69 100644
--- a/drivers/infiniband/hw/qib/qib_twsi.c
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -168,6 +168,7 @@ static void stop_cmd(struct qib_devdata *dd);
/**
* rd_byte - read a byte, sending STOP on last, else ACK
* @dd: the qlogic_ib device
+ * @last: identifies the last read
*
* Returns byte shifted out of device
*/
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 29785eb84646..6a8148851f21 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -377,6 +377,7 @@ void qib_sendbuf_done(struct qib_devdata *dd, unsigned n)
* @start: the starting send buffer number
* @len: the number of send buffers
* @avail: true if the buffers are available for kernel use, false otherwise
+ * @rcd: the context pointer
*/
void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
unsigned len, u32 avail, struct qib_ctxtdata *rcd)
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 554af4273a13..8e2bda77d8b9 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -40,6 +40,7 @@
/**
* qib_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
+ * @flags: unused
*
* Assumes the s_lock is held.
*
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 93ca21347959..81eda94bd279 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -222,6 +222,7 @@ drop:
/**
* qib_make_ud_req - construct a UD request packet
* @qp: the QP
+ * @flags: flags to modify and pass back to caller
*
* Assumes the s_lock is held.
*
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 4c24e83f3175..5d6cf7427431 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -43,7 +43,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages,
unpin_user_pages_dirty_lock(p, num_pages, dirty);
}
-/**
+/*
* qib_map_page - a safety wrapper around pci_map_page()
*
* A dma_addr of all 0's is interpreted by the chip as "disabled".
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index f6c01bad5a74..8e0de265ad57 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1067,7 +1067,7 @@ bail:
/**
* qib_get_counters - get various chip counters
- * @dd: the qlogic_ib device
+ * @ppd: the qlogic_ib device
* @cntrs: counters are placed here
*
* Return the counters needed by recv_pma_get_portcounters().
@@ -1675,7 +1675,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
/**
* _qib_schedule_send - schedule progress
- * @qp - the qp
+ * @qp: the qp
*
* This schedules progress w/o regard to the s_flags.
*
@@ -1694,7 +1694,7 @@ bool _qib_schedule_send(struct rvt_qp *qp)
/**
* qib_schedule_send - schedule progress
- * @qp - the qp
+ * @qp: the qp
*
* This schedules qp progress. The s_lock
* should be held.
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 00a330909bb3..4b6019e7de67 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -474,7 +474,6 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
sizeof(struct pvrdma_cqne);
unsigned int head;
- unsigned long flags;
dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");
@@ -483,11 +482,11 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
struct pvrdma_cq *cq;
cqne = get_cqne(dev, head);
- spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ spin_lock(&dev->cq_tbl_lock);
cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
if (cq)
refcount_inc(&cq->refcnt);
- spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+ spin_unlock(&dev->cq_tbl_lock);
if (cq && cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 20cc0799ac4b..5138afca067f 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -371,7 +371,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
return ret;
}
-/**
+/*
* rvt_resize_cq - change the size of the CQ
* @ibcq: the completion queue
*
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index 108c71e3ac23..fa5be13a4394 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -56,8 +56,11 @@
* @port_num: the port number this packet came in on, 1 based from ib core
* @in_wc: the work completion entry for this packet
* @in_grh: the global route header for this packet
- * @in_mad: the incoming MAD
- * @out_mad: any outgoing MAD reply
+ * @in: the incoming MAD
+ * @in_mad_size: size of the incoming MAD
+ * @out: any outgoing MAD reply
+ * @out_mad_size: size of the outgoing MAD reply
+ * @out_mad_pkey_index: unused
*
* Note that the verbs framework has already done the MAD sanity checks,
* and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 5233a63d99a6..951abac13dbb 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -180,7 +180,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
}
EXPORT_SYMBOL(rvt_mcast_find);
-/**
+/*
* rvt_mcast_add - insert mcast GID into table and attach QP struct
* @mcast: the mcast GID table
* @mqp: the QP to attach
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 90fc234f489a..601d18dda1f5 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -369,6 +369,7 @@ bail:
* @pd: protection domain for this memory region
* @start: starting userspace address
* @length: length of region to register
+ * @virt_addr: associated virtual address
* @mr_access_flags: access flags for this memory region
* @udata: unused by the driver
*
@@ -438,8 +439,8 @@ bail_umem:
/**
* rvt_dereg_clean_qp_cb - callback from iterator
- * @qp - the qp
- * @v - the mregion (as u64)
+ * @qp: the qp
+ * @v: the mregion (as u64)
*
* This routine fields the callback for all QPs and
* for QPs in the same PD as the MR will call the
@@ -457,7 +458,7 @@ static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
/**
* rvt_dereg_clean_qps - find QPs for reference cleanup
- * @mr - the MR that is being deregistered
+ * @mr: the MR that is being deregistered
*
* This routine iterates RC QPs looking for references
* to the lkey noted in mr.
@@ -471,8 +472,8 @@ static void rvt_dereg_clean_qps(struct rvt_mregion *mr)
/**
* rvt_check_refs - check references
- * @mr - the megion
- * @t - the caller identification
+ * @mr: the mregion
+ * @t: the caller identification
*
* This routine checks MRs holding a reference during
* when being de-registered.
@@ -506,8 +507,8 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
/**
* rvt_mr_has_lkey - is MR
- * @mr - the mregion
- * @lkey - the lkey
+ * @mr: the mregion
+ * @lkey: the lkey
*/
bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
{
@@ -516,8 +517,8 @@ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
/**
* rvt_ss_has_lkey - is mr in sge tests
- * @ss - the sge state
- * @lkey
+ * @ss: the sge state
+ * @lkey: the lkey
*
* This code tests for an MR in the indicated
* sge state.
@@ -540,7 +541,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
/**
* rvt_dereg_mr - unregister and free a memory region
* @ibmr: the memory region to free
- *
+ * @udata: unused by the driver
*
* Note that this is called to free MRs created by rvt_get_dma_mr()
* or rvt_reg_user_mr().
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 22fa9bde5419..9d13db68283c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -156,7 +156,7 @@ void rvt_wss_exit(struct rvt_dev_info *rdi)
rdi->wss = NULL;
}
-/**
+/*
* rvt_wss_init - Init wss data structures
*
* Return: 0 on success
@@ -323,6 +323,7 @@ static void get_map_page(struct rvt_qpn_table *qpt,
/**
* init_qpn_table - initialize the QP number table for a device
+ * @rdi: rvt dev struct
* @qpt: the QPN table
*/
static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
@@ -524,6 +525,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
* IB_QPT_SMI/IB_QPT_GSI
* @rdi: rvt device info structure
* @qpt: queue pair number table pointer
+ * @type: the QP type
* @port_num: IB port number, 1 based, comes from core
* @exclude_prefix: prefix of special queue pair number being allocated
*
@@ -655,8 +657,8 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
/**
* rvt_swqe_has_lkey - return true if lkey is used by swqe
- * @wqe - the send wqe
- * @lkey - the lkey
+ * @wqe: the send wqe
+ * @lkey: the lkey
*
* Test the swqe for using lkey
*/
@@ -675,8 +677,8 @@ static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
/**
* rvt_qp_sends_has_lkey - return true is qp sends use lkey
- * @qp - the rvt_qp
- * @lkey - the lkey
+ * @qp: the rvt_qp
+ * @lkey: the lkey
*/
static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
{
@@ -699,8 +701,8 @@ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
/**
* rvt_qp_acks_has_lkey - return true if acks have lkey
- * @qp - the qp
- * @lkey - the lkey
+ * @qp: the qp
+ * @lkey: the lkey
*/
static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
{
@@ -716,10 +718,10 @@ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
return false;
}
-/*
+/**
* rvt_qp_mr_clean - clean up remote ops for lkey
- * @qp - the qp
- * @lkey - the lkey that is being de-registered
+ * @qp: the qp
+ * @lkey: the lkey that is being de-registered
*
* This routine checks if the lkey is being used by
* the qp.
@@ -853,6 +855,7 @@ bail:
/**
* rvt_init_qp - initialize the QP state to the reset state
+ * @rdi: rvt dev struct
* @qp: the QP to init or reinit
* @type: the QP type
*
@@ -907,6 +910,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
/**
* _rvt_reset_qp - initialize the QP state to the reset state
+ * @rdi: rvt dev struct
* @qp: the QP to reset
* @type: the QP type
*
@@ -1726,6 +1730,7 @@ inval:
/**
* rvt_destroy_qp - destroy a queue pair
* @ibqp: the queue pair to destroy
+ * @udata: unused by the driver
*
* Note that this can be called while the QP is actively sending or
* receiving!
@@ -1901,9 +1906,9 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
/**
* rvt_qp_valid_operation - validate post send wr request
- * @qp - the qp
- * @post-parms - the post send table for the driver
- * @wr - the work request
+ * @qp: the qp
+ * @post_parms: the post send table for the driver
+ * @wr: the work request
*
* The routine validates the operation based on the
* validation table an returns the length of the operation
@@ -2013,6 +2018,7 @@ static inline int rvt_qp_is_avail(
* rvt_post_one_wr - post one RC, UC, or UD send work request
* @qp: the QP to post on
* @wr: the work request to send
+ * @call_send: kick the send engine into gear
*/
static int rvt_post_one_wr(struct rvt_qp *qp,
const struct ib_send_wr *wr,
@@ -2612,7 +2618,7 @@ EXPORT_SYMBOL(rvt_stop_rc_timers);
/**
* rvt_stop_rnr_timer - stop an rnr timer
- * @qp - the QP
+ * @qp: the QP
*
* stop an rnr timer and return if the timer
* had been pending.
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 64d98bf238ab..2a7c2f12d372 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -67,7 +67,7 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi)
/**
* rvt_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
+ * @ibsrq: the SRQ to create
* @srq_init_attr: the attributes of the SRQ
* @udata: data from libibverbs when creating a user SRQ
*
@@ -311,7 +311,8 @@ bail_free:
return ret;
}
-/** rvt_query_srq - query srq data
+/**
+ * rvt_query_srq - query srq data
* @ibsrq: srq to query
* @attr: return info in attr
*
@@ -330,7 +331,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
/**
* rvt_destroy_srq - destory an srq
* @ibsrq: srq object to destroy
- *
+ * @udata: user data for libibverbs.so
*/
int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 49cec85a372a..8fd0128a9336 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -294,7 +294,7 @@ static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
/**
* rvt_dealloc_ucontext - Free a user context
- * @context - Free this
+ * @context: Unused
*/
static void rvt_dealloc_ucontext(struct ib_ucontext *context)
{
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 452149066792..06b8dc5093f7 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -4,6 +4,7 @@ config RDMA_RXE
depends on INET && PCI && INFINIBAND
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
+ select CRYPTO
select CRYPTO_CRC32
help
This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 0a1e6393250b..17a361b8dbb1 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -515,6 +515,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
while ((skb = skb_dequeue(&qp->resp_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
+ ib_device_put(qp->ibqp.device);
}
while ((wqe = queue_head(qp->sq.queue))) {
@@ -527,6 +528,17 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
}
}
+static void free_pkt(struct rxe_pkt_info *pkt)
+{
+ struct sk_buff *skb = PKT_TO_SKB(pkt);
+ struct rxe_qp *qp = pkt->qp;
+ struct ib_device *dev = qp->ibqp.device;
+
+ kfree_skb(skb);
+ rxe_drop_ref(qp);
+ ib_device_put(dev);
+}
+
int rxe_completer(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
@@ -535,6 +547,7 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
+ int ret = 0;
rxe_add_ref(qp);
@@ -542,7 +555,8 @@ int rxe_completer(void *arg)
qp->req.state == QP_STATE_RESET) {
rxe_drain_resp_pkts(qp, qp->valid &&
qp->req.state == QP_STATE_ERROR);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
}
if (qp->comp.timeout) {
@@ -552,8 +566,10 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
- if (qp->req.need_retry)
- goto exit;
+ if (qp->req.need_retry) {
+ ret = -EAGAIN;
+ goto done;
+ }
state = COMPST_GET_ACK;
@@ -624,11 +640,6 @@ int rxe_completer(void *arg)
break;
case COMPST_DONE:
- if (pkt) {
- rxe_drop_ref(pkt->qp);
- kfree_skb(skb);
- skb = NULL;
- }
goto done;
case COMPST_EXIT:
@@ -651,7 +662,8 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@@ -663,22 +675,18 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted))
- goto exit;
+ if (!wqe || (wqe->state == wqe_state_posted)) {
+ pr_warn("Retry attempted without a valid wqe\n");
+ ret = -EAGAIN;
+ goto done;
+ }
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
*/
if (qp->comp.started_retry &&
- !qp->comp.timeout_retry) {
- if (pkt) {
- rxe_drop_ref(pkt->qp);
- kfree_skb(skb);
- skb = NULL;
- }
-
+ !qp->comp.timeout_retry)
goto done;
- }
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
@@ -699,13 +707,6 @@ int rxe_completer(void *arg)
qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 0);
}
-
- if (pkt) {
- rxe_drop_ref(pkt->qp);
- kfree_skb(skb);
- skb = NULL;
- }
-
goto done;
} else {
@@ -726,10 +727,8 @@ int rxe_completer(void *arg)
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
- rxe_drop_ref(pkt->qp);
- kfree_skb(skb);
- skb = NULL;
- goto exit;
+ ret = -EAGAIN;
+ goto done;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -742,30 +741,15 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
-
- if (pkt) {
- rxe_drop_ref(pkt->qp);
- kfree_skb(skb);
- skb = NULL;
- }
-
- goto exit;
+ ret = -EAGAIN;
+ goto done;
}
}
-exit:
- /* we come here if we are done with processing and want the task to
- * exit from the loop calling us
- */
- WARN_ON_ONCE(skb);
- rxe_drop_ref(qp);
- return -EAGAIN;
-
done:
- /* we come here if we have processed a packet we want the task to call
- * us again to see if there is anything else to do
- */
- WARN_ON_ONCE(skb);
+ if (pkt)
+ free_pkt(pkt);
rxe_drop_ref(qp);
- return 0;
+
+ return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index 3b483b75dfe3..e432f9e37795 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -22,7 +22,6 @@ struct rxe_pkt_info {
u16 paylen; /* length of bth - icrc */
u8 port_num; /* port pkt received on */
u8 opcode; /* bth opcode of packet */
- u8 offset; /* bth offset from pkt->hdr */
};
/* Macros should be used only for received skb */
@@ -280,134 +279,134 @@ static inline void __bth_set_psn(void *arg, u32 psn)
static inline u8 bth_opcode(struct rxe_pkt_info *pkt)
{
- return __bth_opcode(pkt->hdr + pkt->offset);
+ return __bth_opcode(pkt->hdr);
}
static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode)
{
- __bth_set_opcode(pkt->hdr + pkt->offset, opcode);
+ __bth_set_opcode(pkt->hdr, opcode);
}
static inline u8 bth_se(struct rxe_pkt_info *pkt)
{
- return __bth_se(pkt->hdr + pkt->offset);
+ return __bth_se(pkt->hdr);
}
static inline void bth_set_se(struct rxe_pkt_info *pkt, int se)
{
- __bth_set_se(pkt->hdr + pkt->offset, se);
+ __bth_set_se(pkt->hdr, se);
}
static inline u8 bth_mig(struct rxe_pkt_info *pkt)
{
- return __bth_mig(pkt->hdr + pkt->offset);
+ return __bth_mig(pkt->hdr);
}
static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig)
{
- __bth_set_mig(pkt->hdr + pkt->offset, mig);
+ __bth_set_mig(pkt->hdr, mig);
}
static inline u8 bth_pad(struct rxe_pkt_info *pkt)
{
- return __bth_pad(pkt->hdr + pkt->offset);
+ return __bth_pad(pkt->hdr);
}
static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad)
{
- __bth_set_pad(pkt->hdr + pkt->offset, pad);
+ __bth_set_pad(pkt->hdr, pad);
}
static inline u8 bth_tver(struct rxe_pkt_info *pkt)
{
- return __bth_tver(pkt->hdr + pkt->offset);
+ return __bth_tver(pkt->hdr);
}
static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver)
{
- __bth_set_tver(pkt->hdr + pkt->offset, tver);
+ __bth_set_tver(pkt->hdr, tver);
}
static inline u16 bth_pkey(struct rxe_pkt_info *pkt)
{
- return __bth_pkey(pkt->hdr + pkt->offset);
+ return __bth_pkey(pkt->hdr);
}
static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey)
{
- __bth_set_pkey(pkt->hdr + pkt->offset, pkey);
+ __bth_set_pkey(pkt->hdr, pkey);
}
static inline u32 bth_qpn(struct rxe_pkt_info *pkt)
{
- return __bth_qpn(pkt->hdr + pkt->offset);
+ return __bth_qpn(pkt->hdr);
}
static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn)
{
- __bth_set_qpn(pkt->hdr + pkt->offset, qpn);
+ __bth_set_qpn(pkt->hdr, qpn);
}
static inline int bth_fecn(struct rxe_pkt_info *pkt)
{
- return __bth_fecn(pkt->hdr + pkt->offset);
+ return __bth_fecn(pkt->hdr);
}
static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn)
{
- __bth_set_fecn(pkt->hdr + pkt->offset, fecn);
+ __bth_set_fecn(pkt->hdr, fecn);
}
static inline int bth_becn(struct rxe_pkt_info *pkt)
{
- return __bth_becn(pkt->hdr + pkt->offset);
+ return __bth_becn(pkt->hdr);
}
static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn)
{
- __bth_set_becn(pkt->hdr + pkt->offset, becn);
+ __bth_set_becn(pkt->hdr, becn);
}
static inline u8 bth_resv6a(struct rxe_pkt_info *pkt)
{
- return __bth_resv6a(pkt->hdr + pkt->offset);
+ return __bth_resv6a(pkt->hdr);
}
static inline void bth_set_resv6a(struct rxe_pkt_info *pkt)
{
- __bth_set_resv6a(pkt->hdr + pkt->offset);
+ __bth_set_resv6a(pkt->hdr);
}
static inline int bth_ack(struct rxe_pkt_info *pkt)
{
- return __bth_ack(pkt->hdr + pkt->offset);
+ return __bth_ack(pkt->hdr);
}
static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack)
{
- __bth_set_ack(pkt->hdr + pkt->offset, ack);
+ __bth_set_ack(pkt->hdr, ack);
}
static inline void bth_set_resv7(struct rxe_pkt_info *pkt)
{
- __bth_set_resv7(pkt->hdr + pkt->offset);
+ __bth_set_resv7(pkt->hdr);
}
static inline u32 bth_psn(struct rxe_pkt_info *pkt)
{
- return __bth_psn(pkt->hdr + pkt->offset);
+ return __bth_psn(pkt->hdr);
}
static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn)
{
- __bth_set_psn(pkt->hdr + pkt->offset, psn);
+ __bth_set_psn(pkt->hdr, psn);
}
static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se,
int mig, int pad, u16 pkey, u32 qpn, int ack_req,
u32 psn)
{
- struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset);
+ struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr);
bth->opcode = opcode;
bth->flags = (pad << 4) & BTH_PAD_MASK;
@@ -448,14 +447,14 @@ static inline void __rdeth_set_een(void *arg, u32 een)
static inline u8 rdeth_een(struct rxe_pkt_info *pkt)
{
- return __rdeth_een(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RDETH]);
+ return __rdeth_een(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RDETH]);
}
static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een)
{
- __rdeth_set_een(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een);
+ __rdeth_set_een(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RDETH], een);
}
/******************************************************************************
@@ -499,26 +498,26 @@ static inline void __deth_set_sqp(void *arg, u32 sqp)
static inline u32 deth_qkey(struct rxe_pkt_info *pkt)
{
- return __deth_qkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_DETH]);
+ return __deth_qkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_DETH]);
}
static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey)
{
- __deth_set_qkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey);
+ __deth_set_qkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey);
}
static inline u32 deth_sqp(struct rxe_pkt_info *pkt)
{
- return __deth_sqp(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_DETH]);
+ return __deth_sqp(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_DETH]);
}
static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp)
{
- __deth_set_sqp(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp);
+ __deth_set_sqp(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp);
}
/******************************************************************************
@@ -574,38 +573,38 @@ static inline void __reth_set_len(void *arg, u32 len)
static inline u64 reth_va(struct rxe_pkt_info *pkt)
{
- return __reth_va(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
+ return __reth_va(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RETH]);
}
static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va)
{
- __reth_set_va(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RETH], va);
+ __reth_set_va(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RETH], va);
}
static inline u32 reth_rkey(struct rxe_pkt_info *pkt)
{
- return __reth_rkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
+ return __reth_rkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RETH]);
}
static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
- __reth_set_rkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey);
+ __reth_set_rkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey);
}
static inline u32 reth_len(struct rxe_pkt_info *pkt)
{
- return __reth_len(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
+ return __reth_len(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RETH]);
}
static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
{
- __reth_set_len(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_RETH], len);
+ __reth_set_len(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_RETH], len);
}
/******************************************************************************
@@ -676,50 +675,50 @@ static inline void __atmeth_set_comp(void *arg, u64 comp)
static inline u64 atmeth_va(struct rxe_pkt_info *pkt)
{
- return __atmeth_va(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+ return __atmeth_va(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
}
static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va)
{
- __atmeth_set_va(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va);
+ __atmeth_set_va(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va);
}
static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt)
{
- return __atmeth_rkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+ return __atmeth_rkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
}
static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
- __atmeth_set_rkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey);
+ __atmeth_set_rkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey);
}
static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt)
{
- return __atmeth_swap_add(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+ return __atmeth_swap_add(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
}
static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add)
{
- __atmeth_set_swap_add(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add);
+ __atmeth_set_swap_add(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add);
}
static inline u64 atmeth_comp(struct rxe_pkt_info *pkt)
{
- return __atmeth_comp(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+ return __atmeth_comp(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
}
static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp)
{
- __atmeth_set_comp(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp);
+ __atmeth_set_comp(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp);
}
/******************************************************************************
@@ -780,26 +779,26 @@ static inline void __aeth_set_msn(void *arg, u32 msn)
static inline u8 aeth_syn(struct rxe_pkt_info *pkt)
{
- return __aeth_syn(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_AETH]);
+ return __aeth_syn(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_AETH]);
}
static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn)
{
- __aeth_set_syn(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn);
+ __aeth_set_syn(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_AETH], syn);
}
static inline u32 aeth_msn(struct rxe_pkt_info *pkt)
{
- return __aeth_msn(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_AETH]);
+ return __aeth_msn(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_AETH]);
}
static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn)
{
- __aeth_set_msn(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn);
+ __aeth_set_msn(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_AETH], msn);
}
/******************************************************************************
@@ -825,14 +824,14 @@ static inline void __atmack_set_orig(void *arg, u64 orig)
static inline u64 atmack_orig(struct rxe_pkt_info *pkt)
{
- return __atmack_orig(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]);
+ return __atmack_orig(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMACK]);
}
static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig)
{
- __atmack_set_orig(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig);
+ __atmack_set_orig(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig);
}
/******************************************************************************
@@ -858,14 +857,14 @@ static inline void __immdt_set_imm(void *arg, __be32 imm)
static inline __be32 immdt_imm(struct rxe_pkt_info *pkt)
{
- return __immdt_imm(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]);
+ return __immdt_imm(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_IMMDT]);
}
static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm)
{
- __immdt_set_imm(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm);
+ __immdt_set_imm(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm);
}
/******************************************************************************
@@ -891,14 +890,14 @@ static inline void __ieth_set_rkey(void *arg, u32 rkey)
static inline u32 ieth_rkey(struct rxe_pkt_info *pkt)
{
- return __ieth_rkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_IETH]);
+ return __ieth_rkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_IETH]);
}
static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
{
- __ieth_set_rkey(pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);
+ __ieth_set_rkey(pkt->hdr +
+ rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);
}
enum rxe_hdr_length {
@@ -915,13 +914,12 @@ enum rxe_hdr_length {
static inline size_t header_size(struct rxe_pkt_info *pkt)
{
- return pkt->offset + rxe_opcode[pkt->opcode].length;
+ return rxe_opcode[pkt->opcode].length;
}
static inline void *payload_addr(struct rxe_pkt_info *pkt)
{
- return pkt->hdr + pkt->offset
- + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];
+ return pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];
}
static inline size_t payload_size(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index c02315aed8d1..0ea9a5aa4ec0 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -7,45 +7,61 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* caller should hold mc_grp_pool->pool_lock */
+static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe,
+ struct rxe_pool *pool,
+ union ib_gid *mgid)
+{
+ int err;
+ struct rxe_mc_grp *grp;
+
+ grp = rxe_alloc_locked(&rxe->mc_grp_pool);
+ if (!grp)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&grp->qp_list);
+ spin_lock_init(&grp->mcg_lock);
+ grp->rxe = rxe;
+ rxe_add_key_locked(grp, mgid);
+
+ err = rxe_mcast_add(rxe, mgid);
+ if (unlikely(err)) {
+ rxe_drop_key_locked(grp);
+ rxe_drop_ref(grp);
+ return ERR_PTR(err);
+ }
+
+ return grp;
+}
+
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
struct rxe_mc_grp **grp_p)
{
int err;
struct rxe_mc_grp *grp;
+ struct rxe_pool *pool = &rxe->mc_grp_pool;
+ unsigned long flags;
- if (rxe->attr.max_mcast_qp_attach == 0) {
- err = -EINVAL;
- goto err1;
- }
+ if (rxe->attr.max_mcast_qp_attach == 0)
+ return -EINVAL;
- grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+ write_lock_irqsave(&pool->pool_lock, flags);
+
+ grp = rxe_pool_get_key_locked(pool, mgid);
if (grp)
goto done;
- grp = rxe_alloc(&rxe->mc_grp_pool);
- if (!grp) {
- err = -ENOMEM;
- goto err1;
+ grp = create_grp(rxe, pool, mgid);
+ if (IS_ERR(grp)) {
+ write_unlock_irqrestore(&pool->pool_lock, flags);
+ err = PTR_ERR(grp);
+ return err;
}
- INIT_LIST_HEAD(&grp->qp_list);
- spin_lock_init(&grp->mcg_lock);
- grp->rxe = rxe;
-
- rxe_add_key(grp, mgid);
-
- err = rxe_mcast_add(rxe, mgid);
- if (err)
- goto err2;
-
done:
+ write_unlock_irqrestore(&pool->pool_lock, flags);
*grp_p = grp;
return 0;
-
-err2:
- rxe_drop_ref(grp);
-err1:
- return err;
}
int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 943914c2a50c..01662727dca0 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -153,15 +153,16 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct udphdr *udph;
+ struct rxe_dev *rxe;
struct net_device *ndev = skb->dev;
- struct net_device *rdev = ndev;
- struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
- if (!rxe && is_vlan_dev(rdev)) {
- rdev = vlan_dev_real_dev(ndev);
- rxe = rxe_get_dev_from_net(rdev);
- }
+ /* rxe_get_dev_from_net() takes a reference on rxe->ib_dev;
+ * that reference is dropped when the skb is freed
+ */
+ rxe = rxe_get_dev_from_net(ndev);
+ if (!rxe && is_vlan_dev(ndev))
+ rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
if (!rxe)
goto drop;
@@ -180,12 +181,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
rxe_rcv(skb);
- /*
- * FIXME: this is in the wrong place, it needs to be done when pkt is
- * destroyed
- */
- ib_device_put(&rxe->ib_dev);
-
return 0;
drop:
kfree_skb(skb);
@@ -412,9 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
+/* fix up a send packet to match the packets
+ * received from UDP before looping them back
+ */
void rxe_loopback(struct sk_buff *skb)
{
- rxe_rcv(skb);
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
+ if (skb->protocol == htons(ETH_P_IP))
+ skb_pull(skb, sizeof(struct iphdr));
+ else
+ skb_pull(skb, sizeof(struct ipv6hdr));
+
+ if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
+ kfree_skb(skb);
+ else
+ rxe_rcv(skb);
}
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b374eb53e2fe..307d8986e7c9 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -15,21 +15,25 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
.name = "rxe-uc",
.size = sizeof(struct rxe_ucontext),
+ .elem_offset = offsetof(struct rxe_ucontext, pelem),
.flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_PD] = {
.name = "rxe-pd",
.size = sizeof(struct rxe_pd),
+ .elem_offset = offsetof(struct rxe_pd, pelem),
.flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_AH] = {
.name = "rxe-ah",
.size = sizeof(struct rxe_ah),
- .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC,
+ .elem_offset = offsetof(struct rxe_ah, pelem),
+ .flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_SRQ] = {
.name = "rxe-srq",
.size = sizeof(struct rxe_srq),
+ .elem_offset = offsetof(struct rxe_srq, pelem),
.flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
@@ -37,6 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_QP] = {
.name = "rxe-qp",
.size = sizeof(struct rxe_qp),
+ .elem_offset = offsetof(struct rxe_qp, pelem),
.cleanup = rxe_qp_cleanup,
.flags = RXE_POOL_INDEX,
.min_index = RXE_MIN_QP_INDEX,
@@ -45,12 +50,14 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_CQ] = {
.name = "rxe-cq",
.size = sizeof(struct rxe_cq),
+ .elem_offset = offsetof(struct rxe_cq, pelem),
.flags = RXE_POOL_NO_ALLOC,
.cleanup = rxe_cq_cleanup,
},
[RXE_TYPE_MR] = {
.name = "rxe-mr",
.size = sizeof(struct rxe_mem),
+ .elem_offset = offsetof(struct rxe_mem, pelem),
.cleanup = rxe_mem_cleanup,
.flags = RXE_POOL_INDEX,
.max_index = RXE_MAX_MR_INDEX,
@@ -59,6 +66,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MW] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mem),
+ .elem_offset = offsetof(struct rxe_mem, pelem),
.flags = RXE_POOL_INDEX,
.max_index = RXE_MAX_MW_INDEX,
.min_index = RXE_MIN_MW_INDEX,
@@ -66,6 +74,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MC_GRP] = {
.name = "rxe-mc_grp",
.size = sizeof(struct rxe_mc_grp),
+ .elem_offset = offsetof(struct rxe_mc_grp, pelem),
.cleanup = rxe_mc_cleanup,
.flags = RXE_POOL_KEY,
.key_offset = offsetof(struct rxe_mc_grp, mgid),
@@ -74,7 +83,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MC_ELEM] = {
.name = "rxe-mc_elem",
.size = sizeof(struct rxe_mc_elem),
- .flags = RXE_POOL_ATOMIC,
+ .elem_offset = offsetof(struct rxe_mc_elem, pelem),
},
};
@@ -94,18 +103,18 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
goto out;
}
- pool->max_index = max;
- pool->min_index = min;
+ pool->index.max_index = max;
+ pool->index.min_index = min;
size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
- pool->table = kmalloc(size, GFP_KERNEL);
- if (!pool->table) {
+ pool->index.table = kmalloc(size, GFP_KERNEL);
+ if (!pool->index.table) {
err = -ENOMEM;
goto out;
}
- pool->table_size = size;
- bitmap_zero(pool->table, max - min + 1);
+ pool->index.table_size = size;
+ bitmap_zero(pool->index.table, max - min + 1);
out:
return err;
@@ -127,13 +136,12 @@ int rxe_pool_init(
pool->max_elem = max_elem;
pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);
pool->flags = rxe_type_info[type].flags;
- pool->tree = RB_ROOT;
+ pool->index.tree = RB_ROOT;
+ pool->key.tree = RB_ROOT;
pool->cleanup = rxe_type_info[type].cleanup;
atomic_set(&pool->num_elem, 0);
- kref_init(&pool->ref_cnt);
-
rwlock_init(&pool->pool_lock);
if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
@@ -145,67 +153,47 @@ int rxe_pool_init(
}
if (rxe_type_info[type].flags & RXE_POOL_KEY) {
- pool->key_offset = rxe_type_info[type].key_offset;
- pool->key_size = rxe_type_info[type].key_size;
+ pool->key.key_offset = rxe_type_info[type].key_offset;
+ pool->key.key_size = rxe_type_info[type].key_size;
}
- pool->state = RXE_POOL_STATE_VALID;
-
out:
return err;
}
-static void rxe_pool_release(struct kref *kref)
-{
- struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
-
- pool->state = RXE_POOL_STATE_INVALID;
- kfree(pool->table);
-}
-
-static void rxe_pool_put(struct rxe_pool *pool)
-{
- kref_put(&pool->ref_cnt, rxe_pool_release);
-}
-
void rxe_pool_cleanup(struct rxe_pool *pool)
{
- unsigned long flags;
-
- write_lock_irqsave(&pool->pool_lock, flags);
- pool->state = RXE_POOL_STATE_INVALID;
if (atomic_read(&pool->num_elem) > 0)
pr_warn("%s pool destroyed with unfree'd elem\n",
pool_name(pool));
- write_unlock_irqrestore(&pool->pool_lock, flags);
- rxe_pool_put(pool);
+ kfree(pool->index.table);
}
static u32 alloc_index(struct rxe_pool *pool)
{
u32 index;
- u32 range = pool->max_index - pool->min_index + 1;
+ u32 range = pool->index.max_index - pool->index.min_index + 1;
- index = find_next_zero_bit(pool->table, range, pool->last);
+ index = find_next_zero_bit(pool->index.table, range, pool->index.last);
if (index >= range)
- index = find_first_zero_bit(pool->table, range);
+ index = find_first_zero_bit(pool->index.table, range);
WARN_ON_ONCE(index >= range);
- set_bit(index, pool->table);
- pool->last = index;
- return index + pool->min_index;
+ set_bit(index, pool->index.table);
+ pool->index.last = index;
+ return index + pool->index.min_index;
}
static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
- struct rb_node **link = &pool->tree.rb_node;
+ struct rb_node **link = &pool->index.tree.rb_node;
struct rb_node *parent = NULL;
struct rxe_pool_entry *elem;
while (*link) {
parent = *link;
- elem = rb_entry(parent, struct rxe_pool_entry, node);
+ elem = rb_entry(parent, struct rxe_pool_entry, index_node);
if (elem->index == new->index) {
pr_warn("element already exists!\n");
@@ -218,25 +206,25 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
link = &(*link)->rb_right;
}
- rb_link_node(&new->node, parent, link);
- rb_insert_color(&new->node, &pool->tree);
+ rb_link_node(&new->index_node, parent, link);
+ rb_insert_color(&new->index_node, &pool->index.tree);
out:
return;
}
static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
- struct rb_node **link = &pool->tree.rb_node;
+ struct rb_node **link = &pool->key.tree.rb_node;
struct rb_node *parent = NULL;
struct rxe_pool_entry *elem;
int cmp;
while (*link) {
parent = *link;
- elem = rb_entry(parent, struct rxe_pool_entry, node);
+ elem = rb_entry(parent, struct rxe_pool_entry, key_node);
- cmp = memcmp((u8 *)elem + pool->key_offset,
- (u8 *)new + pool->key_offset, pool->key_size);
+ cmp = memcmp((u8 *)elem + pool->key.key_offset,
+ (u8 *)new + pool->key.key_offset, pool->key.key_size);
if (cmp == 0) {
pr_warn("key already exists!\n");
@@ -249,116 +237,135 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
link = &(*link)->rb_right;
}
- rb_link_node(&new->node, parent, link);
- rb_insert_color(&new->node, &pool->tree);
+ rb_link_node(&new->key_node, parent, link);
+ rb_insert_color(&new->key_node, &pool->key.tree);
out:
return;
}
-void rxe_add_key(void *arg, void *key)
+void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
- unsigned long flags;
- write_lock_irqsave(&pool->pool_lock, flags);
- memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
+ memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
insert_key(pool, elem);
- write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void rxe_drop_key(void *arg)
+void __rxe_add_key(struct rxe_pool_entry *elem, void *key)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
- rb_erase(&elem->node, &pool->tree);
+ __rxe_add_key_locked(elem, key);
write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void rxe_add_index(void *arg)
+void __rxe_drop_key_locked(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+
+ rb_erase(&elem->key_node, &pool->key.tree);
+}
+
+void __rxe_drop_key(struct rxe_pool_entry *elem)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
+ __rxe_drop_key_locked(elem);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void __rxe_add_index_locked(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+
elem->index = alloc_index(pool);
insert_index(pool, elem);
- write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void rxe_drop_index(void *arg)
+void __rxe_add_index(struct rxe_pool_entry *elem)
{
- struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
- clear_bit(elem->index - pool->min_index, pool->table);
- rb_erase(&elem->node, &pool->tree);
+ __rxe_add_index_locked(elem);
write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void *rxe_alloc(struct rxe_pool *pool)
+void __rxe_drop_index_locked(struct rxe_pool_entry *elem)
{
- struct rxe_pool_entry *elem;
- unsigned long flags;
+ struct rxe_pool *pool = elem->pool;
- might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+ clear_bit(elem->index - pool->index.min_index, pool->index.table);
+ rb_erase(&elem->index_node, &pool->index.tree);
+}
- read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != RXE_POOL_STATE_VALID) {
- read_unlock_irqrestore(&pool->pool_lock, flags);
- return NULL;
- }
- kref_get(&pool->ref_cnt);
- read_unlock_irqrestore(&pool->pool_lock, flags);
+void __rxe_drop_index(struct rxe_pool_entry *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+ unsigned long flags;
+
+ write_lock_irqsave(&pool->pool_lock, flags);
+ __rxe_drop_index_locked(elem);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
+}
- if (!ib_device_try_get(&pool->rxe->ib_dev))
- goto out_put_pool;
+void *rxe_alloc_locked(struct rxe_pool *pool)
+{
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ struct rxe_pool_entry *elem;
+ u8 *obj;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
goto out_cnt;
- elem = kzalloc(rxe_type_info[pool->type].size,
- (pool->flags & RXE_POOL_ATOMIC) ?
- GFP_ATOMIC : GFP_KERNEL);
- if (!elem)
+ obj = kzalloc(info->size, GFP_ATOMIC);
+ if (!obj)
goto out_cnt;
+ elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
+
elem->pool = pool;
kref_init(&elem->ref_cnt);
- return elem;
+ return obj;
out_cnt:
atomic_dec(&pool->num_elem);
- ib_device_put(&pool->rxe->ib_dev);
-out_put_pool:
- rxe_pool_put(pool);
return NULL;
}
-int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+void *rxe_alloc(struct rxe_pool *pool)
{
- unsigned long flags;
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ struct rxe_pool_entry *elem;
+ u8 *obj;
+
+ if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
+ goto out_cnt;
- might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+ obj = kzalloc(info->size, GFP_KERNEL);
+ if (!obj)
+ goto out_cnt;
- read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != RXE_POOL_STATE_VALID) {
- read_unlock_irqrestore(&pool->pool_lock, flags);
- return -EINVAL;
- }
- kref_get(&pool->ref_cnt);
- read_unlock_irqrestore(&pool->pool_lock, flags);
+ elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
- if (!ib_device_try_get(&pool->rxe->ib_dev))
- goto out_put_pool;
+ elem->pool = pool;
+ kref_init(&elem->ref_cnt);
+
+ return obj;
+
+out_cnt:
+ atomic_dec(&pool->num_elem);
+ return NULL;
+}
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+{
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
goto out_cnt;
@@ -369,9 +376,6 @@ int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
out_cnt:
atomic_dec(&pool->num_elem);
- ib_device_put(&pool->rxe->ib_dev);
-out_put_pool:
- rxe_pool_put(pool);
return -EINVAL;
}
@@ -380,67 +384,77 @@ void rxe_elem_release(struct kref *kref)
struct rxe_pool_entry *elem =
container_of(kref, struct rxe_pool_entry, ref_cnt);
struct rxe_pool *pool = elem->pool;
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ u8 *obj;
if (pool->cleanup)
pool->cleanup(elem);
- if (!(pool->flags & RXE_POOL_NO_ALLOC))
- kfree(elem);
+ if (!(pool->flags & RXE_POOL_NO_ALLOC)) {
+ obj = (u8 *)elem - info->elem_offset;
+ kfree(obj);
+ }
+
atomic_dec(&pool->num_elem);
- ib_device_put(&pool->rxe->ib_dev);
- rxe_pool_put(pool);
}
-void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
+void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index)
{
- struct rb_node *node = NULL;
- struct rxe_pool_entry *elem = NULL;
- unsigned long flags;
-
- read_lock_irqsave(&pool->pool_lock, flags);
-
- if (pool->state != RXE_POOL_STATE_VALID)
- goto out;
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ struct rb_node *node;
+ struct rxe_pool_entry *elem;
+ u8 *obj;
- node = pool->tree.rb_node;
+ node = pool->index.tree.rb_node;
while (node) {
- elem = rb_entry(node, struct rxe_pool_entry, node);
+ elem = rb_entry(node, struct rxe_pool_entry, index_node);
if (elem->index > index)
node = node->rb_left;
else if (elem->index < index)
node = node->rb_right;
- else {
- kref_get(&elem->ref_cnt);
+ else
break;
- }
}
-out:
- read_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? elem : NULL;
+ if (node) {
+ kref_get(&elem->ref_cnt);
+ obj = (u8 *)elem - info->elem_offset;
+ } else {
+ obj = NULL;
+ }
+
+ return obj;
}
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
- struct rb_node *node = NULL;
- struct rxe_pool_entry *elem = NULL;
- int cmp;
+ u8 *obj;
unsigned long flags;
read_lock_irqsave(&pool->pool_lock, flags);
+ obj = rxe_pool_get_index_locked(pool, index);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
- if (pool->state != RXE_POOL_STATE_VALID)
- goto out;
+ return obj;
+}
- node = pool->tree.rb_node;
+void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key)
+{
+ struct rxe_type_info *info = &rxe_type_info[pool->type];
+ struct rb_node *node;
+ struct rxe_pool_entry *elem;
+ u8 *obj;
+ int cmp;
+
+ node = pool->key.tree.rb_node;
while (node) {
- elem = rb_entry(node, struct rxe_pool_entry, node);
+ elem = rb_entry(node, struct rxe_pool_entry, key_node);
- cmp = memcmp((u8 *)elem + pool->key_offset,
- key, pool->key_size);
+ cmp = memcmp((u8 *)elem + pool->key.key_offset,
+ key, pool->key.key_size);
if (cmp > 0)
node = node->rb_left;
@@ -450,10 +464,24 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
break;
}
- if (node)
+ if (node) {
kref_get(&elem->ref_cnt);
+ obj = (u8 *)elem - info->elem_offset;
+ } else {
+ obj = NULL;
+ }
-out:
+ return obj;
+}
+
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+{
+ u8 *obj;
+ unsigned long flags;
+
+ read_lock_irqsave(&pool->pool_lock, flags);
+ obj = rxe_pool_get_key_locked(pool, key);
read_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? elem : NULL;
+
+ return obj;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 432745ffc8d4..61210b300a78 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -11,7 +11,6 @@
#define RXE_POOL_CACHE_FLAGS (0)
enum rxe_pool_flags {
- RXE_POOL_ATOMIC = BIT(0),
RXE_POOL_INDEX = BIT(1),
RXE_POOL_KEY = BIT(2),
RXE_POOL_NO_ALLOC = BIT(4),
@@ -36,6 +35,7 @@ struct rxe_pool_entry;
struct rxe_type_info {
const char *name;
size_t size;
+ size_t elem_offset;
void (*cleanup)(struct rxe_pool_entry *obj);
enum rxe_pool_flags flags;
u32 max_index;
@@ -46,18 +46,16 @@ struct rxe_type_info {
extern struct rxe_type_info rxe_type_info[];
-enum rxe_pool_state {
- RXE_POOL_STATE_INVALID,
- RXE_POOL_STATE_VALID,
-};
-
struct rxe_pool_entry {
struct rxe_pool *pool;
struct kref ref_cnt;
struct list_head list;
- /* only used if indexed or keyed */
- struct rb_node node;
+ /* only used if keyed */
+ struct rb_node key_node;
+
+ /* only used if indexed */
+ struct rb_node index_node;
u32 index;
};
@@ -65,24 +63,29 @@ struct rxe_pool {
struct rxe_dev *rxe;
rwlock_t pool_lock; /* protects pool add/del/search */
size_t elem_size;
- struct kref ref_cnt;
void (*cleanup)(struct rxe_pool_entry *obj);
- enum rxe_pool_state state;
enum rxe_pool_flags flags;
enum rxe_elem_type type;
unsigned int max_elem;
atomic_t num_elem;
- /* only used if indexed or keyed */
- struct rb_root tree;
- unsigned long *table;
- size_t table_size;
- u32 max_index;
- u32 min_index;
- u32 last;
- size_t key_offset;
- size_t key_size;
+ /* only used if indexed */
+ struct {
+ struct rb_root tree;
+ unsigned long *table;
+ size_t table_size;
+ u32 last;
+ u32 max_index;
+ u32 min_index;
+ } index;
+
+ /* only used if keyed */
+ struct {
+ struct rb_root tree;
+ size_t key_offset;
+ size_t key_size;
+ } key;
};
/* initialize a pool of objects with given limit on
@@ -95,32 +98,70 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
/* free resources from object pool */
void rxe_pool_cleanup(struct rxe_pool *pool);
-/* allocate an object from pool */
+/* allocate an object from pool holding and not holding the pool lock */
+void *rxe_alloc_locked(struct rxe_pool *pool);
+
void *rxe_alloc(struct rxe_pool *pool);
/* connect already allocated object to pool */
-int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+
+#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem)
/* assign an index to an indexed object and insert object into
- * pool's rb tree
+ * pool's rb tree holding and not holding the pool_lock
*/
-void rxe_add_index(void *elem);
+void __rxe_add_index_locked(struct rxe_pool_entry *elem);
+
+#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem)
-/* drop an index and remove object from rb tree */
-void rxe_drop_index(void *elem);
+void __rxe_add_index(struct rxe_pool_entry *elem);
+
+#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
+
+/* drop an index and remove object from rb tree
+ * holding and not holding the pool_lock
+ */
+void __rxe_drop_index_locked(struct rxe_pool_entry *elem);
+
+#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->pelem)
+
+void __rxe_drop_index(struct rxe_pool_entry *elem);
+
+#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem)
/* assign a key to a keyed object and insert object into
- * pool's rb tree
+ * pool's rb tree holding and not holding pool_lock
*/
-void rxe_add_key(void *elem, void *key);
+void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key);
+
+#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key)
+
+void __rxe_add_key(struct rxe_pool_entry *elem, void *key);
+
+#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
+
+/* remove elem from rb tree holding and not holding the pool_lock */
+void __rxe_drop_key_locked(struct rxe_pool_entry *elem);
-/* remove elem from rb tree */
-void rxe_drop_key(void *elem);
+#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->pelem)
+
+void __rxe_drop_key(struct rxe_pool_entry *elem);
+
+#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem)
+
+/* lookup an indexed object from index holding and not holding the pool_lock.
+ * takes a reference on object
+ */
+void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index);
-/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
-/* lookup keyed object from key. takes a reference on the object */
+/* lookup keyed object from key holding and not holding the pool_lock.
+ * takes a reference on the object
+ */
+void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key);
+
void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
/* cleanup an object when all references are dropped */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 656a5b4be847..34ae957a315c 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -62,6 +62,17 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
struct rxe_port *port;
int port_num = init->port_num;
+ switch (init->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
if (!init->recv_cq || !init->send_cq) {
pr_warn("missing cq\n");
goto err1;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index c9984a28eecc..7a49e27da23a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -9,21 +9,26 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* check that QP matches packet opcode type and is in a valid state */
static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
+ unsigned int pkt_type;
+
if (unlikely(!qp->valid))
goto err1;
+ pkt_type = pkt->opcode & 0xe0;
+
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
+ if (unlikely(pkt_type != IB_OPCODE_RC)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
break;
case IB_QPT_UC:
- if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
+ if (unlikely(pkt_type != IB_OPCODE_UC)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
@@ -31,7 +36,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
+ if (unlikely(pkt_type != IB_OPCODE_UD)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
@@ -85,8 +90,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
goto err1;
}
- if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
- pkt->mask) {
+ if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) {
u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
if (unlikely(deth_qkey(pkt) != qkey)) {
@@ -233,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
- struct sk_buff *per_qp_skb;
- struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
@@ -246,13 +248,17 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
/* lookup mcast group corresponding to mgid, takes a ref */
mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
if (!mcg)
- goto err1; /* mcast group not registered */
+ goto drop; /* mcast group not registered */
spin_lock_bh(&mcg->mcg_lock);
+ /* this is unreliable datagram service so we let
+ * failures to deliver a multicast packet to a
+ * single QP happen and just move on and try
+ * the rest of them on the list
+ */
list_for_each_entry(mce, &mcg->qp_list, qp_list) {
qp = mce->qp;
- pkt = SKB_TO_PKT(skb);
/* validate qp for incoming packet */
err = check_type_state(rxe, pkt, qp);
@@ -263,31 +269,49 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
if (err)
continue;
- /* for all but the last qp create a new clone of the
- * skb and pass to the qp.
+ /* for all but the last QP create a new clone of the
+ * skb and pass to the QP. Pass the original skb to
+ * the last QP in the list.
*/
- if (mce->qp_list.next != &mcg->qp_list)
- per_qp_skb = skb_clone(skb, GFP_ATOMIC);
- else
- per_qp_skb = skb;
-
- if (unlikely(!per_qp_skb))
- continue;
-
- per_qp_pkt = SKB_TO_PKT(per_qp_skb);
- per_qp_pkt->qp = qp;
- rxe_add_ref(qp);
- rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
+ if (mce->qp_list.next != &mcg->qp_list) {
+ struct sk_buff *cskb;
+ struct rxe_pkt_info *cpkt;
+
+ cskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!cskb))
+ continue;
+
+ if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
+ kfree_skb(cskb);
+ break;
+ }
+
+ cpkt = SKB_TO_PKT(cskb);
+ cpkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(cpkt, cskb);
+ } else {
+ pkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(pkt, skb);
+ skb = NULL; /* mark consumed */
+ }
}
spin_unlock_bh(&mcg->mcg_lock);
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
- return;
+ if (likely(!skb))
+ return;
-err1:
+ /* This only occurs if one of the checks fails on the last
+ * QP in the list above
+ */
+
+drop:
kfree_skb(skb);
+ ib_device_put(&rxe->ib_dev);
}
/**
@@ -340,9 +364,7 @@ void rxe_rcv(struct sk_buff *skb)
__be32 *icrcp;
u32 calc_icrc, pack_icrc;
- pkt->offset = 0;
-
- if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
+ if (unlikely(skb->len < RXE_BTH_BYTES))
goto drop;
if (rxe_chk_dgid(rxe, skb) < 0) {
@@ -397,4 +419,5 @@ drop:
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ ib_device_put(&rxe->ib_dev);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index d4917646641a..889290793d75 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -375,7 +375,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
pkt->psn = qp->req.psn;
pkt->mask = rxe_opcode[opcode].mask;
pkt->paylen = paylen;
- pkt->offset = 0;
pkt->wqe = wqe;
/* init skb */
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c7e3b6a4af38..142f3d8014d8 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -99,6 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
+ ib_device_put(qp->ibqp.device);
}
/* go drain recv wr queue */
@@ -585,11 +586,10 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
ack->qp = qp;
ack->opcode = opcode;
ack->mask = rxe_opcode[opcode].mask;
- ack->offset = pkt->offset;
ack->paylen = paylen;
/* fill in bth using the request packet headers */
- memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);
+ memcpy(ack->hdr, pkt->hdr, RXE_BTH_BYTES);
bth_set_opcode(ack, opcode);
bth_set_qpn(ack, qp->attr.dest_qp_num);
@@ -1017,6 +1017,7 @@ static enum resp_states cleanup(struct rxe_qp *qp,
skb = skb_dequeue(&qp->req_pkts);
rxe_drop_ref(qp);
kfree_skb(skb);
+ ib_device_put(qp->ibqp.device);
}
if (qp->resp.mr) {
@@ -1181,6 +1182,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
+ ib_device_put(qp->ibqp.device);
}
if (notify)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index a031514e2f41..dee5e0e919d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -106,12 +106,12 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
return IB_LINK_LAYER_ETHERNET;
}
-static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
+static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
- struct rxe_dev *rxe = to_rdev(uctx->device);
- struct rxe_ucontext *uc = to_ruc(uctx);
+ struct rxe_dev *rxe = to_rdev(ibuc->device);
+ struct rxe_ucontext *uc = to_ruc(ibuc);
- return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
+ return rxe_add_to_pool(&rxe->uc_pool, uc);
}
static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
@@ -145,7 +145,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
- return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
+ return rxe_add_to_pool(&rxe->pd_pool, pd);
}
static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -169,7 +169,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
if (err)
return err;
- err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem);
+ err = rxe_add_to_pool(&rxe->ah_pool, ah);
if (err)
return err;
@@ -273,7 +273,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
if (err)
goto err1;
- err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem);
+ err = rxe_add_to_pool(&rxe->srq_pool, srq);
if (err)
goto err1;
@@ -555,37 +555,42 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
}
}
-static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
+static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
+ const struct ib_send_wr *ibwr)
+{
+ struct ib_sge *sge = ibwr->sg_list;
+ u8 *p = wqe->dma.inline_data;
+ int i;
+
+ for (i = 0; i < ibwr->num_sge; i++, sge++) {
+ memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
+ p += sge->length;
+ }
+}
+
+static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
unsigned int mask, unsigned int length,
struct rxe_send_wqe *wqe)
{
int num_sge = ibwr->num_sge;
- struct ib_sge *sge;
- int i;
- u8 *p;
init_send_wr(qp, &wqe->wr, ibwr);
+ /* local operation */
+ if (unlikely(mask & WR_REG_MASK)) {
+ wqe->mask = mask;
+ wqe->state = wqe_state_posted;
+ return;
+ }
+
if (qp_type(qp) == IB_QPT_UD ||
qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI)
memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));
- if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
- p = wqe->dma.inline_data;
-
- sge = ibwr->sg_list;
- for (i = 0; i < num_sge; i++, sge++) {
- memcpy(p, (void *)(uintptr_t)sge->addr,
- sge->length);
-
- p += sge->length;
- }
- } else if (mask & WR_REG_MASK) {
- wqe->mask = mask;
- wqe->state = wqe_state_posted;
- return 0;
- } else
+ if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
+ copy_inline_data_to_wqe(wqe, ibwr);
+ else
memcpy(wqe->dma.sge, ibwr->sg_list,
num_sge * sizeof(struct ib_sge));
@@ -599,8 +604,6 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
wqe->dma.sge_offset = 0;
wqe->state = wqe_state_posted;
wqe->ssn = atomic_add_return(1, &qp->ssn);
-
- return 0;
}
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
@@ -623,10 +626,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
}
send_wqe = producer_addr(sq->queue);
-
- err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
- if (unlikely(err))
- goto err1;
+ init_send_wqe(qp, ibwr, mask, length, send_wqe);
advance_producer(sq->queue);
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
@@ -774,7 +774,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
if (err)
return err;
- return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
+ return rxe_add_to_pool(&rxe->cq_pool, cq);
}
static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
@@ -1118,7 +1118,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
struct ib_device *dev = &rxe->ib_dev;
struct crypto_shash *tfm;
- strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
+ strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index adda78996219..368959ae9a8c 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -653,7 +653,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
{
struct siw_sqe *orq_e = orq_get_tail(qp);
- if (orq_e && READ_ONCE(orq_e->flags) == 0)
+ if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
return NULL;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index ee95cf29179d..cf55326f2ab4 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -135,7 +135,7 @@ static struct {
static int siw_init_cpulist(void)
{
- int i, num_nodes = num_possible_nodes();
+ int i, num_nodes = nr_node_ids;
memset(siw_tx_thread, 0, sizeof(siw_tx_thread));
@@ -357,7 +357,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
sizeof(base_dev->iw_ifname));
/* Disable TCP port mapping */
- base_dev->iw_driver_flags = IW_F_NO_PORT_MAP,
+ base_dev->iw_driver_flags = IW_F_NO_PORT_MAP;
sdev->attrs.max_qp = SIW_MAX_QP;
sdev->attrs.max_qp_wr = SIW_MAX_QP_WR;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 875d36d4b1c6..ddb2e66f9f13 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk)
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
- irq_size = roundup_pow_of_two(irq_size);
- orq_size = roundup_pow_of_two(orq_size);
-
- qp->attrs.irq_size = irq_size;
- qp->attrs.orq_size = orq_size;
-
- qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
- if (!qp->irq) {
- siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
- qp->attrs.irq_size = 0;
- return -ENOMEM;
+ if (irq_size) {
+ irq_size = roundup_pow_of_two(irq_size);
+ qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
+ if (!qp->irq) {
+ qp->attrs.irq_size = 0;
+ return -ENOMEM;
+ }
}
- qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
- if (!qp->orq) {
- siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
- qp->attrs.orq_size = 0;
- qp->attrs.irq_size = 0;
- vfree(qp->irq);
- return -ENOMEM;
+ if (orq_size) {
+ orq_size = roundup_pow_of_two(orq_size);
+ qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
+ if (!qp->orq) {
+ qp->attrs.orq_size = 0;
+ qp->attrs.irq_size = 0;
+ vfree(qp->irq);
+ return -ENOMEM;
+ }
}
+ qp->attrs.irq_size = irq_size;
+ qp->attrs.orq_size = orq_size;
siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
return 0;
}
@@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
if (ctrl & MPA_V2_RDMA_WRITE_RTR)
wqe->sqe.opcode = SIW_OP_WRITE;
else if (ctrl & MPA_V2_RDMA_READ_RTR) {
- struct siw_sqe *rreq;
+ struct siw_sqe *rreq = NULL;
wqe->sqe.opcode = SIW_OP_READ;
spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
+ if (qp->attrs.orq_size)
+ rreq = orq_get_free(qp);
if (rreq) {
siw_read_to_orq(rreq, &wqe->sqe);
qp->orq_put++;
@@ -877,135 +878,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
rreq->num_sge = 1;
}
-/*
- * Must be called with SQ locked.
- * To avoid complete SQ starvation by constant inbound READ requests,
- * the active IRQ will not be served after qp->irq_burst, if the
- * SQ has pending work.
- */
-int siw_activate_tx(struct siw_qp *qp)
+static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
- struct siw_sqe *irqe, *sqe;
+ struct siw_sqe *sqe;
struct siw_wqe *wqe = tx_wqe(qp);
int rv = 1;
- irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
-
- if (irqe->flags & SIW_WQE_VALID) {
- sqe = sq_get_next(qp);
-
- /*
- * Avoid local WQE processing starvation in case
- * of constant inbound READ request stream
- */
- if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
- qp->irq_burst = 0;
- goto skip_irq;
- }
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* start READ RESPONSE */
- wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
- wqe->sqe.flags = 0;
- if (irqe->num_sge) {
- wqe->sqe.num_sge = 1;
- wqe->sqe.sge[0].length = irqe->sge[0].length;
- wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
- wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
- } else {
- wqe->sqe.num_sge = 0;
- }
-
- /* Retain original RREQ's message sequence number for
- * potential error reporting cases.
- */
- wqe->sqe.sge[1].length = irqe->sge[1].length;
-
- wqe->sqe.rkey = irqe->rkey;
- wqe->sqe.raddr = irqe->raddr;
+ sqe = sq_get_next(qp);
+ if (!sqe)
+ return 0;
- wqe->processed = 0;
- qp->irq_get++;
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
- /* mark current IRQ entry free */
- smp_store_mb(irqe->flags, 0);
+ /* First copy SQE to kernel private memory */
+ memcpy(&wqe->sqe, sqe, sizeof(*sqe));
+ if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ rv = -EINVAL;
goto out;
}
- sqe = sq_get_next(qp);
- if (sqe) {
-skip_irq:
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* First copy SQE to kernel private memory */
- memcpy(&wqe->sqe, sqe, sizeof(*sqe));
-
- if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ if (wqe->sqe.flags & SIW_WQE_INLINE) {
+ if (wqe->sqe.opcode != SIW_OP_SEND &&
+ wqe->sqe.opcode != SIW_OP_WRITE) {
rv = -EINVAL;
goto out;
}
- if (wqe->sqe.flags & SIW_WQE_INLINE) {
- if (wqe->sqe.opcode != SIW_OP_SEND &&
- wqe->sqe.opcode != SIW_OP_WRITE) {
- rv = -EINVAL;
- goto out;
- }
- if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
- rv = -EINVAL;
- goto out;
- }
- wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
- wqe->sqe.sge[0].lkey = 0;
- wqe->sqe.num_sge = 1;
+ if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto out;
}
- if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
- /* A READ cannot be fenced */
- if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode ==
- SIW_OP_READ_LOCAL_INV)) {
- siw_dbg_qp(qp, "cannot fence read\n");
- rv = -EINVAL;
- goto out;
- }
- spin_lock(&qp->orq_lock);
+ wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
+ wqe->sqe.sge[0].lkey = 0;
+ wqe->sqe.num_sge = 1;
+ }
+ if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
+ /* A READ cannot be fenced */
+ if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode ==
+ SIW_OP_READ_LOCAL_INV)) {
+ siw_dbg_qp(qp, "cannot fence read\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ spin_lock(&qp->orq_lock);
- if (!siw_orq_empty(qp)) {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
- } else if (wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- struct siw_sqe *rreq;
+ } else if (wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ struct siw_sqe *rreq;
- wqe->sqe.num_sge = 1;
+ if (unlikely(!qp->attrs.orq_size)) {
+ /* We negotiated not to send READ req's */
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.num_sge = 1;
- spin_lock(&qp->orq_lock);
+ spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
- if (rreq) {
- /*
- * Make an immediate copy in ORQ to be ready
- * to process loopback READ reply
- */
- siw_read_to_orq(rreq, &wqe->sqe);
- qp->orq_put++;
- } else {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ /*
+ * Make an immediate copy in ORQ to be ready
+ * to process loopback READ reply
+ */
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
}
-
- /* Clear SQE, can be re-used by application */
- smp_store_mb(sqe->flags, 0);
- qp->sq_get++;
- } else {
- rv = 0;
+ spin_unlock(&qp->orq_lock);
}
+
+ /* Clear SQE, can be re-used by application */
+ smp_store_mb(sqe->flags, 0);
+ qp->sq_get++;
out:
if (unlikely(rv < 0)) {
siw_dbg_qp(qp, "error %d\n", rv);
@@ -1015,6 +969,65 @@ out:
}
/*
+ * Must be called with SQ locked.
+ * To avoid complete SQ starvation by constant inbound READ requests,
+ * the active IRQ will not be served after qp->irq_burst, if the
+ * SQ has pending work.
+ */
+int siw_activate_tx(struct siw_qp *qp)
+{
+ struct siw_sqe *irqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+
+ if (!qp->attrs.irq_size)
+ return siw_activate_tx_from_sq(qp);
+
+ irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
+
+ if (!(irqe->flags & SIW_WQE_VALID))
+ return siw_activate_tx_from_sq(qp);
+
+ /*
+ * Avoid local WQE processing starvation in case
+ * of constant inbound READ request stream
+ */
+ if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
+ qp->irq_burst = 0;
+ return siw_activate_tx_from_sq(qp);
+ }
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* start READ RESPONSE */
+ wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
+ wqe->sqe.flags = 0;
+ if (irqe->num_sge) {
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = irqe->sge[0].length;
+ wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
+ } else {
+ wqe->sqe.num_sge = 0;
+ }
+
+ /* Retain original RREQ's message sequence number for
+ * potential error reporting cases.
+ */
+ wqe->sqe.sge[1].length = irqe->sge[1].length;
+
+ wqe->sqe.rkey = irqe->rkey;
+ wqe->sqe.raddr = irqe->raddr;
+
+ wqe->processed = 0;
+ qp->irq_get++;
+
+ /* mark current IRQ entry free */
+ smp_store_mb(irqe->flags, 0);
+
+ return 1;
+}
+
+/*
* Check if current CQ state qualifies for calling CQ completion
* handler. Must be called with CQ lock held.
*/
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index 4bd1f1f84057..60116f20653c 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
}
spin_lock_irqsave(&qp->sq_lock, flags);
+ if (unlikely(!qp->attrs.irq_size)) {
+ run_sq = 0;
+ goto error_irq;
+ }
if (tx_work->wr_status == SIW_WR_IDLE) {
/*
* immediately schedule READ response w/o
@@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
/* RRESP now valid as current TX wqe or placed into IRQ */
smp_store_mb(resp->flags, SIW_WQE_VALID);
} else {
- pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
- qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
+error_irq:
+ pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
+ qp_id(qp), qp->attrs.irq_size);
siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
RDMAP_ETYPE_REMOTE_OPERATION,
@@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp)
struct siw_sqe *orqe;
struct siw_wqe *wqe = NULL;
+ if (unlikely(!qp->attrs.orq_size))
+ return -EPROTO;
+
/* make sure ORQ indices are current */
smp_mb();
@@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp)
*/
rv = siw_orqe_start_rx(qp);
if (rv) {
- pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
- qp_id(qp), qp->orq_get % qp->attrs.orq_size);
+ pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
+ qp_id(qp), qp->attrs.orq_size);
goto error_term;
}
rv = siw_rresp_check_ntoh(srx, frx);
@@ -1290,11 +1298,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error)
wc_status);
siw_wqe_put_mem(wqe, SIW_OP_READ);
- if (!error)
+ if (!error) {
rv = siw_check_tx_fence(qp);
- else
- /* Disable current ORQ eleement */
- WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ } else {
+ /* Disable current ORQ element */
+ if (qp->attrs.orq_size)
+ WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ }
break;
case RDMAP_RDMA_READ_REQ:
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index d19d8325588b..7989c4043db4 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1107,8 +1107,8 @@ next_wqe:
/*
* RREQ may have already been completed by inbound RRESP!
*/
- if (tx_type == SIW_OP_READ ||
- tx_type == SIW_OP_READ_LOCAL_INV) {
+ if ((tx_type == SIW_OP_READ ||
+ tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) {
/* Cleanup pending entry in ORQ */
qp->orq_put--;
qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 68fd053fc774..e389d44e5591 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -365,13 +365,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
if (rv)
goto err_out;
+ num_sqe = attrs->cap.max_send_wr;
+ num_rqe = attrs->cap.max_recv_wr;
+
/* All queue indices are derived from modulo operations
* on a free running 'get' (consumer) and 'put' (producer)
* unsigned counter. Having queue sizes at power of two
* avoids handling counter wrap around.
*/
- num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
- num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (num_sqe)
+ num_sqe = roundup_pow_of_two(num_sqe);
+ else {
+ /* Zero sized SQ is not supported */
+ rv = -EINVAL;
+ goto err_out;
+ }
+ if (num_rqe)
+ num_rqe = roundup_pow_of_two(num_rqe);
if (udata)
qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
@@ -379,7 +389,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
if (qp->sendq == NULL) {
- siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -413,7 +422,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe));
if (qp->recvq == NULL) {
- siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -966,9 +974,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
unsigned long flags;
int rv = 0;
- if (qp->srq) {
+ if (qp->srq || qp->attrs.rq_size == 0) {
*bad_wr = wr;
- return -EOPNOTSUPP; /* what else from errno.h? */
+ return -EINVAL;
}
if (!rdma_is_kernel_res(&qp->base_qp.res)) {
siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 3440dc48d02c..179ff1d068e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -413,7 +413,6 @@ struct ipoib_dev_priv {
u64 hca_caps;
struct ipoib_ethtool_st ethtool;
unsigned int max_send_sge;
- bool sm_fullmember_sendonly_support;
const struct net_device_ops *rn_ops;
};
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index a6f413491321..e16b40c09f82 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -141,8 +141,6 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- priv->sm_fullmember_sendonly_support = false;
-
if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 86e4ed64e4e2..5b3154503bf4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -275,7 +275,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
memset(&av, 0, sizeof(av));
av.type = rdma_ah_find_type(priv->ca, priv->port);
- rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)),
+ rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
rdma_ah_set_port_num(&av, priv->port);
rdma_ah_set_sl(&av, mcast->mcmember.sl);
rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
@@ -334,15 +334,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
return;
}
/*
- * Check if can send sendonly MCG's with sendonly-fullmember join state.
- * It done here after the successfully join to the broadcast group,
- * because the broadcast group must always be joined first and is always
- * re-joined if the SM changes substantially.
- */
- priv->sm_fullmember_sendonly_support =
- ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
- priv->ca, priv->port);
- /*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
* removed. However, ipoib_stop() will attempt to flush
@@ -537,9 +528,7 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
* most closely emulates the behavior, from a user space
* application perspective, of Ethernet multicast operation.
*/
- if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
- priv->sm_fullmember_sendonly_support)
- /* SM supports sendonly-fullmember, otherwise fallback to full-member */
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
rec.join_state = SENDONLY_FULLMEMBER_JOIN;
}
spin_unlock_irq(&priv->lock);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 4792b9bf400f..8fcaa1136f2c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -89,13 +89,20 @@ int iser_debug_level = 0;
module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
+static int iscsi_iser_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops iscsi_iser_size_ops = {
+ .set = iscsi_iser_set,
+ .get = param_get_uint,
+};
+
static unsigned int iscsi_max_lun = 512;
-module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
-MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512");
+module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO);
+MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)");
unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
-module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024");
+module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)");
bool iser_always_reg = true;
module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
@@ -110,6 +117,18 @@ int iser_pi_guard;
module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
+static int iscsi_iser_set(const char *val, const struct kernel_param *kp)
+{
+ int ret;
+ unsigned int n = 0;
+
+ ret = kstrtouint(val, 10, &n);
+ if (ret != 0 || n == 0)
+ return -EINVAL;
+
+ return param_set_uint(val, kp);
+}
+
/*
* iscsi_iser_recv() - Process a successful recv completion
* @conn: iscsi connection
@@ -571,13 +590,20 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
static inline unsigned int
iser_dif_prot_caps(int prot_caps)
{
- return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ?
- SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION |
- SHOST_DIX_TYPE1_PROTECTION : 0) |
- ((prot_caps & IB_PROT_T10DIF_TYPE_2) ?
- SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) |
- ((prot_caps & IB_PROT_T10DIF_TYPE_3) ?
- SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0);
+ int ret = 0;
+
+ if (prot_caps & IB_PROT_T10DIF_TYPE_1)
+ ret |= SHOST_DIF_TYPE1_PROTECTION |
+ SHOST_DIX_TYPE0_PROTECTION |
+ SHOST_DIX_TYPE1_PROTECTION;
+ if (prot_caps & IB_PROT_T10DIF_TYPE_2)
+ ret |= SHOST_DIF_TYPE2_PROTECTION |
+ SHOST_DIX_TYPE2_PROTECTION;
+ if (prot_caps & IB_PROT_T10DIF_TYPE_3)
+ ret |= SHOST_DIF_TYPE3_PROTECTION |
+ SHOST_DIX_TYPE3_PROTECTION;
+
+ return ret;
}
/**
@@ -1009,11 +1035,6 @@ static int __init iser_init(void)
iser_dbg("Starting iSER datamover...\n");
- if (iscsi_max_lun < 1) {
- iser_err("Invalid max_lun value of %u\n", iscsi_max_lun);
- return -EINVAL;
- }
-
memset(&ig, 0, sizeof(struct iser_global));
ig.desc_cache = kmem_cache_create("iser_descriptors",
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index d4e057fac219..afec40da9b58 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -169,7 +169,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
domain->sig.dif.ref_escape = true;
if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
domain->sig.dif.ref_remap = true;
-};
+}
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
@@ -390,4 +390,3 @@ err_reg:
return err;
}
-
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2bd18b006893..136f6c4492e0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -685,7 +685,7 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
iser_disconnected_handler(cma_id);
iser_free_ib_conn_res(iser_conn, destroy);
complete(&iser_conn->ib_completion);
-};
+}
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 2ba27221ea85..7305ed8976c2 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -71,7 +71,6 @@ static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp
return param_set_int(val, kp);
}
-
static inline bool
isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
{
@@ -79,7 +78,6 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
cmd->prot_op != TARGET_PROT_NORMAL);
}
-
static void
isert_qp_event_callback(struct ib_event *e, void *context)
{
@@ -232,8 +230,10 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
- device->pi_capable = ib_dev->attrs.device_cap_flags &
- IB_DEVICE_INTEGRITY_HANDOVER ? true : false;
+ if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)
+ device->pi_capable = true;
+ else
+ device->pi_capable = false;
return 0;
}
@@ -1993,7 +1993,7 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain)
if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT ||
se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)
domain->sig.dif.ref_remap = true;
-};
+}
static int
isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 4933085a864a..cecf0f7cadf9 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -233,7 +233,7 @@ static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
port_info = (struct opa_class_port_info *)rsp_mad->data;
memcpy(port_info, &port->class_port_info, sizeof(*port_info));
- port_info->base_version = OPA_MGMT_BASE_VERSION,
+ port_info->base_version = OPA_MGMT_BASE_VERSION;
port_info->class_version = OPA_EMA_CLASS_VERSION;
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
index ba00f0de14ca..b6a0abf40589 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
@@ -408,6 +408,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
"%s", str);
if (err) {
pr_err("kobject_init_and_add: %d\n", err);
+ kobject_put(&sess->kobj);
return err;
}
err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group);
@@ -419,6 +420,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
&sess->kobj, "stats");
if (err) {
pr_err("kobject_init_and_add: %d\n", err);
+ kobject_put(&sess->stats->kobj_stats);
goto remove_group;
}
@@ -469,15 +471,12 @@ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt)
return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group);
}
-void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt)
+void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt)
{
+ sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group);
+
if (clt->kobj_paths) {
kobject_del(clt->kobj_paths);
kobject_put(clt->kobj_paths);
}
}
-
-void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt)
-{
- sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group);
-}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 67f86c405a26..0a08b4b742a3 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -31,6 +31,8 @@
*/
#define RTRS_RECONNECT_SEED 8
+#define FIRST_CONN 0x01
+
MODULE_DESCRIPTION("RDMA Transport Client");
MODULE_LICENSE("GPL");
@@ -178,18 +180,18 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
}
/**
- * __rtrs_clt_change_state() - change the session state through session state
+ * rtrs_clt_change_state() - change the session state through session state
* machine.
*
* @sess: client session to change the state of.
* @new_state: state to change to.
*
- * returns true if successful, false if the requested state can not be set.
+ * Returns true if the session's state was changed to @new_state, otherwise returns false.
*
* Locks:
* state_wq lock must be hold.
*/
-static bool __rtrs_clt_change_state(struct rtrs_clt_sess *sess,
+static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
enum rtrs_clt_state new_state)
{
enum rtrs_clt_state old_state;
@@ -286,7 +288,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess,
spin_lock_irq(&sess->state_wq.lock);
if (sess->state == old_state)
- changed = __rtrs_clt_change_state(sess, new_state);
+ changed = rtrs_clt_change_state(sess, new_state);
spin_unlock_irq(&sess->state_wq.lock);
return changed;
@@ -494,7 +496,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc)
int err;
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F);
+ WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
iu = container_of(wc->wr_cqe, struct rtrs_iu,
cqe);
err = rtrs_iu_post_recv(&con->c, iu);
@@ -514,7 +516,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
u32 buf_id;
int err;
- WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F);
+ WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
@@ -621,12 +623,12 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
} else if (imm_type == RTRS_HB_MSG_IMM) {
WARN_ON(con->c.cid);
rtrs_send_hb_ack(&sess->s);
- if (sess->flags == RTRS_MSG_NEW_RKEY_F)
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F)
return rtrs_clt_recv_done(con, wc);
} else if (imm_type == RTRS_HB_ACK_IMM) {
WARN_ON(con->c.cid);
sess->s.hb_missed_cnt = 0;
- if (sess->flags == RTRS_MSG_NEW_RKEY_F)
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F)
return rtrs_clt_recv_done(con, wc);
} else {
rtrs_wrn(con->c.sess, "Unknown IMM type %u\n",
@@ -654,7 +656,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE ||
wc->wc_flags & IB_WC_WITH_IMM));
WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done);
- if (sess->flags == RTRS_MSG_NEW_RKEY_F) {
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
return rtrs_clt_recv_done(con, wc);
@@ -664,7 +666,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
case IB_WC_RDMA_WRITE:
/*
* post_send() RDMA write completions of IO reqs (read/write)
- * and hb
*/
break;
@@ -680,7 +681,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
for (i = 0; i < q_size; i++) {
- if (sess->flags == RTRS_MSG_NEW_RKEY_F) {
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
struct rtrs_iu *iu = &con->rsp_ius[i];
err = rtrs_iu_post_recv(&con->c, iu);
@@ -1318,6 +1319,12 @@ out_err:
static void free_permits(struct rtrs_clt *clt)
{
+ if (clt->permits_map) {
+ size_t sz = clt->queue_depth;
+
+ wait_event(clt->permits_wait,
+ find_first_bit(clt->permits_map, sz) >= sz);
+ }
kfree(clt->permits_map);
clt->permits_map = NULL;
kfree(clt->permits);
@@ -1353,21 +1360,14 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess,
bool changed;
spin_lock_irq(&sess->state_wq.lock);
- *old_state = sess->state;
- changed = __rtrs_clt_change_state(sess, new_state);
+ if (old_state)
+ *old_state = sess->state;
+ changed = rtrs_clt_change_state(sess, new_state);
spin_unlock_irq(&sess->state_wq.lock);
return changed;
}
-static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
- enum rtrs_clt_state new_state)
-{
- enum rtrs_clt_state old_state;
-
- return rtrs_clt_change_state_get_old(sess, new_state, &old_state);
-}
-
static void rtrs_clt_hb_err_handler(struct rtrs_con *c)
{
struct rtrs_clt_con *con = container_of(c, typeof(*con), c);
@@ -1511,7 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con)
static int create_con_cq_qp(struct rtrs_clt_con *con)
{
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
- u16 wr_queue_size;
+ u32 max_send_wr, max_recv_wr, cq_size;
int err, cq_vector;
struct rtrs_msg_rkey_rsp *rsp;
@@ -1523,7 +1523,8 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
* + 2 for drain and heartbeat
* in case qp gets into error state
*/
- wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2;
+ max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2;
+ max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2;
/* We must be the first here */
if (WARN_ON(sess->s.dev))
return -EINVAL;
@@ -1555,25 +1556,29 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
/* Shared between connections */
sess->s.dev_ref++;
- wr_queue_size =
+ max_send_wr =
min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr,
/* QD * (REQ + RSP + FR REGS or INVS) + drain */
sess->queue_depth * 3 + 1);
+ max_recv_wr =
+ min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr,
+ sess->queue_depth * 3 + 1);
}
/* alloc iu to recv new rkey reply when server reports flags set */
- if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
- con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp),
+ if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
+ con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp),
GFP_KERNEL, sess->s.dev->ib_dev,
DMA_FROM_DEVICE,
rtrs_clt_rdma_done);
if (!con->rsp_ius)
return -ENOMEM;
- con->queue_size = wr_queue_size;
+ con->queue_size = max_recv_wr;
}
+ cq_size = max_send_wr + max_recv_wr;
cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
- cq_vector, wr_queue_size, wr_queue_size,
- IB_POLL_SOFTIRQ);
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
/*
* In case of error we do not bother to clean previous allocations,
* since destroy_con_cq_qp() must be called.
@@ -1657,6 +1662,7 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
.cid_num = cpu_to_le16(sess->s.con_num),
.recon_cnt = cpu_to_le16(sess->s.recon_cnt),
};
+ msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0;
uuid_copy(&msg.sess_uuid, &sess->s.uuid);
uuid_copy(&msg.paths_uuid, &clt->paths_uuid);
@@ -1742,6 +1748,8 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
scnprintf(sess->hca_name, sizeof(sess->hca_name),
sess->s.dev->ib_dev->name);
sess->s.src_addr = con->c.cm_id->route.addr.src_addr;
+ /* set for_new_clt, to allow future reconnect on any path */
+ sess->for_new_clt = 1;
}
return 0;
@@ -1788,7 +1796,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait)
{
- if (rtrs_clt_change_state(sess, RTRS_CLT_CLOSING))
+ if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL))
queue_work(rtrs_wq, &sess->close_work);
if (wait)
flush_work(&sess->close_work);
@@ -2174,7 +2182,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
cancel_delayed_work_sync(&sess->reconnect_dwork);
rtrs_clt_stop_and_destroy_conns(sess);
- rtrs_clt_change_state(sess, RTRS_CLT_CLOSED);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL);
}
static int init_conns(struct rtrs_clt_sess *sess)
@@ -2226,7 +2234,7 @@ destroy:
* doing rdma_resolve_addr(), switch to CONNECTION_ERR state
* manually to keep reconnecting.
*/
- rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
return err;
}
@@ -2243,7 +2251,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
rtrs_err(sess->clt, "Sess info request send failed: %s\n",
ib_wc_status_msg(wc->status));
- rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
return;
}
@@ -2367,7 +2375,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
out:
rtrs_clt_update_wc_stats(con);
rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
- rtrs_clt_change_state(sess, state);
+ rtrs_clt_change_state_get_old(sess, state, NULL);
}
static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
@@ -2423,7 +2431,6 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
err = -ECONNRESET;
else
err = -ETIMEDOUT;
- goto out;
}
out:
@@ -2433,7 +2440,7 @@ out:
rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
if (unlikely(err))
/* If we've never taken async path because of malloc problems */
- rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
+ rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
return err;
}
@@ -2490,7 +2497,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
/* Stop everything */
rtrs_clt_stop_and_destroy_conns(sess);
msleep(RTRS_RECONNECT_BACKOFF);
- if (rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING)) {
+ if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) {
err = init_sess(sess);
if (err)
goto reconnect_again;
@@ -2499,7 +2506,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
return;
reconnect_again:
- if (rtrs_clt_change_state(sess, RTRS_CLT_RECONNECTING)) {
+ if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) {
sess->stats->reconnects.fail_cnt++;
delay_ms = clt->reconnect_delay_sec * 1000;
queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
@@ -2565,11 +2572,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
clt->dev.class = rtrs_clt_dev_class;
clt->dev.release = rtrs_clt_dev_release;
err = dev_set_name(&clt->dev, "%s", sessname);
- if (err) {
- free_percpu(clt->pcpu_path);
- kfree(clt);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err;
/*
* Suppress user space notification until
* sysfs files are created
@@ -2577,44 +2581,35 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
dev_set_uevent_suppress(&clt->dev, true);
err = device_register(&clt->dev);
if (err) {
- free_percpu(clt->pcpu_path);
put_device(&clt->dev);
- return ERR_PTR(err);
+ goto err;
}
clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj);
if (!clt->kobj_paths) {
- free_percpu(clt->pcpu_path);
- device_unregister(&clt->dev);
- return NULL;
+ err = -ENOMEM;
+ goto err_dev;
}
err = rtrs_clt_create_sysfs_root_files(clt);
if (err) {
- free_percpu(clt->pcpu_path);
kobject_del(clt->kobj_paths);
kobject_put(clt->kobj_paths);
- device_unregister(&clt->dev);
- return ERR_PTR(err);
+ goto err_dev;
}
dev_set_uevent_suppress(&clt->dev, false);
kobject_uevent(&clt->dev.kobj, KOBJ_ADD);
return clt;
-}
-
-static void wait_for_inflight_permits(struct rtrs_clt *clt)
-{
- if (clt->permits_map) {
- size_t sz = clt->queue_depth;
-
- wait_event(clt->permits_wait,
- find_first_bit(clt->permits_map, sz) >= sz);
- }
+err_dev:
+ device_unregister(&clt->dev);
+err:
+ free_percpu(clt->pcpu_path);
+ kfree(clt);
+ return ERR_PTR(err);
}
static void free_clt(struct rtrs_clt *clt)
{
- wait_for_inflight_permits(clt);
free_permits(clt);
free_percpu(clt->pcpu_path);
mutex_destroy(&clt->paths_ev_mutex);
@@ -2672,6 +2667,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
err = PTR_ERR(sess);
goto close_all_sess;
}
+ if (!i)
+ sess->for_new_clt = 1;
list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
err = init_sess(sess);
@@ -2702,8 +2699,7 @@ close_all_sess:
rtrs_clt_close_conns(sess, true);
kobject_put(&sess->kobj);
}
- rtrs_clt_destroy_sysfs_root_files(clt);
- rtrs_clt_destroy_sysfs_root_folders(clt);
+ rtrs_clt_destroy_sysfs_root(clt);
free_clt(clt);
out:
@@ -2720,8 +2716,7 @@ void rtrs_clt_close(struct rtrs_clt *clt)
struct rtrs_clt_sess *sess, *tmp;
/* Firstly forbid sysfs access */
- rtrs_clt_destroy_sysfs_root_files(clt);
- rtrs_clt_destroy_sysfs_root_folders(clt);
+ rtrs_clt_destroy_sysfs_root(clt);
/* Now it is safe to iterate over all paths without locks */
list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index b8dbd701b3cb..692bc83e1f09 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -143,6 +143,7 @@ struct rtrs_clt_sess {
int max_send_sge;
u32 flags;
struct kobject kobj;
+ u8 for_new_clt;
struct rtrs_clt_stats *stats;
/* cache hca_port and hca_name to display in sysfs */
u8 hca_port;
@@ -243,8 +244,7 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats,
/* rtrs-clt-sysfs.c */
int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt);
-void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt);
-void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt);
+void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt);
int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess);
void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 3f2918671dbe..8caad0a2322b 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -188,7 +188,9 @@ struct rtrs_msg_conn_req {
__le16 recon_cnt;
uuid_t sess_uuid;
uuid_t paths_uuid;
- u8 reserved[12];
+ u8 first_conn : 1;
+ u8 reserved_bits : 7;
+ u8 reserved[11];
};
/**
@@ -303,8 +305,9 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
struct ib_send_wr *head);
int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con,
- u32 max_send_sge, int cq_vector, u16 cq_size,
- u16 wr_queue_size, enum ib_poll_context poll_ctx);
+ u32 max_send_sge, int cq_vector, int cq_size,
+ u32 max_send_wr, u32 max_recv_wr,
+ enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);
void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index d2edff3b8f0d..126a96e75c62 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -51,6 +51,8 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj,
sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str));
rtrs_info(s, "disconnect for path %s requested\n", str);
+ /* First remove this sysfs file itself to avoid a deadlock */
+ sysfs_remove_file_self(&sess->kobj, &attr->attr);
close_sess(sess);
return count;
@@ -181,6 +183,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
err = -ENOMEM;
pr_err("kobject_create_and_add(): %d\n", err);
device_del(&srv->dev);
+ put_device(&srv->dev);
goto unlock;
}
dev_set_uevent_suppress(&srv->dev, false);
@@ -206,6 +209,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
kobject_put(srv->kobj_paths);
mutex_unlock(&srv->paths_mutex);
device_del(&srv->dev);
+ put_device(&srv->dev);
} else {
mutex_unlock(&srv->paths_mutex);
}
@@ -234,6 +238,7 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess)
&sess->kobj, "stats");
if (err) {
rtrs_err(s, "kobject_init_and_add(): %d\n", err);
+ kobject_put(&sess->stats->kobj_stats);
return err;
}
err = sysfs_create_group(&sess->stats->kobj_stats,
@@ -290,8 +295,8 @@ remove_group:
sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
put_kobj:
kobject_del(&sess->kobj);
- kobject_put(&sess->kobj);
destroy_root:
+ kobject_put(&sess->kobj);
rtrs_srv_destroy_once_sysfs_root_folders(sess);
return err;
@@ -302,7 +307,7 @@ void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess)
if (sess->kobj.state_in_sysfs) {
kobject_del(&sess->stats->kobj_stats);
kobject_put(&sess->stats->kobj_stats);
- kobject_del(&sess->kobj);
+ sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
kobject_put(&sess->kobj);
rtrs_srv_destroy_once_sysfs_root_folders(sess);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index c42fd470c4eb..d071809e3ed2 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -222,7 +222,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
struct rtrs_srv_mr *srv_mr;
struct rtrs_srv *srv = sess->srv;
- struct ib_send_wr inv_wr, imm_wr;
+ struct ib_send_wr inv_wr;
+ struct ib_rdma_wr imm_wr;
struct ib_rdma_wr *wr = NULL;
enum ib_send_flags flags;
size_t sg_cnt;
@@ -267,21 +268,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
WARN_ON_ONCE(rkey != wr->rkey);
wr->wr.opcode = IB_WR_RDMA_WRITE;
+ wr->wr.wr_cqe = &io_comp_cqe;
wr->wr.ex.imm_data = 0;
wr->wr.send_flags = 0;
if (need_inval && always_invalidate) {
wr->wr.next = &rwr.wr;
rwr.wr.next = &inv_wr;
- inv_wr.next = &imm_wr;
+ inv_wr.next = &imm_wr.wr;
} else if (always_invalidate) {
wr->wr.next = &rwr.wr;
- rwr.wr.next = &imm_wr;
+ rwr.wr.next = &imm_wr.wr;
} else if (need_inval) {
wr->wr.next = &inv_wr;
- inv_wr.next = &imm_wr;
+ inv_wr.next = &imm_wr.wr;
} else {
- wr->wr.next = &imm_wr;
+ wr->wr.next = &imm_wr.wr;
}
/*
* From time to time we have to post signaled sends,
@@ -294,16 +296,18 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
inv_wr.sg_list = NULL;
inv_wr.num_sge = 0;
inv_wr.opcode = IB_WR_SEND_WITH_INV;
+ inv_wr.wr_cqe = &io_comp_cqe;
inv_wr.send_flags = 0;
inv_wr.ex.invalidate_rkey = rkey;
}
- imm_wr.next = NULL;
+ imm_wr.wr.next = NULL;
if (always_invalidate) {
struct rtrs_msg_rkey_rsp *msg;
srv_mr = &sess->mrs[id->msg_id];
rwr.wr.opcode = IB_WR_REG_MR;
+ rwr.wr.wr_cqe = &local_reg_cqe;
rwr.wr.num_sge = 0;
rwr.mr = srv_mr->mr;
rwr.wr.send_flags = 0;
@@ -318,22 +322,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
list.addr = srv_mr->iu->dma_addr;
list.length = sizeof(*msg);
list.lkey = sess->s.dev->ib_pd->local_dma_lkey;
- imm_wr.sg_list = &list;
- imm_wr.num_sge = 1;
- imm_wr.opcode = IB_WR_SEND_WITH_IMM;
+ imm_wr.wr.sg_list = &list;
+ imm_wr.wr.num_sge = 1;
+ imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM;
ib_dma_sync_single_for_device(sess->s.dev->ib_dev,
srv_mr->iu->dma_addr,
srv_mr->iu->size, DMA_TO_DEVICE);
} else {
- imm_wr.sg_list = NULL;
- imm_wr.num_sge = 0;
- imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
+ imm_wr.wr.sg_list = NULL;
+ imm_wr.wr.num_sge = 0;
+ imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
}
- imm_wr.send_flags = flags;
- imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id,
+ imm_wr.wr.send_flags = flags;
+ imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id,
0, need_inval));
- imm_wr.wr_cqe = &io_comp_cqe;
+ imm_wr.wr.wr_cqe = &io_comp_cqe;
ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr,
offset, DMA_BIDIRECTIONAL);
@@ -360,7 +364,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
{
struct rtrs_sess *s = con->c.sess;
struct rtrs_srv_sess *sess = to_srv_sess(s);
- struct ib_send_wr inv_wr, imm_wr, *wr = NULL;
+ struct ib_send_wr inv_wr, *wr = NULL;
+ struct ib_rdma_wr imm_wr;
struct ib_reg_wr rwr;
struct rtrs_srv *srv = sess->srv;
struct rtrs_srv_mr *srv_mr;
@@ -379,6 +384,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
if (need_inval) {
if (likely(sg_cnt)) {
+ inv_wr.wr_cqe = &io_comp_cqe;
inv_wr.sg_list = NULL;
inv_wr.num_sge = 0;
inv_wr.opcode = IB_WR_SEND_WITH_INV;
@@ -396,15 +402,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
if (need_inval && always_invalidate) {
wr = &inv_wr;
inv_wr.next = &rwr.wr;
- rwr.wr.next = &imm_wr;
+ rwr.wr.next = &imm_wr.wr;
} else if (always_invalidate) {
wr = &rwr.wr;
- rwr.wr.next = &imm_wr;
+ rwr.wr.next = &imm_wr.wr;
} else if (need_inval) {
wr = &inv_wr;
- inv_wr.next = &imm_wr;
+ inv_wr.next = &imm_wr.wr;
} else {
- wr = &imm_wr;
+ wr = &imm_wr.wr;
}
/*
* From time to time we have to post signalled sends,
@@ -413,14 +419,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ?
0 : IB_SEND_SIGNALED;
imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval);
- imm_wr.next = NULL;
+ imm_wr.wr.next = NULL;
if (always_invalidate) {
struct ib_sge list;
struct rtrs_msg_rkey_rsp *msg;
srv_mr = &sess->mrs[id->msg_id];
- rwr.wr.next = &imm_wr;
+ rwr.wr.next = &imm_wr.wr;
rwr.wr.opcode = IB_WR_REG_MR;
+ rwr.wr.wr_cqe = &local_reg_cqe;
rwr.wr.num_sge = 0;
rwr.wr.send_flags = 0;
rwr.mr = srv_mr->mr;
@@ -435,21 +442,21 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
list.addr = srv_mr->iu->dma_addr;
list.length = sizeof(*msg);
list.lkey = sess->s.dev->ib_pd->local_dma_lkey;
- imm_wr.sg_list = &list;
- imm_wr.num_sge = 1;
- imm_wr.opcode = IB_WR_SEND_WITH_IMM;
+ imm_wr.wr.sg_list = &list;
+ imm_wr.wr.num_sge = 1;
+ imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM;
ib_dma_sync_single_for_device(sess->s.dev->ib_dev,
srv_mr->iu->dma_addr,
srv_mr->iu->size, DMA_TO_DEVICE);
} else {
- imm_wr.sg_list = NULL;
- imm_wr.num_sge = 0;
- imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
+ imm_wr.wr.sg_list = NULL;
+ imm_wr.wr.num_sge = 0;
+ imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
}
- imm_wr.send_flags = flags;
- imm_wr.wr_cqe = &io_comp_cqe;
+ imm_wr.wr.send_flags = flags;
+ imm_wr.wr.wr_cqe = &io_comp_cqe;
- imm_wr.ex.imm_data = cpu_to_be32(imm);
+ imm_wr.wr.ex.imm_data = cpu_to_be32(imm);
err = ib_post_send(id->con->c.qp, wr, NULL);
if (unlikely(err))
@@ -651,7 +658,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
if (!srv_mr->iu) {
err = -ENOMEM;
rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err);
- goto free_iu;
+ goto dereg_mr;
}
}
/* Eventually dma addr for each chunk can be cached */
@@ -667,7 +674,6 @@ err:
srv_mr = &sess->mrs[mri];
sgt = &srv_mr->sgt;
mr = srv_mr->mr;
-free_iu:
rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
dereg_mr:
ib_dereg_mr(mr);
@@ -814,7 +820,7 @@ static int process_info_req(struct rtrs_srv_con *con,
rwr[mri].wr.opcode = IB_WR_REG_MR;
rwr[mri].wr.wr_cqe = &local_reg_cqe;
rwr[mri].wr.num_sge = 0;
- rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED;
+ rwr[mri].wr.send_flags = 0;
rwr[mri].mr = mr;
rwr[mri].key = mr->rkey;
rwr[mri].access = (IB_ACCESS_LOCAL_WRITE |
@@ -1238,7 +1244,6 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
case IB_WC_SEND:
/*
* post_send() RDMA write completions of IO reqs (read/write)
- * and hb
*/
atomic_add(srv->queue_depth, &con->sq_wr_avail);
@@ -1328,7 +1333,8 @@ static void free_srv(struct rtrs_srv *srv)
}
static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
- const uuid_t *paths_uuid)
+ const uuid_t *paths_uuid,
+ bool first_conn)
{
struct rtrs_srv *srv;
int i;
@@ -1341,13 +1347,18 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
return srv;
}
}
+ mutex_unlock(&ctx->srv_mutex);
+ /*
+ * If this request is not the first connection request from the
+ * client for this session then fail and return error.
+ */
+ if (!first_conn)
+ return ERR_PTR(-ENXIO);
/* need to allocate a new srv */
srv = kzalloc(sizeof(*srv), GFP_KERNEL);
- if (!srv) {
- mutex_unlock(&ctx->srv_mutex);
- return NULL;
- }
+ if (!srv)
+ return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&srv->paths_list);
mutex_init(&srv->paths_mutex);
@@ -1357,8 +1368,6 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
srv->ctx = ctx;
device_initialize(&srv->dev);
srv->dev.release = rtrs_srv_dev_release;
- list_add(&srv->ctx_list, &ctx->srv_list);
- mutex_unlock(&ctx->srv_mutex);
srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks),
GFP_KERNEL);
@@ -1371,6 +1380,9 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
goto err_free_chunks;
}
refcount_set(&srv->refcount, 1);
+ mutex_lock(&ctx->srv_mutex);
+ list_add(&srv->ctx_list, &ctx->srv_list);
+ mutex_unlock(&ctx->srv_mutex);
return srv;
@@ -1381,7 +1393,7 @@ err_free_chunks:
err_free_srv:
kfree(srv);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
static void put_srv(struct rtrs_srv *srv)
@@ -1461,10 +1473,12 @@ static bool __is_path_w_addr_exists(struct rtrs_srv *srv,
static void free_sess(struct rtrs_srv_sess *sess)
{
- if (sess->kobj.state_in_sysfs)
+ if (sess->kobj.state_in_sysfs) {
+ kobject_del(&sess->kobj);
kobject_put(&sess->kobj);
- else
+ } else {
kfree(sess);
+ }
}
static void rtrs_srv_close_work(struct work_struct *work)
@@ -1586,7 +1600,7 @@ static int create_con(struct rtrs_srv_sess *sess,
struct rtrs_sess *s = &sess->s;
struct rtrs_srv_con *con;
- u16 cq_size, wr_queue_size;
+ u32 cq_size, wr_queue_size;
int err, cq_vector;
con = kzalloc(sizeof(*con), GFP_KERNEL);
@@ -1600,7 +1614,7 @@ static int create_con(struct rtrs_srv_sess *sess,
con->c.cm_id = cm_id;
con->c.sess = &sess->s;
con->c.cid = cid;
- atomic_set(&con->wr_cnt, 0);
+ atomic_set(&con->wr_cnt, 1);
if (con->c.cid == 0) {
/*
@@ -1630,7 +1644,8 @@ static int create_con(struct rtrs_srv_sess *sess,
/* TODO: SOFTIRQ can be faster, but be careful with softirq context */
err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size,
- wr_queue_size, IB_POLL_WORKQUEUE);
+ wr_queue_size, wr_queue_size,
+ IB_POLL_WORKQUEUE);
if (err) {
rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err);
goto free_con;
@@ -1781,13 +1796,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
goto reject_w_econnreset;
}
recon_cnt = le16_to_cpu(msg->recon_cnt);
- srv = get_or_create_srv(ctx, &msg->paths_uuid);
- /*
- * "refcount == 0" happens if a previous thread calls get_or_create_srv
- * allocate srv, but chunks of srv are not allocated yet.
- */
- if (!srv || refcount_read(&srv->refcount) == 0) {
- err = -ENOMEM;
+ srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn);
+ if (IS_ERR(srv)) {
+ err = PTR_ERR(srv);
goto reject_w_err;
}
mutex_lock(&srv->paths_mutex);
@@ -1862,8 +1873,8 @@ reject_w_econnreset:
return rtrs_rdma_do_reject(cm_id, -ECONNRESET);
close_and_return_err:
- close_sess(sess);
mutex_unlock(&srv->paths_mutex);
+ close_sess(sess);
return err;
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 2e3a849e0a77..d13aff0aa816 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -182,16 +182,16 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
u32 imm_data, enum ib_send_flags flags,
struct ib_send_wr *head)
{
- struct ib_send_wr wr;
+ struct ib_rdma_wr wr;
- wr = (struct ib_send_wr) {
- .wr_cqe = cqe,
- .send_flags = flags,
- .opcode = IB_WR_RDMA_WRITE_WITH_IMM,
- .ex.imm_data = cpu_to_be32(imm_data),
+ wr = (struct ib_rdma_wr) {
+ .wr.wr_cqe = cqe,
+ .wr.send_flags = flags,
+ .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM,
+ .wr.ex.imm_data = cpu_to_be32(imm_data),
};
- return rtrs_post_send(con->qp, head, &wr);
+ return rtrs_post_send(con->qp, head, &wr.wr);
}
EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty);
@@ -231,14 +231,14 @@ static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size,
}
static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
- u16 wr_queue_size, u32 max_sge)
+ u32 max_send_wr, u32 max_recv_wr, u32 max_sge)
{
struct ib_qp_init_attr init_attr = {NULL};
struct rdma_cm_id *cm_id = con->cm_id;
int ret;
- init_attr.cap.max_send_wr = wr_queue_size;
- init_attr.cap.max_recv_wr = wr_queue_size;
+ init_attr.cap.max_send_wr = max_send_wr;
+ init_attr.cap.max_recv_wr = max_recv_wr;
init_attr.cap.max_recv_sge = 1;
init_attr.event_handler = qp_event_handler;
init_attr.qp_context = con;
@@ -260,8 +260,9 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
}
int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
- u32 max_send_sge, int cq_vector, u16 cq_size,
- u16 wr_queue_size, enum ib_poll_context poll_ctx)
+ u32 max_send_sge, int cq_vector, int cq_size,
+ u32 max_send_wr, u32 max_recv_wr,
+ enum ib_poll_context poll_ctx)
{
int err;
@@ -269,7 +270,8 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
if (err)
return err;
- err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge);
+ err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr,
+ max_send_sge);
if (err) {
ib_free_cq(con->cq);
con->cq = NULL;
@@ -308,7 +310,7 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess)
imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
- IB_SEND_SIGNALED, NULL);
+ 0, NULL);
if (err) {
sess->hb_err_handler(usr_con);
return;
@@ -337,7 +339,7 @@ static void hb_work(struct work_struct *work)
}
imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
- IB_SEND_SIGNALED, NULL);
+ 0, NULL);
if (err) {
sess->hb_err_handler(usr_con);
return;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 5492b66a8153..31f8aa2c40ed 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev,
struct srp_rdma_ch *ch;
struct srp_device *srp_dev = host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
- int ret, node_idx, node, cpu, i;
+ int ret, i, ch_idx;
unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
uint32_t max_iu_len;
@@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev,
goto out;
ret = -ENOMEM;
- if (target->ch_count == 0)
+ if (target->ch_count == 0) {
target->ch_count =
- max_t(unsigned int, num_online_nodes(),
- min(ch_count ?:
- min(4 * num_online_nodes(),
- ibdev->num_comp_vectors),
- num_online_cpus()));
+ min(ch_count ?:
+ max(4 * num_online_nodes(),
+ ibdev->num_comp_vectors),
+ num_online_cpus());
+ }
+
target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
GFP_KERNEL);
if (!target->ch)
goto out;
- node_idx = 0;
- for_each_online_node(node) {
- const int ch_start = (node_idx * target->ch_count /
- num_online_nodes());
- const int ch_end = ((node_idx + 1) * target->ch_count /
- num_online_nodes());
- const int cv_start = node_idx * ibdev->num_comp_vectors /
- num_online_nodes();
- const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
- num_online_nodes();
- int cpu_idx = 0;
-
- for_each_online_cpu(cpu) {
- if (cpu_to_node(cpu) != node)
- continue;
- if (ch_start + cpu_idx >= ch_end)
- continue;
- ch = &target->ch[ch_start + cpu_idx];
- ch->target = target;
- ch->comp_vector = cv_start == cv_end ? cv_start :
- cv_start + cpu_idx % (cv_end - cv_start);
- spin_lock_init(&ch->lock);
- INIT_LIST_HEAD(&ch->free_tx);
- ret = srp_new_cm_id(ch);
- if (ret)
- goto err_disconnect;
+ for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
+ ch = &target->ch[ch_idx];
+ ch->target = target;
+ ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
+ spin_lock_init(&ch->lock);
+ INIT_LIST_HEAD(&ch->free_tx);
+ ret = srp_new_cm_id(ch);
+ if (ret)
+ goto err_disconnect;
- ret = srp_create_ch_ib(ch);
- if (ret)
- goto err_disconnect;
+ ret = srp_create_ch_ib(ch);
+ if (ret)
+ goto err_disconnect;
- ret = srp_alloc_req_data(ch);
- if (ret)
- goto err_disconnect;
+ ret = srp_alloc_req_data(ch);
+ if (ret)
+ goto err_disconnect;
- ret = srp_connect_ch(ch, max_iu_len, multich);
- if (ret) {
- char dst[64];
-
- if (target->using_rdma_cm)
- snprintf(dst, sizeof(dst), "%pIS",
- &target->rdma_cm.dst);
- else
- snprintf(dst, sizeof(dst), "%pI6",
- target->ib_cm.orig_dgid.raw);
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection %d/%d to %s failed\n",
- ch_start + cpu_idx,
- target->ch_count, dst);
- if (node_idx == 0 && cpu_idx == 0) {
- goto free_ch;
- } else {
- srp_free_ch_ib(target, ch);
- srp_free_req_data(target, ch);
- target->ch_count = ch - target->ch;
- goto connected;
- }
- }
+ ret = srp_connect_ch(ch, max_iu_len, multich);
+ if (ret) {
+ char dst[64];
- multich = true;
- cpu_idx++;
+ if (target->using_rdma_cm)
+ snprintf(dst, sizeof(dst), "%pIS",
+ &target->rdma_cm.dst);
+ else
+ snprintf(dst, sizeof(dst), "%pI6",
+ target->ib_cm.orig_dgid.raw);
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "Connection %d/%d to %s failed\n",
+ ch_idx,
+ target->ch_count, dst);
+ if (ch_idx == 0) {
+ goto free_ch;
+ } else {
+ srp_free_ch_ib(target, ch);
+ srp_free_req_data(target, ch);
+ target->ch_count = ch - target->ch;
+ goto connected;
+ }
}
- node_idx++;
+ multich = true;
}
connected: