aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core/verbs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/verbs.c')
-rw-r--r--drivers/infiniband/core/verbs.c313
1 files changed, 177 insertions, 136 deletions
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e36d27ed4daa..93025d2009b8 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -124,16 +124,24 @@ EXPORT_SYMBOL(ib_wc_status_msg);
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
- case IB_RATE_2_5_GBPS: return 1;
- case IB_RATE_5_GBPS: return 2;
- case IB_RATE_10_GBPS: return 4;
- case IB_RATE_20_GBPS: return 8;
- case IB_RATE_30_GBPS: return 12;
- case IB_RATE_40_GBPS: return 16;
- case IB_RATE_60_GBPS: return 24;
- case IB_RATE_80_GBPS: return 32;
- case IB_RATE_120_GBPS: return 48;
- default: return -1;
+ case IB_RATE_2_5_GBPS: return 1;
+ case IB_RATE_5_GBPS: return 2;
+ case IB_RATE_10_GBPS: return 4;
+ case IB_RATE_20_GBPS: return 8;
+ case IB_RATE_30_GBPS: return 12;
+ case IB_RATE_40_GBPS: return 16;
+ case IB_RATE_60_GBPS: return 24;
+ case IB_RATE_80_GBPS: return 32;
+ case IB_RATE_120_GBPS: return 48;
+ case IB_RATE_14_GBPS: return 6;
+ case IB_RATE_56_GBPS: return 22;
+ case IB_RATE_112_GBPS: return 45;
+ case IB_RATE_168_GBPS: return 67;
+ case IB_RATE_25_GBPS: return 10;
+ case IB_RATE_100_GBPS: return 40;
+ case IB_RATE_200_GBPS: return 80;
+ case IB_RATE_300_GBPS: return 120;
+ default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mult);
@@ -141,16 +149,24 @@ EXPORT_SYMBOL(ib_rate_to_mult);
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
- case 1: return IB_RATE_2_5_GBPS;
- case 2: return IB_RATE_5_GBPS;
- case 4: return IB_RATE_10_GBPS;
- case 8: return IB_RATE_20_GBPS;
- case 12: return IB_RATE_30_GBPS;
- case 16: return IB_RATE_40_GBPS;
- case 24: return IB_RATE_60_GBPS;
- case 32: return IB_RATE_80_GBPS;
- case 48: return IB_RATE_120_GBPS;
- default: return IB_RATE_PORT_CURRENT;
+ case 1: return IB_RATE_2_5_GBPS;
+ case 2: return IB_RATE_5_GBPS;
+ case 4: return IB_RATE_10_GBPS;
+ case 8: return IB_RATE_20_GBPS;
+ case 12: return IB_RATE_30_GBPS;
+ case 16: return IB_RATE_40_GBPS;
+ case 24: return IB_RATE_60_GBPS;
+ case 32: return IB_RATE_80_GBPS;
+ case 48: return IB_RATE_120_GBPS;
+ case 6: return IB_RATE_14_GBPS;
+ case 22: return IB_RATE_56_GBPS;
+ case 45: return IB_RATE_112_GBPS;
+ case 67: return IB_RATE_168_GBPS;
+ case 10: return IB_RATE_25_GBPS;
+ case 40: return IB_RATE_100_GBPS;
+ case 80: return IB_RATE_200_GBPS;
+ case 120: return IB_RATE_300_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
}
}
EXPORT_SYMBOL(mult_to_ib_rate);
@@ -247,6 +263,10 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
}
+ pd->res.type = RDMA_RESTRACK_PD;
+ pd->res.kern_name = caller;
+ rdma_restrack_add(&pd->res);
+
if (mr_access_flags) {
struct ib_mr *mr;
@@ -296,6 +316,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
+ rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
ret = pd->device->dealloc_pd(pd);
@@ -421,8 +442,7 @@ static bool find_gid_index(const union ib_gid *gid,
const struct ib_gid_attr *gid_attr,
void *context)
{
- struct find_gid_index_context *ctx =
- (struct find_gid_index_context *)context;
+ struct find_gid_index_context *ctx = context;
if (ctx->gid_type != gid_attr->gid_type)
return false;
@@ -481,8 +501,53 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/* Resolve destination mac address and hop limit for unicast destination
+ * GID entry, considering the source GID entry as well.
+ * ah_attribute must have have valid port_num, sgid_index.
+ */
+static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ib_gid_attr sgid_attr;
+ struct ib_global_route *grh;
+ int hop_limit = 0xff;
+ union ib_gid sgid;
+ int ret;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ ret = ib_query_gid(device,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
+ &sgid, &sgid_attr);
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ return ret;
+ }
+
+ /* If destination is link local and source GID is RoCEv1,
+ * IP stack is not used.
+ */
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
+ sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
+ goto done;
+ }
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ sgid_attr.ndev, &hop_limit);
+done:
+ dev_put(sgid_attr.ndev);
+
+ grh->hop_limit = hop_limit;
+ return ret;
+}
+
/*
- * This function creates ah from the incoming packet.
+ * This function initializes address handle attributes from the incoming packet.
* Incoming packet has dgid of the receiver node on which this code is
* getting executed and, sgid contains the GID of the sender.
*
@@ -490,13 +555,10 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
- * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
- * position of arguments dgid and sgid do not match the order of the
- * parameters.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
- const struct ib_wc *wc, const struct ib_grh *grh,
- struct rdma_ah_attr *ah_attr)
+int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -523,57 +585,33 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (ret)
return ret;
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
if (rdma_protocol_roce(device, port_num)) {
- int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
- struct net_device *idev;
- struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->roce.dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- &if_index, &hoplimit);
- if (ret) {
- dev_put(idev);
- return ret;
- }
-
- resolved_dev = dev_get_by_index(&init_net, if_index);
- rcu_read_lock();
- if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
- resolved_dev))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
+ ret = get_sgid_index_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type, &gid_index);
if (ret)
return ret;
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &dgid, gid_type, &gid_index);
- if (ret)
- return ret;
- }
-
- rdma_ah_set_dlid(ah_attr, wc->slid);
- rdma_ah_set_sl(ah_attr, wc->sl);
- rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- rdma_ah_set_port_num(ah_attr, port_num);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ } else {
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (!rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_GRH) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
IB_GID_TYPE_IB,
@@ -584,18 +622,17 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
} else {
gid_index = 0;
}
- }
-
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ }
+ return 0;
}
- return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_wc);
+EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
@@ -603,7 +640,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
struct rdma_ah_attr ah_attr;
int ret;
- ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+ ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
@@ -850,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = device->create_qp(pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
if (IS_ERR(qp))
return qp;
@@ -860,9 +897,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
return ERR_PTR(ret);
}
- qp->device = device;
qp->real_qp = qp;
- qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
@@ -890,7 +925,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
atomic_inc(&qp_init_attr->srq->usecnt);
}
- qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->xrcd = NULL;
@@ -1269,16 +1303,8 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
- return 0;
-
grh = rdma_ah_retrieve_grh(ah_attr);
- if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
- ah_attr->roce.dmac);
- return 0;
- }
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
__be32 addr = 0;
@@ -1290,40 +1316,52 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
(char *)ah_attr->roce.dmac);
}
} else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
-
- ifindex = sgid_attr.ndev->ifindex;
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ }
+ return ret;
+}
- ret =
- rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
- ah_attr->roce.dmac,
- NULL, &ifindex, &hop_limit);
+/**
+ * IB core internal function to perform QP attributes modification.
+ */
+static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ int ret;
- dev_put(sgid_attr.ndev);
+ if (rdma_ib_or_roce(qp->device, port)) {
+ if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
+ pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->rq_psn &= 0xffffff;
+ }
- grh->hop_limit = hop_limit;
+ if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
+ pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->sq_psn &= 0xffffff;
+ }
}
-out:
+
+ ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
+ if (!ret && (attr_mask & IB_QP_PORT))
+ qp->port = attr->port_num;
+
return ret;
}
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
- * @qp: The QP to modify.
+ * @ib_qp: The QP to modify.
* @attr: On input, specifies the QP attributes to modify. On output,
* the current values of selected QP attributes are returned.
* @attr_mask: A bit-mask used to specify which attributes of the QP
@@ -1332,21 +1370,20 @@ out:
* are being modified.
* It returns 0 on success and returns appropriate error code on error.
*/
-int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
+ struct ib_qp *qp = ib_qp->real_qp;
int ret;
- if (attr_mask & IB_QP_AV) {
+ if (attr_mask & IB_QP_AV &&
+ attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
if (ret)
return ret;
}
- ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
- if (!ret && (attr_mask & IB_QP_PORT))
- qp->port = attr->port_num;
-
- return ret;
+ return _ib_modify_qp(qp, attr, attr_mask, udata);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
@@ -1409,7 +1446,7 @@ int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
+ return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -1503,6 +1540,7 @@ int ib_destroy_qp(struct ib_qp *qp)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
+ rdma_restrack_del(&qp->res);
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
@@ -1545,6 +1583,8 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
}
return cq;
@@ -1563,6 +1603,7 @@ int ib_destroy_cq(struct ib_cq *cq)
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ rdma_restrack_del(&cq->res);
return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
@@ -1747,7 +1788,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
@@ -1765,7 +1806,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
return xrcd;
}
-EXPORT_SYMBOL(ib_alloc_xrcd);
+EXPORT_SYMBOL(__ib_alloc_xrcd);
int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
@@ -1790,11 +1831,11 @@ EXPORT_SYMBOL(ib_dealloc_xrcd);
* ib_create_wq - Creates a WQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the WQ.
- * @wq_init_attr: A list of initial attributes required to create the
+ * @wq_attr: A list of initial attributes required to create the
* WQ. If WQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created WQ.
*
- * wq_init_attr->max_wr and wq_init_attr->max_sge determine
+ * wq_attr->max_wr and wq_attr->max_sge determine
* the requested size of the WQ, and set to the actual values allocated
* on return.
* If ib_create_wq() succeeds, then max_wr and max_sge will always be
@@ -2156,16 +2197,16 @@ static void __ib_drain_sq(struct ib_qp *qp)
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- swr.wr_cqe = &sdrain.cqe;
- sdrain.cqe.done = ib_drain_qp_done;
- init_completion(&sdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
+ swr.wr_cqe = &sdrain.cqe;
+ sdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
ret = ib_post_send(qp, &swr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
@@ -2190,16 +2231,16 @@ static void __ib_drain_rq(struct ib_qp *qp)
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- rwr.wr_cqe = &rdrain.cqe;
- rdrain.cqe.done = ib_drain_qp_done;
- init_completion(&rdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
ret = ib_post_recv(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);