aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 12:05:10 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 12:05:10 -0800
commit 7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899 (patch)
tree cbc3ec5d45b04666c24f7c0b1df04a85d29c7d0f /drivers/infiniband/core
parent Merge tag 'dmaengine-4.16-rc1' of git://git.infradead.org/users/vkoul/slave-dma (diff)
parent Merge tag v4.15 of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git (diff)
download linux-dev-7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899.tar.xz
linux-dev-7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull RDMA subsystem updates from Jason Gunthorpe: "Overall this cycle did not have any major excitement, and did not require any shared branch with netdev. Lots of driver updates, particularly of the scale-up and performance variety. The largest body of core work was Parav's patches fixing and restructuring some of the core code to make way for future RDMA containerization. Summary: - misc small driver fixups to bnxt_re/hfi1/qib/hns/ocrdma/rdmavt/vmw_pvrdma/nes - several major feature adds to bnxt_re driver: SRIOV VF RoCE support, HugePages support, extended hardware stats support, and SRQ support - a notable number of fixes to the i40iw driver from debugging scale up testing - more work to enable the new hip08 chip in the hns driver - misc small ULP fixups to srp/srpt/ipoib - preparation for srp initiator and target to support the RDMA-CM protocol for connections - add RDMA-CM support to srp initiator, srp target is still a WIP - fixes for a couple of places where ipoib could spam the dmesg log - fix encode/decode of FDR/EDR data rates in the core - many patches from Parav with ongoing work to clean up inconsistencies and bugs in RoCE support around the rdma_cm - mlx5 driver support for the userspace features 'thread domain', 'wallclock timestamps' and 'DV Direct Connected transport'. 
Support for the firmware dual port RoCE capability - core support for more than 32 rdma devices in the char dev allocation - kernel doc updates from Randy Dunlap - new netlink uAPI for inspecting RDMA objects similar in spirit to 'ss' - one minor change to the kobject code acked by Greg KH" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (259 commits) RDMA/nldev: Provide detailed QP information RDMA/nldev: Provide global resource utilization RDMA/core: Add resource tracking for create and destroy PDs RDMA/core: Add resource tracking for create and destroy CQs RDMA/core: Add resource tracking for create and destroy QPs RDMA/restrack: Add general infrastructure to track RDMA resources RDMA/core: Save kernel caller name when creating PD and CQ objects RDMA/core: Use the MODNAME instead of the function name for pd callers RDMA: Move enum ib_cq_creation_flags to uapi headers IB/rxe: Change RDMA_RXE kconfig to use select IB/qib: remove qib_keys.c IB/mthca: remove mthca_user.h RDMA/cm: Fix access to uninitialized variable RDMA/cma: Use existing netif_is_bond_master function IB/core: Avoid SGID attributes query while converting GID from OPA to IB RDMA/mlx5: Avoid memory leak in case of XRCD dealloc failure IB/umad: Fix use of unprotected device pointer IB/iser: Combine substrings for three messages IB/iser: Delete an unnecessary variable initialisation in iser_send_data_out() IB/iser: Delete an error message for a failed memory allocation in iser_send_data_out() ...
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- drivers/infiniband/core/Makefile 2
-rw-r--r-- drivers/infiniband/core/addr.c 65
-rw-r--r-- drivers/infiniband/core/cache.c 23
-rw-r--r-- drivers/infiniband/core/cm.c 227
-rw-r--r-- drivers/infiniband/core/cma.c 252
-rw-r--r-- drivers/infiniband/core/cma_configfs.c 2
-rw-r--r-- drivers/infiniband/core/core_priv.h 52
-rw-r--r-- drivers/infiniband/core/cq.c 39
-rw-r--r-- drivers/infiniband/core/device.c 42
-rw-r--r-- drivers/infiniband/core/fmr_pool.c 12
-rw-r--r-- drivers/infiniband/core/iwpm_util.c 1
-rw-r--r-- drivers/infiniband/core/mad.c 1
-rw-r--r-- drivers/infiniband/core/netlink.c 10
-rw-r--r-- drivers/infiniband/core/nldev.c 394
-rw-r--r-- drivers/infiniband/core/restrack.c 164
-rw-r--r-- drivers/infiniband/core/roce_gid_mgmt.c 13
-rw-r--r-- drivers/infiniband/core/sa_query.c 18
-rw-r--r-- drivers/infiniband/core/security.c 10
-rw-r--r-- drivers/infiniband/core/sysfs.c 1
-rw-r--r-- drivers/infiniband/core/ucm.c 73
-rw-r--r-- drivers/infiniband/core/ucma.c 19
-rw-r--r-- drivers/infiniband/core/umem.c 2
-rw-r--r-- drivers/infiniband/core/user_mad.c 123
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c 14
-rw-r--r-- drivers/infiniband/core/uverbs_ioctl.c 19
-rw-r--r-- drivers/infiniband/core/uverbs_main.c 95
-rw-r--r-- drivers/infiniband/core/uverbs_std_types.c 3
-rw-r--r-- drivers/infiniband/core/verbs.c 312
28 files changed, 1327 insertions, 661 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 504b926552c6..f69833db0a32 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- security.o nldev.o
+ security.o nldev.o restrack.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index f4e8185bccd3..a5b4cf030c11 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -243,8 +243,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(const struct sockaddr *addr,
- struct rdma_dev_addr *dev_addr,
- u16 *vlan_id)
+ struct rdma_dev_addr *dev_addr)
{
struct net_device *dev;
@@ -266,9 +265,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
return -EADDRNOTAVAIL;
rdma_copy_addr(dev_addr, dev, NULL);
- dev_addr->bound_dev_if = dev->ifindex;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
#if IS_ENABLED(CONFIG_IPV6)
@@ -279,9 +275,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
rdma_copy_addr(dev_addr, dev, NULL);
- dev_addr->bound_dev_if = dev->ifindex;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
@@ -481,7 +474,7 @@ static int addr_resolve_neigh(struct dst_entry *dst,
if (dst->dev->flags & IFF_LOOPBACK) {
int ret;
- ret = rdma_translate_ip(dst_in, addr, NULL);
+ ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
@@ -558,7 +551,7 @@ static int addr_resolve(struct sockaddr *src_in,
}
if (ndev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip(dst_in, addr, NULL);
+ ret = rdma_translate_ip(dst_in, addr);
/*
* Put the loopback device and get the translated
* device instead.
@@ -744,7 +737,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
- struct rdma_dev_addr *addr;
struct completion comp;
int status;
};
@@ -752,39 +744,31 @@ struct resolve_cb_context {
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
- if (!status)
- memcpy(((struct resolve_cb_context *)context)->addr,
- addr, sizeof(struct rdma_dev_addr));
((struct resolve_cb_context *)context)->status = status;
complete(&((struct resolve_cb_context *)context)->comp);
}
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int *if_index,
+ u8 *dmac, const struct net_device *ndev,
int *hoplimit)
{
- int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
- struct net_device *dev;
-
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
-
+ int ret;
rdma_gid2ip(&sgid_addr._sockaddr, sgid);
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- if (if_index)
- dev_addr.bound_dev_if = *if_index;
+ dev_addr.bound_dev_if = ndev->ifindex;
dev_addr.net = &init_net;
- ctx.addr = &dev_addr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
@@ -798,42 +782,9 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
return ret;
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
- dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
- if (!dev)
- return -ENODEV;
- if (if_index)
- *if_index = dev_addr.bound_dev_if;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
- if (hoplimit)
- *hoplimit = dev_addr.hoplimit;
- dev_put(dev);
- return ret;
-}
-EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
-
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
-{
- int ret = 0;
- struct rdma_dev_addr dev_addr;
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } gid_addr;
-
- rdma_gid2ip(&gid_addr._sockaddr, sgid);
-
- memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.net = &init_net;
- ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
- if (ret)
- return ret;
-
- memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
- return ret;
+ *hoplimit = dev_addr.hoplimit;
+ return 0;
}
-EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 77515638c55c..e9a409d7f4e2 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -573,27 +573,24 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
struct ib_gid_attr attr;
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
+ continue;
if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ continue;
memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
- if (filter(gid, &attr, context))
+ if (filter(gid, &attr, context)) {
found = true;
-
-next:
- if (found)
+ if (index)
+ *index = i;
break;
+ }
}
read_unlock_irqrestore(&table->rwlock, flags);
if (!found)
return -ENOENT;
-
- if (index)
- *index = i;
return 0;
}
@@ -824,12 +821,7 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
if (err)
return err;
- err = roce_rescan_device(ib_dev);
-
- if (err) {
- gid_table_cleanup_one(ib_dev);
- gid_table_release_one(ib_dev);
- }
+ rdma_roce_rescan_device(ib_dev);
return err;
}
@@ -883,7 +875,6 @@ int ib_find_gid_by_filter(struct ib_device *device,
port_num, filter,
context, index);
}
-EXPORT_SYMBOL(ib_find_gid_by_filter);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index f6b159d79977..e6749157fd86 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -452,13 +452,14 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+ struct ib_grh *grh, struct cm_av *av)
{
av->port = port;
av->pkey_index = wc->pkey_index;
- ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
- grh, &av->ah_attr);
+ return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
+ port->port_num, wc,
+ grh, &av->ah_attr);
}
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
@@ -494,8 +495,11 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
return ret;
av->port = port;
- ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
- &av->ah_attr);
+ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
+ &av->ah_attr);
+ if (ret)
+ return ret;
+
av->timeout = path->packet_life_time + 1;
spin_lock_irqsave(&cm.lock, flags);
@@ -1560,6 +1564,35 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
return pkey;
}
+/**
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
+ * ULPs (such as IPoIB) do not understand OPA GIDs and will
+ * reject them as the local_gid will not match the sgid. Therefore,
+ * change the pathrec's SGID to an IB SGID.
+ *
+ * @work: Work completion
+ * @path: Path record
+ */
+static void cm_opa_to_ib_sgid(struct cm_work *work,
+ struct sa_path_rec *path)
+{
+ struct ib_device *dev = work->port->cm_dev->ib_device;
+ u8 port_num = work->port->port_num;
+
+ if (rdma_cap_opa_ah(dev, port_num) &&
+ (ib_is_opa_gid(&path->sgid))) {
+ union ib_gid sgid;
+
+ if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
+ dev_warn(&dev->dev,
+ "Error updating sgid in CM request\n");
+ return;
+ }
+
+ path->sgid = sgid;
+ }
+}
+
static void cm_format_req_event(struct cm_work *work,
struct cm_id_private *cm_id_priv,
struct ib_cm_id *listen_id)
@@ -1573,10 +1606,13 @@ static void cm_format_req_event(struct cm_work *work,
param->bth_pkey = cm_get_bth_pkey(work);
param->port = cm_id_priv->av.port->port_num;
param->primary_path = &work->path[0];
- if (cm_req_has_alt_path(req_msg))
+ cm_opa_to_ib_sgid(work, param->primary_path);
+ if (cm_req_has_alt_path(req_msg)) {
param->alternate_path = &work->path[1];
- else
+ cm_opa_to_ib_sgid(work, param->alternate_path);
+ } else {
param->alternate_path = NULL;
+ }
param->remote_ca_guid = req_msg->local_ca_guid;
param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
@@ -1826,9 +1862,11 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
cm_id_priv->id.remote_id = req_msg->local_comm_id;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto destroy;
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
@@ -1841,9 +1879,10 @@ static int cm_req_handler(struct cm_work *work)
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
+ pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
+ be32_to_cpu(cm_id->local_id));
ret = -EINVAL;
- kfree(cm_id_priv->timewait_info);
- goto destroy;
+ goto free_timeinfo;
}
cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1861,56 +1900,50 @@ static int cm_req_handler(struct cm_work *work)
work->port->port_num,
grh->sgid_index,
&gid, &gid_attr);
- if (!ret) {
- if (gid_attr.ndev) {
- work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
- } else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
- }
- if (cm_req_has_alt_path(req_msg))
- work->path[1].rec_type = work->path[0].rec_type;
- cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
- if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
- sa_path_set_dmac(&work->path[0],
- cm_id_priv->av.ah_attr.roce.dmac);
- work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
- cm_id_priv);
+ if (ret) {
+ ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
+ goto rejected;
+ }
+
+ if (gid_attr.ndev) {
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
+ dev_put(gid_attr.ndev);
+ } else {
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ &work->path[0],
+ &req_msg->primary_local_gid);
}
+ if (cm_req_has_alt_path(req_msg))
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ cm_id_priv);
if (ret) {
- int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0,
- &work->path[0].sgid,
- &gid_attr);
- if (!err && gid_attr.ndev) {
- work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
- } else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
- }
- if (cm_req_has_alt_path(req_msg))
- work->path[1].rec_type = work->path[0].rec_type;
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
- &work->path[0].sgid, sizeof work->path[0].sgid,
- NULL, 0);
+ int err;
+
+ err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid,
+ NULL);
+ if (err)
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ NULL, 0, NULL, 0);
+ else
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ &work->path[0].sgid,
+ sizeof(work->path[0].sgid),
+ NULL, 0);
goto rejected;
}
if (cm_req_has_alt_path(req_msg)) {
@@ -1919,7 +1952,7 @@ static int cm_req_handler(struct cm_work *work)
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
- sizeof work->path[0].sgid, NULL, 0);
+ sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
@@ -1945,6 +1978,8 @@ static int cm_req_handler(struct cm_work *work)
rejected:
atomic_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
+free_timeinfo:
+ kfree(cm_id_priv->timewait_info);
destroy:
ib_destroy_cm_id(cm_id);
return ret;
@@ -1997,6 +2032,8 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
+ pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2063,6 +2100,8 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
+ pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
goto error;
}
@@ -2170,6 +2209,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
+ pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ be32_to_cpu(rep_msg->remote_comm_id));
return -EINVAL;
}
@@ -2183,6 +2224,10 @@ static int cm_rep_handler(struct cm_work *work)
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
+ __func__, cm_id_priv->id.state,
+ be32_to_cpu(rep_msg->local_comm_id),
+ be32_to_cpu(rep_msg->remote_comm_id));
goto error;
}
@@ -2196,6 +2241,8 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ be32_to_cpu(rep_msg->remote_comm_id));
goto error;
}
/* Check for a stale connection. */
@@ -2213,6 +2260,10 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
+ __func__, be32_to_cpu(rep_msg->local_comm_id),
+ be32_to_cpu(rep_msg->remote_comm_id));
+
if (cur_cm_id_priv) {
cm_id = &cur_cm_id_priv->id;
ib_send_cm_dreq(cm_id, NULL, 0);
@@ -2359,6 +2410,8 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED) {
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2428,6 +2481,8 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
kfree(data);
+ pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
+ __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
return -EINVAL;
}
@@ -2493,6 +2548,9 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
+ pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
+ __func__, be32_to_cpu(dreq_msg->local_comm_id),
+ be32_to_cpu(dreq_msg->remote_comm_id));
return -EINVAL;
}
@@ -2535,6 +2593,9 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2638,6 +2699,8 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
cm_enter_timewait(cm_id_priv);
break;
default:
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2748,6 +2811,9 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
goto out;
}
@@ -2811,6 +2877,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
}
/* fall through */
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
goto error1;
}
@@ -2912,6 +2981,9 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
+ pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
goto out;
}
@@ -3085,6 +3157,12 @@ static int cm_lap_handler(struct cm_work *work)
if (!cm_id_priv)
return -EINVAL;
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto deref;
+
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3131,9 +3209,6 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -3386,6 +3461,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
+ int ret;
cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
if (IS_ERR(cm_id))
@@ -3398,9 +3474,12 @@ static int cm_sidr_req_handler(struct cm_work *work)
wc = work->mad_recv_wc->wc;
cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
cm_id_priv->av.dgid.global.interface_id = 0;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto out;
+
cm_id_priv->id.remote_id = sidr_req_msg->request_id;
cm_id_priv->tid = sidr_req_msg->hdr.tid;
atomic_inc(&cm_id_priv->work_count);
@@ -3692,6 +3771,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
+ pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3727,6 +3807,8 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
break;
}
@@ -3924,6 +4006,9 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3971,6 +4056,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -4030,6 +4118,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6294a7001d33..e66963ca58bd 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -601,7 +601,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
- ret = rdma_translate_ip(addr, dev_addr, NULL);
+ ret = rdma_translate_ip(addr, dev_addr);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@@ -612,11 +612,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
static inline int cma_validate_port(struct ib_device *device, u8 port,
enum ib_gid_type gid_type,
- union ib_gid *gid, int dev_type,
- int bound_if_index)
+ union ib_gid *gid,
+ struct rdma_id_private *id_priv)
{
- int ret = -ENODEV;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int bound_if_index = dev_addr->bound_dev_if;
+ int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
+ int ret = -ENODEV;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ret;
@@ -624,11 +627,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port))
- ndev = dev_get_by_index(&init_net, bound_if_index);
- else
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ return ret;
+ } else {
gid_type = IB_GID_TYPE_IB;
-
+ }
ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, NULL);
@@ -669,8 +674,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
listen_id_priv->gid_type, gidp,
- dev_addr->dev_type,
- dev_addr->bound_dev_if);
+ id_priv);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -691,8 +695,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
cma_dev->default_gid_type[port - 1],
- gidp, dev_addr->dev_type,
- dev_addr->bound_dev_if);
+ gidp, id_priv);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -2036,6 +2039,33 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_get_service_id);
+void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
+ union ib_gid *dgid)
+{
+ struct rdma_addr *addr = &cm_id->route.addr;
+
+ if (!cm_id->device) {
+ if (sgid)
+ memset(sgid, 0, sizeof(*sgid));
+ if (dgid)
+ memset(dgid, 0, sizeof(*dgid));
+ return;
+ }
+
+ if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
+ if (sgid)
+ rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
+ if (dgid)
+ rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
+ } else {
+ if (sgid)
+ rdma_addr_get_sgid(&addr->dev_addr, sgid);
+ if (dgid)
+ rdma_addr_get_dgid(&addr->dev_addr, dgid);
+ }
+}
+EXPORT_SYMBOL(rdma_read_gids);
+
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
@@ -2132,7 +2162,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -2414,6 +2444,26 @@ out:
kfree(work);
}
+static void cma_init_resolve_route_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
+{
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+}
+
+static void cma_init_resolve_addr_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
+{
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+}
+
static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2424,11 +2474,7 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ cma_init_resolve_route_work(work, id_priv);
route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
@@ -2449,10 +2495,63 @@ err1:
return ret;
}
-int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct sa_path_rec *path_rec, int num_paths)
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+ unsigned long supported_gids,
+ enum ib_gid_type default_gid)
+{
+ if ((network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6) &&
+ test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ return default_gid;
+}
+
+/*
+ * cma_iboe_set_path_rec_l2_fields() is a helper function which sets the
+ * path record type based on GID type.
+ * It also sets up the other L2 fields of the path record, which include
+ * the destination MAC address and the netdev ifindex.
+ * It returns the netdev of the bound interface for this path record entry.
+ */
+static struct net_device *
+cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
+ struct rdma_addr *addr = &route->addr;
+ unsigned long supported_gids;
+ struct net_device *ndev;
+
+ if (!addr->dev_addr.bound_dev_if)
+ return NULL;
+
+ ndev = dev_get_by_index(addr->dev_addr.net,
+ addr->dev_addr.bound_dev_if);
+ if (!ndev)
+ return NULL;
+
+ supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+ id_priv->id.port_num);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ /* Use the hint from IP Stack to select GID Type */
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
+ sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
+ sa_path_set_ifindex(route->path_rec, ndev->ifindex);
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
+ return ndev;
+}
+
+int rdma_set_ib_path(struct rdma_cm_id *id,
+ struct sa_path_rec *path_rec)
{
struct rdma_id_private *id_priv;
+ struct net_device *ndev;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2460,20 +2559,33 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
RDMA_CM_ROUTE_RESOLVED))
return -EINVAL;
- id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
+ id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec),
GFP_KERNEL);
if (!id->route.path_rec) {
ret = -ENOMEM;
goto err;
}
- id->route.num_paths = num_paths;
+ if (rdma_protocol_roce(id->device, id->port_num)) {
+ ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err_free;
+ }
+ dev_put(ndev);
+ }
+
+ id->route.num_paths = 1;
return 0;
+
+err_free:
+ kfree(id->route.path_rec);
+ id->route.path_rec = NULL;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
return ret;
}
-EXPORT_SYMBOL(rdma_set_ib_paths);
+EXPORT_SYMBOL(rdma_set_ib_path);
static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
{
@@ -2483,11 +2595,7 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
}
@@ -2510,26 +2618,14 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
-static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
- unsigned long supported_gids,
- enum ib_gid_type default_gid)
-{
- if ((network_type == RDMA_NETWORK_IPV4 ||
- network_type == RDMA_NETWORK_IPV6) &&
- test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
- return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- return default_gid;
-}
-
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
struct rdma_addr *addr = &route->addr;
struct cma_work *work;
int ret;
- struct net_device *ndev = NULL;
- enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ struct net_device *ndev;
+
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
@@ -2539,9 +2635,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
-
route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
@@ -2550,42 +2643,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->num_paths = 1;
- if (addr->dev_addr.bound_dev_if) {
- unsigned long supported_gids;
-
- ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
- if (!ndev) {
- ret = -ENODEV;
- goto err2;
- }
-
- supported_gids = roce_gid_type_mask_support(id_priv->id.device,
- id_priv->id.port_num);
- gid_type = cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
- route->path_rec->rec_type =
- sa_conv_gid_to_pathrec_type(gid_type);
- sa_path_set_ndev(route->path_rec, &init_net);
- sa_path_set_ifindex(route->path_rec, ndev->ifindex);
- }
+ ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
-
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- /* Use the hint from IP Stack to select GID Type */
- if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- gid_type = ib_network_to_gid_type(addr->dev_addr.network);
- route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
-
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -2607,11 +2675,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
- work->event.status = 0;
-
+ cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -2791,11 +2855,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ADDR_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
err:
@@ -2821,11 +2881,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ADDR_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
err:
@@ -3404,9 +3460,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = ret;
break;
}
- ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
- id_priv->id.route.path_rec,
- &event.param.ud.ah_attr);
+ ib_init_ah_attr_from_path(id_priv->id.device,
+ id_priv->id.port_num,
+ id_priv->id.route.path_rec,
+ &event.param.ud.ah_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3873,7 +3930,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev =
- dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
enum ib_gid_type gid_type =
id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
@@ -4010,8 +4067,10 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
- mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
+ mgid->raw[0] =
+ (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
+ mgid->raw[1] =
+ (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@@ -4061,7 +4120,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
goto out2;
@@ -4179,7 +4238,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
+ ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
@@ -4235,7 +4294,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
- if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+ if (!netif_is_bond_master(ndev))
return NOTIFY_DONE;
mutex_lock(&lock);
@@ -4432,7 +4491,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
goto out;
if (ibnl_put_attr(skb, nlh,
- rdma_addr_size(cma_src_addr(id_priv)),
+ rdma_addr_size(cma_dst_addr(id_priv)),
cma_dst_addr(id_priv),
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
@@ -4444,6 +4503,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
id_stats->qp_type = id->qp_type;
i_id++;
+ nlmsg_end(skb, nlh);
}
cb->args[1] = 0;
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 31dfee0c8295..eee38b40be99 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -295,7 +295,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 66f0268f37a6..c4560d84dfae 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -40,8 +40,12 @@
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/restrack.h>
#include "mad_priv.h"
+/* Total number of ports combined across all struct ib_device's */
+#define RDMA_MAX_PORTS 1024
+
struct pkey_index_qp_list {
struct list_head pkey_index_list;
u16 pkey_index;
@@ -137,7 +141,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-int roce_rescan_device(struct ib_device *ib_dev);
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
@@ -191,13 +194,6 @@ void ib_sa_cleanup(void);
int rdma_nl_init(void);
void rdma_nl_exit(void);
-/**
- * Check if there are any listeners to the netlink group
- * @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
- */
-int ibnl_chk_listeners(unsigned int group);
-
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
@@ -213,11 +209,6 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
-int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec);
-
void ib_security_destroy_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
@@ -240,14 +231,6 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
#else
-static inline int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec)
-{
- return 0;
-}
-
static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
{
}
@@ -318,4 +301,31 @@ struct ib_device *ib_device_get_by_index(u32 ifindex);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
+
+/*
+ * Create a QP through the driver's create_qp() callback and finish the
+ * core-side setup: record the owning device and PD, then either register
+ * the QP with resource tracking or mark its tracking entry invalid.
+ *
+ * Returns the driver's result; an ERR_PTR from the driver is passed through
+ * untouched.
+ */
+static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
+					  struct ib_pd *pd,
+					  struct ib_qp_init_attr *attr,
+					  struct ib_udata *udata)
+{
+	struct ib_qp *new_qp = dev->create_qp(pd, attr, udata);
+
+	if (IS_ERR(new_qp))
+		return new_qp;
+
+	new_qp->device = dev;
+	new_qp->pd = pd;
+
+	/*
+	 * We don't track XRC QPs for now, because they don't have PD
+	 * and more importantly they are created internally by driver,
+	 * see mlx5 create_dev_resources() as an example.
+	 */
+	if (attr->qp_type >= IB_QPT_XRC_INI) {
+		new_qp->res.valid = false;
+		return new_qp;
+	}
+
+	new_qp->res.type = RDMA_RESTRACK_QP;
+	rdma_restrack_add(&new_qp->res);
+
+	return new_qp;
+}
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index f2ae75fa3128..bc79ca8215d7 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -25,9 +25,10 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
-static int __ib_process_cq(struct ib_cq *cq, int budget)
+static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
{
int i, n, completed = 0;
+ struct ib_wc *wcs = poll_wc ? : cq->wc;
/*
* budget might be (-1) if the caller does not
@@ -35,9 +36,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* minimum here.
*/
while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
- budget - completed), cq->wc)) > 0) {
+ budget - completed), wcs)) > 0) {
for (i = 0; i < n; i++) {
- struct ib_wc *wc = &cq->wc[i];
+ struct ib_wc *wc = &wcs[i];
if (wc->wr_cqe)
wc->wr_cqe->done(cq, wc);
@@ -60,18 +61,20 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
- * This function is used to process all outstanding CQ entries on a
- * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
- * context and does not ask for completion interrupts from the HCA.
+ * This function is used to process all outstanding CQ entries.
+ * It does not offload CQ processing to a different context and does
+ * not ask for completion interrupts from the HCA.
+ * Using direct processing on CQ with non IB_POLL_DIRECT type may trigger
+ * concurrent processing.
*
* Note: do not pass -1 as %budget unless it is guaranteed that the number
* of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
- WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+ struct ib_wc wcs[IB_POLL_BATCH];
- return __ib_process_cq(cq, budget);
+ return __ib_process_cq(cq, budget, wcs);
}
EXPORT_SYMBOL(ib_process_cq_direct);
@@ -85,7 +88,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
int completed;
- completed = __ib_process_cq(cq, budget);
+ completed = __ib_process_cq(cq, budget, NULL);
if (completed < budget) {
irq_poll_complete(&cq->iop);
if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
@@ -105,7 +108,7 @@ static void ib_cq_poll_work(struct work_struct *work)
struct ib_cq *cq = container_of(work, struct ib_cq, work);
int completed;
- completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, NULL);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(ib_comp_wq, &cq->work);
@@ -117,20 +120,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * ib_alloc_cq - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
+ * @caller: module owner name.
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx, const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -154,6 +159,10 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
if (!cq->wc)
goto out_destroy_cq;
+ cq->res.type = RDMA_RESTRACK_CQ;
+ cq->res.kern_name = caller;
+ rdma_restrack_add(&cq->res);
+
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
cq->comp_handler = ib_cq_completion_direct;
@@ -178,11 +187,12 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
out_free_wc:
kfree(cq->wc);
+ rdma_restrack_del(&cq->res);
out_destroy_cq:
cq->device->destroy_cq(cq);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_cq);
+EXPORT_SYMBOL(__ib_alloc_cq);
/**
* ib_free_cq - free a completion queue
@@ -209,6 +219,7 @@ void ib_free_cq(struct ib_cq *cq)
}
kfree(cq->wc);
+ rdma_restrack_del(&cq->res);
ret = cq->device->destroy_cq(cq);
WARN_ON_ONCE(ret);
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 465520627e4b..e8010e73a1cf 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -263,6 +263,8 @@ struct ib_device *ib_alloc_device(size_t size)
if (!device)
return NULL;
+ rdma_restrack_init(&device->res);
+
device->dev.class = &ib_class;
device_initialize(&device->dev);
@@ -288,7 +290,7 @@ void ib_dealloc_device(struct ib_device *device)
{
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
device->reg_state != IB_DEV_UNINITIALIZED);
- kobject_put(&device->dev.kobj);
+ put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);
@@ -462,7 +464,6 @@ int ib_register_device(struct ib_device *device,
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
struct device *parent = device->dev.parent;
- WARN_ON_ONCE(!parent);
WARN_ON_ONCE(device->dma_device);
if (device->dev.dma_ops) {
/*
@@ -471,16 +472,25 @@ int ib_register_device(struct ib_device *device,
* into device->dev.
*/
device->dma_device = &device->dev;
- if (!device->dev.dma_mask)
- device->dev.dma_mask = parent->dma_mask;
- if (!device->dev.coherent_dma_mask)
- device->dev.coherent_dma_mask =
- parent->coherent_dma_mask;
+ if (!device->dev.dma_mask) {
+ if (parent)
+ device->dev.dma_mask = parent->dma_mask;
+ else
+ WARN_ON_ONCE(true);
+ }
+ if (!device->dev.coherent_dma_mask) {
+ if (parent)
+ device->dev.coherent_dma_mask =
+ parent->coherent_dma_mask;
+ else
+ WARN_ON_ONCE(true);
+ }
} else {
/*
* The caller did not provide custom DMA operations. Use the
* DMA mapping operations of the parent device.
*/
+ WARN_ON_ONCE(!parent);
device->dma_device = parent;
}
@@ -588,6 +598,8 @@ void ib_unregister_device(struct ib_device *device)
}
up_read(&lists_rwsem);
+ rdma_restrack_clean(&device->res);
+
ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
@@ -1033,32 +1045,22 @@ EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
+ * a specified GID value occurs. It searches only for the IB link layer.
* @device: The device to query.
* @gid: The GID value to search for.
- * @gid_type: Type of GID.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- enum ib_gid_type gid_type, struct net_device *ndev,
- u8 *port_num, u16 *index)
+ struct net_device *ndev, u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
- if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, index)) {
- *port_num = port;
- return 0;
- }
- }
-
- if (gid_type != IB_GID_TYPE_IB)
+ if (rdma_cap_roce_gid_table(device, port))
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 84d2615b5d4b..a0a9ed719031 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -388,13 +388,11 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
EXPORT_SYMBOL(ib_flush_fmr_pool);
/**
- * ib_fmr_pool_map_phys -
- * @pool:FMR pool to allocate FMR from
- * @page_list:List of pages to map
- * @list_len:Number of pages in @page_list
- * @io_virtual_address:I/O virtual address for new FMR
- *
- * Map an FMR from an FMR pool.
+ * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
+ * @pool_handle: FMR pool to allocate FMR from
+ * @page_list: List of pages to map
+ * @list_len: Number of pages in @page_list
+ * @io_virtual_address: I/O virtual address for new FMR
*/
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3c4faadb8cdd..81528f64061a 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -654,6 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
}
skb_num++;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ ret = -EINVAL;
for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
hlist_node) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index cb91245e9163..c50596f7f98a 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -49,7 +49,6 @@
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"
-#include "core_priv.h"
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 1fb72c356e36..3ccaae18ad75 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -41,8 +41,6 @@
#include <linux/module.h>
#include "core_priv.h"
-#include "core_priv.h"
-
static DEFINE_MUTEX(rdma_nl_mutex);
static struct sock *nls;
static struct {
@@ -83,15 +81,13 @@ static bool is_nl_valid(unsigned int type, unsigned int op)
if (!is_nl_msg_valid(type, op))
return false;
- cb_table = rdma_nl_types[type].cb_table;
-#ifdef CONFIG_MODULES
- if (!cb_table) {
+ if (!rdma_nl_types[type].cb_table) {
mutex_unlock(&rdma_nl_mutex);
request_module("rdma-netlink-subsys-%d", type);
mutex_lock(&rdma_nl_mutex);
- cb_table = rdma_nl_types[type].cb_table;
}
-#endif
+
+ cb_table = rdma_nl_types[type].cb_table;
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
return false;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0dcd1aa6f683..fa8655e3b3ed 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -31,6 +31,8 @@
*/
#include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
#include <net/netlink.h>
#include <rdma/rdma_netlink.h>
@@ -52,16 +54,42 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+ .len = 16 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
+ .len = TASK_COMM_LEN },
};
-static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
- char fw[IB_FW_VERSION_NAME_MAX];
-
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
return -EMSGSIZE;
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
return -EMSGSIZE;
+
+ return 0;
+}
+
+static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+{
+ char fw[IB_FW_VERSION_NAME_MAX];
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
return -EMSGSIZE;
@@ -92,10 +120,9 @@ static int fill_port_info(struct sk_buff *msg,
struct ib_port_attr attr;
int ret;
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
- return -EMSGSIZE;
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+ if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
return -EMSGSIZE;
@@ -126,6 +153,137 @@ static int fill_port_info(struct sk_buff *msg,
return 0;
}
+/*
+ * Append one RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY nest to @msg holding the
+ * resource @name and its current count @curr.
+ *
+ * Returns 0 on success or -EMSGSIZE when the nest or one of its attributes
+ * cannot be added; a partially built nest is cancelled before returning.
+ */
+static int fill_res_info_entry(struct sk_buff *msg,
+			       const char *name, u64 curr)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name) ||
+	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR,
+			      curr, 0)) {
+		nla_nest_cancel(msg, nest);
+		return -EMSGSIZE;
+	}
+
+	nla_nest_end(msg, nest);
+	return 0;
+}
+
+/*
+ * Emit the device handle followed by an RDMA_NLDEV_ATTR_RES_SUMMARY nest
+ * with one entry per named resource type (pd, cq, qp). Each count comes
+ * from rdma_restrack_count() for the caller's active PID namespace.
+ *
+ * Returns 0 on success, -EMSGSIZE if the handle or the nest cannot be
+ * added, or the error from fill_res_info_entry(); the summary nest is
+ * cancelled when an entry fails.
+ */
+static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
+{
+	static const char * const labels[RDMA_RESTRACK_MAX] = {
+		[RDMA_RESTRACK_PD] = "pd",
+		[RDMA_RESTRACK_CQ] = "cq",
+		[RDMA_RESTRACK_QP] = "qp",
+	};
+
+	struct rdma_restrack_root *root = &device->res;
+	struct nlattr *summary;
+	int type_idx;
+
+	if (fill_nldev_handle(msg, device))
+		return -EMSGSIZE;
+
+	summary = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
+	if (!summary)
+		return -EMSGSIZE;
+
+	for (type_idx = 0; type_idx < RDMA_RESTRACK_MAX; type_idx++) {
+		int count, rc;
+
+		/* Resource types without a label are not reported. */
+		if (!labels[type_idx])
+			continue;
+
+		count = rdma_restrack_count(root, type_idx,
+					    task_active_pid_ns(current));
+		rc = fill_res_info_entry(msg, labels[type_idx], count);
+		if (rc) {
+			nla_nest_cancel(msg, summary);
+			return rc;
+		}
+	}
+
+	nla_nest_end(msg, summary);
+	return 0;
+}
+
+/*
+ * fill_res_qp_entry() - emit one RDMA_NLDEV_ATTR_RES_QP_ENTRY nest for @qp.
+ * @msg: netlink message being built
+ * @qp: QP to describe; its attributes are read with ib_query_qp()
+ * @port: port filter; 0 disables filtering
+ *
+ * Returns 0 on success or when @qp is skipped because it is not on @port,
+ * the ib_query_qp() error if the query fails, or -EMSGSIZE when the nest or
+ * one of its attributes cannot be added (a partial nest is cancelled first).
+ */
+static int fill_res_qp_entry(struct sk_buff *msg,
+ struct ib_qp *qp, uint32_t port)
+{
+ struct rdma_restrack_entry *res = &qp->res;
+ struct ib_qp_init_attr qp_init_attr;
+ struct nlattr *entry_attr;
+ struct ib_qp_attr qp_attr;
+ int ret;
+
+ ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
+ if (ret)
+ return ret;
+
+ /* A port filter was given and this QP is on a different port: skip it. */
+ if (port && port != qp_attr.port_num)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ /* In create_qp() port is not set yet */
+ if (qp_attr.port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
+ goto err;
+ /* Remote QPN and RQ PSN are reported for RC and UC QPs only. */
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
+ qp_attr.dest_qp_num))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
+ qp_attr.rq_psn))
+ goto err;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
+ goto err;
+
+ /* Path migration state is reported for RC, UC and XRC QPs only. */
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
+ qp_attr.path_mig_state))
+ goto err;
+ }
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
+ goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
+ goto err;
+
+ /*
+ * A kernel-owned QP is reported by its kernel owner name. Any other
+ * QP is owned by a task, so only the PID (as seen from the caller's
+ * namespace) is reported; the netlink user is invited to read
+ * /proc/PID/comm to get the task name.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
+ goto err;
+ }
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ /* Drop the partially filled entry so the message stays well-formed. */
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -321,6 +479,213 @@ out:
return skb->len;
}
+/*
+ * Netlink .doit handler for RDMA_NLDEV_CMD_RES_GET: reply to the sender
+ * with the resource summary of the device named by
+ * RDMA_NLDEV_ATTR_DEV_INDEX.
+ *
+ * Returns the result of rdma_nl_unicast() on success, -EINVAL for a
+ * malformed request or unknown device, -ENOMEM if the reply cannot be
+ * allocated, -EMSGSIZE if the reply header does not fit, or the error from
+ * fill_res_info(). The device obtained from ib_device_get_by_index() is
+ * released with put_device() on every exit path after the lookup.
+ */
+static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	struct sk_buff *msg;
+	u32 index;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		/* ret is still 0 from nlmsg_parse(); report the failure. */
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+			0, 0);
+	if (!nlh) {
+		/* No room for the header: never hand NULL to nlmsg_end(). */
+		ret = -EMSGSIZE;
+		goto err_free;
+	}
+
+	ret = fill_res_info(msg, device);
+	if (ret)
+		goto err_free;
+
+	nlmsg_end(msg, nlh);
+	put_device(&device->dev);
+	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+	nlmsg_free(msg);
+err:
+	put_device(&device->dev);
+	return ret;
+}
+
+/*
+ * Per-device callback for the RDMA_NLDEV_CMD_RES_GET dump: append the
+ * resource summary of @device to @skb as one NLM_F_MULTI message.
+ *
+ * @idx is the position of @device in the enumeration and cb->args[0] is
+ * the position at which the dump should resume. Devices before the resume
+ * point are skipped (returns 0). Otherwise cb->args[0] is advanced past
+ * @device only if its message was added completely, and skb->len is
+ * returned.
+ */
+static int _nldev_res_get_dumpit(struct ib_device *device,
+				 struct sk_buff *skb,
+				 struct netlink_callback *cb,
+				 unsigned int idx)
+{
+	int start = cb->args[0];
+	struct nlmsghdr *nlh;
+
+	if (idx < start)
+		return 0;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+			0, NLM_F_MULTI);
+	/* No room for the header: retry this device on the next pass. */
+	if (!nlh)
+		goto out;
+
+	if (fill_res_info(skb, device)) {
+		nlmsg_cancel(skb, nlh);
+		goto out;
+	}
+
+	nlmsg_end(skb, nlh);
+
+	idx++;
+
+out:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+/*
+ * Netlink .dump handler for RDMA_NLDEV_CMD_RES_GET: passes
+ * _nldev_res_get_dumpit() to ib_enum_all_devs() together with @skb and @cb
+ * and returns its result.
+ */
+static int nldev_res_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
+}
+
+/*
+ * Netlink .dump handler for RDMA_NLDEV_CMD_RES_QP_GET.
+ *
+ * The request must carry RDMA_NLDEV_ATTR_DEV_INDEX; an optional
+ * RDMA_NLDEV_ATTR_PORT_INDEX restricts the output to QPs on that port.
+ * The reply is the device handle followed by an RDMA_NLDEV_ATTR_RES_QP nest
+ * with one entry per visible QP. cb->args[0] holds the index of the next
+ * tracked entry to visit, so a dump that ran out of room resumes there.
+ *
+ * Returns skb->len when at least one QP was handled in this pass, 0 when
+ * none was (which ends the dump), or a negative errno on failure. The
+ * device reference is dropped on every exit path after the lookup.
+ */
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct rdma_restrack_entry *res;
+	int err, ret = 0, idx = 0;
+	struct nlattr *table_attr;
+	struct ib_device *device;
+	int start = cb->args[0];
+	struct ib_qp *qp = NULL;
+	struct nlmsghdr *nlh;
+	u32 index, port = 0;
+
+	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, NULL);
+	/*
+	 * Right now, we are expecting the device index to get QP information,
+	 * but it is possible to extend this code to return all devices in
+	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+	 * If it doesn't exist, we will iterate over all devices.
+	 *
+	 * But it is not needed for now.
+	 */
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	/*
+	 * If no PORT_INDEX is supplied, we will return all QPs from that device
+	 */
+	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+		if (!rdma_is_port_valid(device, port)) {
+			ret = -EINVAL;
+			goto err_index;
+		}
+	}
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+			0, NLM_F_MULTI);
+	if (!nlh) {
+		/* No header was added, so there is nothing to cancel. */
+		ret = -EMSGSIZE;
+		goto err_index;
+	}
+
+	if (fill_nldev_handle(skb, device)) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
+	if (!table_attr) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	down_read(&device->res.rwsem);
+	hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
+		if (idx < start)
+			goto next;
+
+		if ((rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != &init_pid_ns) ||
+		    (!rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != task_active_pid_ns(res->task)))
+			/*
+			 * 1. Kernel QPs should be visible in init namespace only
+			 * 2. Present only QPs visible in the current namespace
+			 */
+			goto next;
+
+		if (!rdma_restrack_get(res))
+			/*
+			 * Resource is under release now, but we are not
+			 * releasing lock now, so it will be released in
+			 * our next pass, once we will get ->next pointer.
+			 */
+			goto next;
+
+		qp = container_of(res, struct ib_qp, res);
+
+		/* The lock is dropped around the QP query and fill. */
+		up_read(&device->res.rwsem);
+		ret = fill_res_qp_entry(skb, qp, port);
+		down_read(&device->res.rwsem);
+		/*
+		 * Return resource back, but it won't be released till
+		 * the &device->res.rwsem will be released for write.
+		 */
+		rdma_restrack_put(res);
+
+		if (ret == -EMSGSIZE)
+			/*
+			 * There is a chance to optimize here.
+			 * It can be done by using list_prepare_entry
+			 * and list_for_each_entry_continue afterwards.
+			 */
+			break;
+		if (ret)
+			goto res_err;
+next:		idx++;
+	}
+	up_read(&device->res.rwsem);
+
+	nla_nest_end(skb, table_attr);
+	nlmsg_end(skb, nlh);
+	cb->args[0] = idx;
+
+	/*
+	 * No more QPs to fill, cancel the message and
+	 * return 0 to mark end of dumpit.
+	 */
+	if (!qp)
+		goto err;
+
+	put_device(&device->dev);
+	return skb->len;
+
+res_err:
+	nla_nest_cancel(skb, table_attr);
+	up_read(&device->res.rwsem);
+
+err:
+	nlmsg_cancel(skb, nlh);
+
+err_index:
+	put_device(&device->dev);
+	return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -330,6 +695,23 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_GET] = {
+ .doit = nldev_res_get_doit,
+ .dump = nldev_res_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_QP_GET] = {
+ .dump = nldev_res_get_qp_dumpit,
+ /*
+ * .doit is not implemented yet for two reasons:
+ * 1. It is not needed yet.
+ * 2. There is a need to provide identifier, while it is easy
+ * for the QPs (device index + port index + LQPN), it is not
+ * the case for the rest of resources (PD and CQ). Because it
+ * is better to provide similar interface for all resources,
+ * let's wait till we will have other resources implemented
+ * too.
+ */
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
new file mode 100644
index 000000000000..857637bf46da
--- /dev/null
+++ b/drivers/infiniband/core/restrack.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/restrack.h>
+#include <linux/mutex.h>
+#include <linux/sched/task.h>
+#include <linux/uaccess.h>
+#include <linux/pid_namespace.h>
+
+void rdma_restrack_init(struct rdma_restrack_root *res)
+{
+ init_rwsem(&res->rwsem);
+}
+
+void rdma_restrack_clean(struct rdma_restrack_root *res)
+{
+ WARN_ON_ONCE(!hash_empty(res->hash));
+}
+
+int rdma_restrack_count(struct rdma_restrack_root *res,
+ enum rdma_restrack_type type,
+ struct pid_namespace *ns)
+{
+ struct rdma_restrack_entry *e;
+ u32 cnt = 0;
+
+ down_read(&res->rwsem);
+ hash_for_each_possible(res->hash, e, node, type) {
+ if (ns == &init_pid_ns ||
+ (!rdma_is_kernel_res(e) &&
+ ns == task_active_pid_ns(e->task)))
+ cnt++;
+ }
+ up_read(&res->rwsem);
+ return cnt;
+}
+EXPORT_SYMBOL(rdma_restrack_count);
+
+static void set_kern_name(struct rdma_restrack_entry *res)
+{
+ enum rdma_restrack_type type = res->type;
+ struct ib_qp *qp;
+
+ if (type != RDMA_RESTRACK_QP)
+ /* PD and CQ types already have this name embedded */
+ return;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->pd) {
+ WARN_ONCE(true, "XRC QPs are not supported\n");
+ /* Survive, despite the programmer's error */
+ res->kern_name = " ";
+ return;
+ }
+
+ res->kern_name = qp->pd->res.kern_name;
+}
+
+static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
+{
+ enum rdma_restrack_type type = res->type;
+ struct ib_device *dev;
+ struct ib_xrcd *xrcd;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+
+ switch (type) {
+ case RDMA_RESTRACK_PD:
+ pd = container_of(res, struct ib_pd, res);
+ dev = pd->device;
+ break;
+ case RDMA_RESTRACK_CQ:
+ cq = container_of(res, struct ib_cq, res);
+ dev = cq->device;
+ break;
+ case RDMA_RESTRACK_QP:
+ qp = container_of(res, struct ib_qp, res);
+ dev = qp->device;
+ break;
+ case RDMA_RESTRACK_XRCD:
+ xrcd = container_of(res, struct ib_xrcd, res);
+ dev = xrcd->device;
+ break;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
+ return NULL;
+ }
+
+ return dev;
+}
+
+void rdma_restrack_add(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev = res_to_dev(res);
+
+ if (!dev)
+ return;
+
+ if (!uaccess_kernel()) {
+ get_task_struct(current);
+ res->task = current;
+ res->kern_name = NULL;
+ } else {
+ set_kern_name(res);
+ res->task = NULL;
+ }
+
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->valid = true;
+
+ down_write(&dev->res.rwsem);
+ hash_add(dev->res.hash, &res->node, res->type);
+ up_write(&dev->res.rwsem);
+}
+EXPORT_SYMBOL(rdma_restrack_add);
+
+int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
+{
+ return kref_get_unless_zero(&res->kref);
+}
+EXPORT_SYMBOL(rdma_restrack_get);
+
+static void restrack_release(struct kref *kref)
+{
+ struct rdma_restrack_entry *res;
+
+ res = container_of(kref, struct rdma_restrack_entry, kref);
+ complete(&res->comp);
+}
+
+int rdma_restrack_put(struct rdma_restrack_entry *res)
+{
+ return kref_put(&res->kref, restrack_release);
+}
+EXPORT_SYMBOL(rdma_restrack_put);
+
+void rdma_restrack_del(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev;
+
+ if (!res->valid)
+ return;
+
+ dev = res_to_dev(res);
+ if (!dev)
+ return;
+
+ rdma_restrack_put(res);
+
+ wait_for_completion(&res->comp);
+
+ down_write(&dev->res.rwsem);
+ hash_del(&res->node);
+ res->valid = false;
+ if (res->task)
+ put_task_struct(res->task);
+ up_write(&dev->res.rwsem);
+}
+EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 90e3889b7fbe..5a52ec77940a 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -410,15 +410,18 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
rtnl_unlock();
}
-/* This function will rescan all of the network devices in the system
- * and add their gids, as needed, to the relevant RoCE devices. */
-int roce_rescan_device(struct ib_device *ib_dev)
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @ib_dev: the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
enum_all_gids_of_dev_cb, NULL);
-
- return 0;
}
+EXPORT_SYMBOL(rdma_roce_rescan_device);
static void callback_for_addr_gid_device_scan(struct ib_device *device,
u8 port,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ab5e1024fea9..8cf15d4a8ac4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,9 +1227,9 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -1341,10 +1341,11 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_path);
+EXPORT_SYMBOL(ib_init_ah_attr_from_path);
static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
spin_lock_irqsave(&query->port->ah_lock, flags);
@@ -1356,6 +1357,15 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah = query->port->sm_ah;
spin_unlock_irqrestore(&query->port->ah_lock, flags);
+ /*
+ * Always check if sm_ah has valid dlid assigned,
+ * before querying for class port info
+ */
+ if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
+ !rdma_is_valid_unicast_lid(&ah_attr)) {
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ return -EAGAIN;
+ }
query->mad_buf = ib_create_send_mad(query->port->agent, 1,
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 59b2f96d986a..b61dda6b04fc 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -653,12 +653,11 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
return ret;
}
-EXPORT_SYMBOL(ib_security_modify_qp);
-int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec)
+static int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
{
u64 subnet_prefix;
u16 pkey;
@@ -678,7 +677,6 @@ int ib_security_pkey_access(struct ib_device *dev,
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
-EXPORT_SYMBOL(ib_security_pkey_access);
static int ib_mad_agent_security_change(struct notifier_block *nb,
unsigned long event,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index e30d86fa1855..8ae1308eecc7 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1276,7 +1276,6 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f7adae0adc19..8ae636bb09e5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -53,6 +53,8 @@
#include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Libor Michalek");
MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
MODULE_LICENSE("Dual BSD/GPL");
@@ -104,10 +106,13 @@ struct ib_ucm_event {
enum {
IB_UCM_MAJOR = 231,
IB_UCM_BASE_MINOR = 224,
- IB_UCM_MAX_DEVICES = 32
+ IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UCM_NUM_FIXED_MINOR = 32,
+ IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR,
};
#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
+static dev_t dynamic_ucm_dev;
static void ib_ucm_add_one(struct ib_device *device);
static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
@@ -1199,7 +1204,6 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
return 0;
}
-static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
static void ib_ucm_release_dev(struct device *dev)
{
struct ib_ucm_device *ucm_dev;
@@ -1210,10 +1214,7 @@ static void ib_ucm_release_dev(struct device *dev)
static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
{
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(ucm_dev->devnum, dev_map);
- else
- clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
+ clear_bit(ucm_dev->devnum, dev_map);
}
static const struct file_operations ucm_fops = {
@@ -1235,27 +1236,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static dev_t overflow_maj;
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
- if (ret >= IB_UCM_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
static void ib_ucm_add_one(struct ib_device *device)
{
int devnum;
@@ -1274,19 +1254,14 @@ static void ib_ucm_add_one(struct ib_device *device)
ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (devnum >= IB_UCM_MAX_DEVICES) {
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- ucm_dev->devnum = devnum;
- base = devnum + IB_UCM_BASE_DEV;
- set_bit(devnum, dev_map);
- }
+ if (devnum >= IB_UCM_MAX_DEVICES)
+ goto err;
+ ucm_dev->devnum = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UCM_NUM_FIXED_MINOR)
+ base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR;
+ else
+ base = IB_UCM_BASE_DEV + devnum;
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
@@ -1334,13 +1309,20 @@ static int __init ib_ucm_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR,
"infiniband_cm");
if (ret) {
pr_err("ucm: couldn't register device number\n");
goto error1;
}
+ ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR,
+ "infiniband_cm");
+ if (ret) {
+ pr_err("ucm: couldn't register dynamic device number\n");
+ goto err_alloc;
+ }
+
ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
if (ret) {
pr_err("ucm: couldn't create abi_version attribute\n");
@@ -1357,7 +1339,9 @@ static int __init ib_ucm_init(void)
error3:
class_remove_file(&cm_class, &class_attr_abi_version.attr);
error2:
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
+err_alloc:
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
error1:
return ret;
}
@@ -1366,9 +1350,8 @@ static void __exit ib_ucm_cleanup(void)
{
ib_unregister_client(&ucm_client);
class_remove_file(&cm_class, &class_attr_abi_version.attr);
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
idr_destroy(&ctx_id_table);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e4be89d1f3d8..6ba4231f2b07 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -904,13 +904,14 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- if (rec->rec_type == SA_PATH_REC_TYPE_IB) {
- ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
- } else {
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
struct sa_path_rec ib;
sa_convert_path_opa_to_ib(&ib, rec);
ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+
+ } else {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
}
}
@@ -943,8 +944,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
+ NULL);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.src_addr);
}
@@ -956,8 +957,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, NULL,
+ (union ib_gid *)&addr->sib_addr);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.dst_addr);
}
@@ -1231,9 +1232,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
struct sa_path_rec opa;
sa_convert_path_ib_to_opa(&opa, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1);
+ ret = rdma_set_ib_path(ctx->cm_id, &opa);
} else {
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
}
if (ret)
return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 130606c3b07c..9a4e899d94b3 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -352,7 +352,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
return -EINVAL;
}
- ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+ ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
offset + ib_umem_offset(umem));
if (ret < 0)
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 0c32d10f23ff..78c77962422e 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -55,16 +55,21 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
MODULE_LICENSE("Dual BSD/GPL");
enum {
- IB_UMAD_MAX_PORTS = 64,
+ IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS,
IB_UMAD_MAX_AGENTS = 32,
IB_UMAD_MAJOR = 231,
- IB_UMAD_MINOR_BASE = 0
+ IB_UMAD_MINOR_BASE = 0,
+ IB_UMAD_NUM_FIXED_MINOR = 64,
+ IB_UMAD_NUM_DYNAMIC_MINOR = IB_UMAD_MAX_PORTS - IB_UMAD_NUM_FIXED_MINOR,
+ IB_ISSM_MINOR_BASE = IB_UMAD_NUM_FIXED_MINOR,
};
/*
@@ -127,9 +132,12 @@ struct ib_umad_packet {
static struct class *umad_class;
-static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
+ IB_UMAD_NUM_FIXED_MINOR;
+static dev_t dynamic_umad_dev;
+static dev_t dynamic_issm_dev;
-static DEFINE_SPINLOCK(port_lock);
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
@@ -233,8 +241,7 @@ static void recv_handler(struct ib_mad_agent *agent,
* On OPA devices it is okay to lose the upper 16 bits of LID as this
* information is obtained elsewhere. Mask off the upper 16 bits.
*/
- if (agent->device->port_immutable[agent->port_num].core_cap_flags &
- RDMA_CORE_PORT_INTEL_OPA)
+ if (rdma_cap_opa_mad(agent->device, agent->port_num))
packet->mad.hdr.lid = ib_lid_be16(0xFFFF &
mad_recv_wc->wc->slid);
else
@@ -246,10 +253,14 @@ static void recv_handler(struct ib_mad_agent *agent,
if (packet->mad.hdr.grh_present) {
struct rdma_ah_attr ah_attr;
const struct ib_global_route *grh;
+ int ret;
- ib_init_ah_from_wc(agent->device, agent->port_num,
- mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
- &ah_attr);
+ ret = ib_init_ah_attr_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc,
+ mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto err2;
grh = rdma_ah_read_grh(&ah_attr);
packet->mad.hdr.gid_index = grh->sgid_index;
@@ -500,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.type = rdma_ah_find_type(file->port->ib_dev,
+ ah_attr.type = rdma_ah_find_type(agent->device,
file->port->port_num);
rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
@@ -1139,54 +1150,26 @@ static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_MAD_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(struct ib_device *device)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
- "infiniband_mad");
- if (ret) {
- dev_err(&device->dev,
- "couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
- if (ret >= IB_UMAD_MAX_PORTS)
- return -1;
-
- return ret;
-}
-
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
- dev_t base;
+ dev_t base_umad;
+ dev_t base_issm;
- spin_lock(&port_lock);
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&port_lock);
- devnum = find_overflow_devnum(device);
- if (devnum < 0)
- return -1;
-
- spin_lock(&port_lock);
- port->dev_num = devnum + IB_UMAD_MAX_PORTS;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
+ if (devnum >= IB_UMAD_MAX_PORTS)
+ return -1;
+ port->dev_num = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
+ base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
+ base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
} else {
- port->dev_num = devnum;
- base = devnum + base_dev;
- set_bit(devnum, dev_map);
+ base_umad = devnum + base_umad_dev;
+ base_issm = devnum + base_issm_dev;
}
- spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
@@ -1198,7 +1181,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
port->cdev.owner = THIS_MODULE;
cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
- if (cdev_add(&port->cdev, base, 1))
+ if (cdev_add(&port->cdev, base_umad, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dev.parent,
@@ -1212,12 +1195,11 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
- base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
- if (cdev_add(&port->sm_cdev, base, 1))
+ if (cdev_add(&port->sm_cdev, base_issm, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dev.parent,
@@ -1244,10 +1226,7 @@ err_dev:
err_cdev:
cdev_del(&port->cdev);
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ clear_bit(devnum, dev_map);
return -1;
}
@@ -1281,11 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
-
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(port->dev_num, dev_map);
- else
- clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
+ clear_bit(port->dev_num, dev_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1361,13 +1336,23 @@ static int __init ib_umad_init(void)
{
int ret;
- ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
+ ret = register_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2,
"infiniband_mad");
if (ret) {
pr_err("couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2,
+ "infiniband_mad");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+ dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
+
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
@@ -1395,7 +1380,12 @@ out_class:
class_destroy(umad_class);
out_chrdev:
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
+
+out_alloc:
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
out:
return ret;
@@ -1405,9 +1395,10 @@ static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
class_destroy(umad_class);
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
}
module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 840b24096690..256934d1f64f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -340,6 +340,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
uobj->object = pd;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
+ pd->res.type = RDMA_RESTRACK_PD;
+ rdma_restrack_add(&pd->res);
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
@@ -1033,6 +1035,8 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
goto err_cb;
uobj_alloc_commit(&obj->uobject);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
return obj;
@@ -1145,10 +1149,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
min(ucore->inlen, sizeof(cmd)),
ib_uverbs_ex_create_cq_cb, NULL);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return 0;
+ return PTR_ERR_OR_ZERO(obj);
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
@@ -1199,7 +1200,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
tmp.opcode = wc->opcode;
tmp.vendor_err = wc->vendor_err;
tmp.byte_len = wc->byte_len;
- tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.ex.imm_data = wc->ex.imm_data;
tmp.qp_num = wc->qp->qp_num;
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
@@ -1517,7 +1518,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = device->create_qp(pd, &attr, uhw);
+ qp = _ib_create_qp(device, pd, &attr, uhw);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1530,7 +1531,6 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_cb;
qp->real_qp = qp;
- qp->device = device;
qp->pd = pd;
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 71ff2644e053..d96dc1d17be1 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -243,16 +243,13 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
size_t ctx_size;
uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
- if (hdr->reserved)
- return -EINVAL;
-
object_spec = uverbs_get_object(ib_dev, hdr->object_id);
if (!object_spec)
- return -EOPNOTSUPP;
+ return -EPROTONOSUPPORT;
method_spec = uverbs_get_method(object_spec, hdr->method_id);
if (!method_spec)
- return -EOPNOTSUPP;
+ return -EPROTONOSUPPORT;
if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
return -EINVAL;
@@ -305,6 +302,16 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
file, method_spec, ctx->uverbs_attr_bundle);
+
+ /*
+ * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
+ * not invoke the method because the request is not supported. No
+ * other cases should return this code.
+ */
+ if (unlikely(err == -EPROTONOSUPPORT)) {
+ WARN_ON_ONCE(err == -EPROTONOSUPPORT);
+ err = -EINVAL;
+ }
out:
if (ctx != (void *)data)
kfree(ctx);
@@ -341,7 +348,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
if (hdr.reserved) {
- err = -EOPNOTSUPP;
+ err = -EPROTONOSUPPORT;
goto out;
}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f216ffb465a..5b811bf574d6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -62,14 +62,16 @@ MODULE_LICENSE("Dual BSD/GPL");
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
- IB_UVERBS_MAX_DEVICES = 32
+ IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UVERBS_NUM_FIXED_MINOR = 32,
+ IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
-static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
@@ -1005,34 +1007,6 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
-
-/*
- * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
- * requesting a new major number and doubling the number of max devices we
- * support. It's stupid, but simple.
- */
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
- "infiniband_verbs");
- if (ret) {
- pr_err("user_verbs: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
- if (ret >= IB_UVERBS_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
static void ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
@@ -1062,24 +1036,15 @@ static void ib_uverbs_add_one(struct ib_device *device)
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
- spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (devnum >= IB_UVERBS_MAX_DEVICES) {
- spin_unlock(&map_lock);
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- spin_lock(&map_lock);
- uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- uverbs_dev->devnum = devnum;
- base = devnum + IB_UVERBS_BASE_DEV;
- set_bit(devnum, dev_map);
- }
- spin_unlock(&map_lock);
+ if (devnum >= IB_UVERBS_MAX_DEVICES)
+ goto err;
+ uverbs_dev->devnum = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
+ base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
+ else
+ base = IB_UVERBS_BASE_DEV + devnum;
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
@@ -1124,10 +1089,7 @@ err_class:
err_cdev:
cdev_del(&uverbs_dev->cdev);
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ clear_bit(devnum, dev_map);
err:
if (atomic_dec_and_test(&uverbs_dev->refcount))
@@ -1219,11 +1181,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
dev_set_drvdata(uverbs_dev->dev, NULL);
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
cdev_del(&uverbs_dev->cdev);
-
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(uverbs_dev->devnum, dev_map);
- else
- clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+ clear_bit(uverbs_dev->devnum, dev_map);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
@@ -1265,13 +1223,22 @@ static int __init ib_uverbs_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR,
"infiniband_verbs");
if (ret) {
pr_err("user_verbs: couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
+ IB_UVERBS_NUM_DYNAMIC_MINOR,
+ "infiniband_verbs");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+
uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
if (IS_ERR(uverbs_class)) {
ret = PTR_ERR(uverbs_class);
@@ -1299,7 +1266,12 @@ out_class:
class_destroy(uverbs_class);
out_chrdev:
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
+
+out_alloc:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
out:
return ret;
@@ -1309,9 +1281,10 @@ static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
class_destroy(uverbs_class);
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index c3ee5d9b336d..b571176babbe 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -35,6 +35,7 @@
#include <rdma/ib_verbs.h>
#include <linux/bug.h>
#include <linux/file.h>
+#include <rdma/restrack.h>
#include "rdma_core.h"
#include "uverbs.h"
@@ -319,6 +320,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev,
obj->uobject.object = cq;
obj->uobject.user_handle = user_handle;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
if (ret)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e36d27ed4daa..16ebc6372c31 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -124,16 +124,24 @@ EXPORT_SYMBOL(ib_wc_status_msg);
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
- case IB_RATE_2_5_GBPS: return 1;
- case IB_RATE_5_GBPS: return 2;
- case IB_RATE_10_GBPS: return 4;
- case IB_RATE_20_GBPS: return 8;
- case IB_RATE_30_GBPS: return 12;
- case IB_RATE_40_GBPS: return 16;
- case IB_RATE_60_GBPS: return 24;
- case IB_RATE_80_GBPS: return 32;
- case IB_RATE_120_GBPS: return 48;
- default: return -1;
+ case IB_RATE_2_5_GBPS: return 1;
+ case IB_RATE_5_GBPS: return 2;
+ case IB_RATE_10_GBPS: return 4;
+ case IB_RATE_20_GBPS: return 8;
+ case IB_RATE_30_GBPS: return 12;
+ case IB_RATE_40_GBPS: return 16;
+ case IB_RATE_60_GBPS: return 24;
+ case IB_RATE_80_GBPS: return 32;
+ case IB_RATE_120_GBPS: return 48;
+ case IB_RATE_14_GBPS: return 6;
+ case IB_RATE_56_GBPS: return 22;
+ case IB_RATE_112_GBPS: return 45;
+ case IB_RATE_168_GBPS: return 67;
+ case IB_RATE_25_GBPS: return 10;
+ case IB_RATE_100_GBPS: return 40;
+ case IB_RATE_200_GBPS: return 80;
+ case IB_RATE_300_GBPS: return 120;
+ default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mult);
@@ -141,16 +149,24 @@ EXPORT_SYMBOL(ib_rate_to_mult);
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
- case 1: return IB_RATE_2_5_GBPS;
- case 2: return IB_RATE_5_GBPS;
- case 4: return IB_RATE_10_GBPS;
- case 8: return IB_RATE_20_GBPS;
- case 12: return IB_RATE_30_GBPS;
- case 16: return IB_RATE_40_GBPS;
- case 24: return IB_RATE_60_GBPS;
- case 32: return IB_RATE_80_GBPS;
- case 48: return IB_RATE_120_GBPS;
- default: return IB_RATE_PORT_CURRENT;
+ case 1: return IB_RATE_2_5_GBPS;
+ case 2: return IB_RATE_5_GBPS;
+ case 4: return IB_RATE_10_GBPS;
+ case 8: return IB_RATE_20_GBPS;
+ case 12: return IB_RATE_30_GBPS;
+ case 16: return IB_RATE_40_GBPS;
+ case 24: return IB_RATE_60_GBPS;
+ case 32: return IB_RATE_80_GBPS;
+ case 48: return IB_RATE_120_GBPS;
+ case 6: return IB_RATE_14_GBPS;
+ case 22: return IB_RATE_56_GBPS;
+ case 45: return IB_RATE_112_GBPS;
+ case 67: return IB_RATE_168_GBPS;
+ case 10: return IB_RATE_25_GBPS;
+ case 40: return IB_RATE_100_GBPS;
+ case 80: return IB_RATE_200_GBPS;
+ case 120: return IB_RATE_300_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
}
}
EXPORT_SYMBOL(mult_to_ib_rate);
@@ -247,6 +263,10 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
}
+ pd->res.type = RDMA_RESTRACK_PD;
+ pd->res.kern_name = caller;
+ rdma_restrack_add(&pd->res);
+
if (mr_access_flags) {
struct ib_mr *mr;
@@ -296,6 +316,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
+ rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
ret = pd->device->dealloc_pd(pd);
@@ -421,8 +442,7 @@ static bool find_gid_index(const union ib_gid *gid,
const struct ib_gid_attr *gid_attr,
void *context)
{
- struct find_gid_index_context *ctx =
- (struct find_gid_index_context *)context;
+ struct find_gid_index_context *ctx = context;
if (ctx->gid_type != gid_attr->gid_type)
return false;
@@ -481,8 +501,53 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/* Resolve destination mac address and hop limit for unicast destination
+ * GID entry, considering the source GID entry as well.
+ * ah_attr must have a valid port_num and sgid_index.
+ */
+static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ib_gid_attr sgid_attr;
+ struct ib_global_route *grh;
+ int hop_limit = 0xff;
+ union ib_gid sgid;
+ int ret;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ ret = ib_query_gid(device,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
+ &sgid, &sgid_attr);
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ return ret;
+ }
+
+ /* If destination is link local and source GID is RoCEv1,
+ * IP stack is not used.
+ */
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
+ sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
+ goto done;
+ }
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ sgid_attr.ndev, &hop_limit);
+done:
+ dev_put(sgid_attr.ndev);
+
+ grh->hop_limit = hop_limit;
+ return ret;
+}
+
/*
- * This function creates ah from the incoming packet.
+ * This function initializes address handle attributes from the incoming packet.
* Incoming packet has dgid of the receiver node on which this code is
* getting executed and, sgid contains the GID of the sender.
*
@@ -490,13 +555,10 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
- * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
- * position of arguments dgid and sgid do not match the order of the
- * parameters.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
- const struct ib_wc *wc, const struct ib_grh *grh,
- struct rdma_ah_attr *ah_attr)
+int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -523,57 +585,33 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (ret)
return ret;
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
if (rdma_protocol_roce(device, port_num)) {
- int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
- struct net_device *idev;
- struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->roce.dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- &if_index, &hoplimit);
- if (ret) {
- dev_put(idev);
- return ret;
- }
-
- resolved_dev = dev_get_by_index(&init_net, if_index);
- rcu_read_lock();
- if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
- resolved_dev))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
+ ret = get_sgid_index_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type, &gid_index);
if (ret)
return ret;
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &dgid, gid_type, &gid_index);
- if (ret)
- return ret;
- }
-
- rdma_ah_set_dlid(ah_attr, wc->slid);
- rdma_ah_set_sl(ah_attr, wc->sl);
- rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- rdma_ah_set_port_num(ah_attr, port_num);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ } else {
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (!rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_GRH) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
IB_GID_TYPE_IB,
@@ -584,18 +622,17 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
} else {
gid_index = 0;
}
- }
-
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ }
+ return 0;
}
- return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_wc);
+EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
@@ -603,7 +640,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
struct rdma_ah_attr ah_attr;
int ret;
- ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+ ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
@@ -850,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = device->create_qp(pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL);
if (IS_ERR(qp))
return qp;
@@ -860,7 +897,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
return ERR_PTR(ret);
}
- qp->device = device;
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
@@ -890,7 +926,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
atomic_inc(&qp_init_attr->srq->usecnt);
}
- qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->xrcd = NULL;
@@ -1269,16 +1304,8 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
- return 0;
-
grh = rdma_ah_retrieve_grh(ah_attr);
- if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
- ah_attr->roce.dmac);
- return 0;
- }
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
__be32 addr = 0;
@@ -1290,40 +1317,52 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
(char *)ah_attr->roce.dmac);
}
} else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
-
- ifindex = sgid_attr.ndev->ifindex;
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ }
+ return ret;
+}
- ret =
- rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
- ah_attr->roce.dmac,
- NULL, &ifindex, &hop_limit);
+/*
+ * IB core internal function to perform QP attributes modification.
+ */
+static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ int ret;
- dev_put(sgid_attr.ndev);
+ if (rdma_ib_or_roce(qp->device, port)) {
+ if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
+ pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->rq_psn &= 0xffffff;
+ }
- grh->hop_limit = hop_limit;
+ if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
+ pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->sq_psn &= 0xffffff;
+ }
}
-out:
+
+ ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
+ if (!ret && (attr_mask & IB_QP_PORT))
+ qp->port = attr->port_num;
+
return ret;
}
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
- * @qp: The QP to modify.
+ * @ib_qp: The QP to modify.
* @attr: On input, specifies the QP attributes to modify. On output,
* the current values of selected QP attributes are returned.
* @attr_mask: A bit-mask used to specify which attributes of the QP
@@ -1332,21 +1371,20 @@ out:
* are being modified.
* It returns 0 on success and returns appropriate error code on error.
*/
-int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
+ struct ib_qp *qp = ib_qp->real_qp;
int ret;
- if (attr_mask & IB_QP_AV) {
+ if (attr_mask & IB_QP_AV &&
+ attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
if (ret)
return ret;
}
- ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
- if (!ret && (attr_mask & IB_QP_PORT))
- qp->port = attr->port_num;
-
- return ret;
+ return _ib_modify_qp(qp, attr, attr_mask, udata);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
@@ -1409,7 +1447,7 @@ int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
+ return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -1503,6 +1541,7 @@ int ib_destroy_qp(struct ib_qp *qp)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
+ rdma_restrack_del(&qp->res);
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
@@ -1545,6 +1584,8 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
}
return cq;
@@ -1563,6 +1604,7 @@ int ib_destroy_cq(struct ib_cq *cq)
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ rdma_restrack_del(&cq->res);
return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
@@ -1747,7 +1789,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
@@ -1765,7 +1807,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
return xrcd;
}
-EXPORT_SYMBOL(ib_alloc_xrcd);
+EXPORT_SYMBOL(__ib_alloc_xrcd);
int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
@@ -1790,11 +1832,11 @@ EXPORT_SYMBOL(ib_dealloc_xrcd);
* ib_create_wq - Creates a WQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the WQ.
- * @wq_init_attr: A list of initial attributes required to create the
+ * @wq_attr: A list of initial attributes required to create the
* WQ. If WQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created WQ.
*
- * wq_init_attr->max_wr and wq_init_attr->max_sge determine
+ * wq_attr->max_wr and wq_attr->max_sge determine
* the requested size of the WQ, and set to the actual values allocated
* on return.
* If ib_create_wq() succeeds, then max_wr and max_sge will always be
@@ -2156,16 +2198,16 @@ static void __ib_drain_sq(struct ib_qp *qp)
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- swr.wr_cqe = &sdrain.cqe;
- sdrain.cqe.done = ib_drain_qp_done;
- init_completion(&sdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
+ swr.wr_cqe = &sdrain.cqe;
+ sdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
ret = ib_post_send(qp, &swr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
@@ -2190,16 +2232,16 @@ static void __ib_drain_rq(struct ib_qp *qp)
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- rwr.wr_cqe = &rdrain.cqe;
- rdrain.cqe.done = ib_drain_qp_done;
- init_completion(&rdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
ret = ib_post_recv(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);