aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile6
-rw-r--r--drivers/infiniband/core/addr.c12
-rw-r--r--drivers/infiniband/core/cache.c23
-rw-r--r--drivers/infiniband/core/cm.c224
-rw-r--r--drivers/infiniband/core/cma.c35
-rw-r--r--drivers/infiniband/core/core_priv.h27
-rw-r--r--drivers/infiniband/core/device.c144
-rw-r--r--drivers/infiniband/core/iwcm.c16
-rw-r--r--drivers/infiniband/core/iwpm_msg.c20
-rw-r--r--drivers/infiniband/core/iwpm_util.c15
-rw-r--r--drivers/infiniband/core/mad_rmpp.c2
-rw-r--r--drivers/infiniband/core/netlink.c321
-rw-r--r--drivers/infiniband/core/nldev.c325
-rw-r--r--drivers/infiniband/core/rdma_core.c179
-rw-r--r--drivers/infiniband/core/rdma_core.h42
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c2
-rw-r--r--drivers/infiniband/core/sa_query.c42
-rw-r--r--drivers/infiniband/core/sysfs.c4
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/ucma.c10
-rw-r--r--drivers/infiniband/core/user_mad.c2
-rw-r--r--drivers/infiniband/core/uverbs.h3
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c276
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c364
-rw-r--r--drivers/infiniband/core/uverbs_ioctl_merge.c665
-rw-r--r--drivers/infiniband/core/uverbs_main.c37
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c48
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c281
-rw-r--r--drivers/infiniband/core/verbs.c169
29 files changed, 2718 insertions, 578 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index e3cdafff8ece..b4df164f71a6 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- security.o
+ security.o nldev.o
+
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -31,4 +32,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
- rdma_core.o uverbs_std_types.o
+ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
+ uverbs_ioctl_merge.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 437522ca97b4..12523f630b61 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -130,13 +130,11 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
}
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
-
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
- !(NETLINK_CB(skb).sk) ||
- !netlink_capable(skb, CAP_NET_ADMIN))
+ !(NETLINK_CB(skb).sk))
return -EPERM;
if (ib_nl_is_good_ip_resp(nlh))
@@ -186,7 +184,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+ rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
/* Make the request retry, so when we get the response from userspace
* we will have something.
@@ -326,7 +324,7 @@ static void queue_req(struct addr_req *req)
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
- if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
+ if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
return -EADDRNOTAVAIL;
/* We fill in what we can, the response will fill the rest */
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index efc94304dee3..77515638c55c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1199,30 +1199,23 @@ int ib_cache_setup_one(struct ib_device *device)
device->cache.ports =
kzalloc(sizeof(*device->cache.ports) *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- if (!device->cache.ports) {
- err = -ENOMEM;
- goto out;
- }
+ if (!device->cache.ports)
+ return -ENOMEM;
err = gid_table_setup_one(device);
- if (err)
- goto out;
+ if (err) {
+ kfree(device->cache.ports);
+ device->cache.ports = NULL;
+ return err;
+ }
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device), true);
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
- err = ib_register_event_handler(&device->cache.event_handler);
- if (err)
- goto err;
-
+ ib_register_event_handler(&device->cache.event_handler);
return 0;
-
-err:
- gid_table_cleanup_one(device);
-out:
- return err;
}
void ib_cache_release_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2b4d613a3474..4c4b46586af2 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -373,11 +373,19 @@ out:
return ret;
}
-static int cm_alloc_response_msg(struct cm_port *port,
- struct ib_mad_recv_wc *mad_recv_wc,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC,
+ IB_MGMT_BASE_VERSION);
+}
+
+static int cm_create_response_msg_ah(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ struct ib_mad_send_buf *msg)
{
- struct ib_mad_send_buf *m;
struct ib_ah *ah;
ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
@@ -385,27 +393,40 @@ static int cm_alloc_response_msg(struct cm_port *port,
if (IS_ERR(ah))
return PTR_ERR(ah);
- m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
- 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
- GFP_ATOMIC,
- IB_MGMT_BASE_VERSION);
- if (IS_ERR(m)) {
- rdma_destroy_ah(ah);
- return PTR_ERR(m);
- }
- m->ah = ah;
- *msg = m;
+ msg->ah = ah;
return 0;
}
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- rdma_destroy_ah(msg->ah);
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
}
+static int cm_alloc_response_msg(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ struct ib_mad_send_buf **msg)
+{
+ struct ib_mad_send_buf *m;
+ int ret;
+
+ m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
+ if (IS_ERR(m))
+ return PTR_ERR(m);
+
+ ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
+ if (ret) {
+ cm_free_msg(m);
+ return ret;
+ }
+
+ *msg = m;
+ return 0;
+}
+
static void * cm_copy_private_data(const void *private_data,
u8 private_data_len)
{
@@ -1175,6 +1196,11 @@ static void cm_format_req(struct cm_req_msg *req_msg,
{
struct sa_path_rec *pri_path = param->primary_path;
struct sa_path_rec *alt_path = param->alternate_path;
+ bool pri_ext = false;
+
+ if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
+ pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
+ pri_path->opa.slid);
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
@@ -1202,18 +1228,24 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_req_set_srq(req_msg, param->srq);
}
+ req_msg->primary_local_gid = pri_path->sgid;
+ req_msg->primary_remote_gid = pri_path->dgid;
+ if (pri_ext) {
+ req_msg->primary_local_gid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
+ req_msg->primary_remote_gid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
+ }
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid =
+ req_msg->primary_local_lid = pri_ext ? 0 :
htons(ntohl(sa_path_get_slid(pri_path)));
- req_msg->primary_remote_lid =
+ req_msg->primary_remote_lid = pri_ext ? 0 :
htons(ntohl(sa_path_get_dlid(pri_path)));
} else {
/* Work-around until there's a way to obtain remote LID info */
req_msg->primary_local_lid = IB_LID_PERMISSIVE;
req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
}
- req_msg->primary_local_gid = pri_path->sgid;
- req_msg->primary_remote_gid = pri_path->dgid;
cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
req_msg->primary_traffic_class = pri_path->traffic_class;
@@ -1225,17 +1257,29 @@ static void cm_format_req(struct cm_req_msg *req_msg,
pri_path->packet_life_time));
if (alt_path) {
+ bool alt_ext = false;
+
+ if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
+ alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
+ alt_path->opa.slid);
+
+ req_msg->alt_local_gid = alt_path->sgid;
+ req_msg->alt_remote_gid = alt_path->dgid;
+ if (alt_ext) {
+ req_msg->alt_local_gid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
+ req_msg->alt_remote_gid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
+ }
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid =
+ req_msg->alt_local_lid = alt_ext ? 0 :
htons(ntohl(sa_path_get_slid(alt_path)));
- req_msg->alt_remote_lid =
+ req_msg->alt_remote_lid = alt_ext ? 0 :
htons(ntohl(sa_path_get_dlid(alt_path)));
} else {
req_msg->alt_local_lid = IB_LID_PERMISSIVE;
req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
}
- req_msg->alt_local_gid = alt_path->sgid;
- req_msg->alt_remote_gid = alt_path->dgid;
cm_req_set_alt_flow_label(req_msg,
alt_path->flow_label);
cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
@@ -1405,16 +1449,63 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
(be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}
+static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
+{
+ return ((req_msg->alt_local_lid) ||
+ (ib_is_opa_gid(&req_msg->alt_local_gid)));
+}
+
+static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
+ struct sa_path_rec *path, union ib_gid *gid)
+{
+ if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
+ path->rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->rec_type = SA_PATH_REC_TYPE_IB;
+}
+
+static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path)
+{
+ u32 lid;
+
+ if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
+ sa_path_set_dlid(primary_path,
+ htonl(ntohs(req_msg->primary_local_lid)));
+ sa_path_set_slid(primary_path,
+ htonl(ntohs(req_msg->primary_remote_lid)));
+ } else {
+ lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
+ sa_path_set_dlid(primary_path, cpu_to_be32(lid));
+
+ lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
+ sa_path_set_slid(primary_path, cpu_to_be32(lid));
+ }
+
+ if (!cm_req_has_alt_path(req_msg))
+ return;
+
+ if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
+ sa_path_set_dlid(alt_path,
+ htonl(ntohs(req_msg->alt_local_lid)));
+ sa_path_set_slid(alt_path,
+ htonl(ntohs(req_msg->alt_remote_lid)));
+ } else {
+ lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
+ sa_path_set_dlid(alt_path, cpu_to_be32(lid));
+
+ lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
+ sa_path_set_slid(alt_path, cpu_to_be32(lid));
+ }
+}
+
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
struct sa_path_rec *alt_path)
{
primary_path->dgid = req_msg->primary_local_gid;
primary_path->sgid = req_msg->primary_remote_gid;
- sa_path_set_dlid(primary_path,
- htonl(ntohs(req_msg->primary_local_lid)));
- sa_path_set_slid(primary_path,
- htonl(ntohs(req_msg->primary_remote_lid)));
primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
primary_path->hop_limit = req_msg->primary_hop_limit;
primary_path->traffic_class = req_msg->primary_traffic_class;
@@ -1431,13 +1522,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
primary_path->service_id = req_msg->service_id;
- if (req_msg->alt_local_lid) {
+ if (cm_req_has_alt_path(req_msg)) {
alt_path->dgid = req_msg->alt_local_gid;
alt_path->sgid = req_msg->alt_remote_gid;
- sa_path_set_dlid(alt_path,
- htonl(ntohs(req_msg->alt_local_lid)));
- sa_path_set_slid(alt_path,
- htonl(ntohs(req_msg->alt_remote_lid)));
alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
alt_path->hop_limit = req_msg->alt_hop_limit;
alt_path->traffic_class = req_msg->alt_traffic_class;
@@ -1454,6 +1541,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
alt_path->service_id = req_msg->service_id;
}
+ cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
}
static u16 cm_get_bth_pkey(struct cm_work *work)
@@ -1703,7 +1791,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
{
if (!cm_req_get_primary_subnet_local(req_msg)) {
if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
- req_msg->primary_local_lid = cpu_to_be16(wc->slid);
+ req_msg->primary_local_lid = ib_lid_be16(wc->slid);
cm_req_set_primary_sl(req_msg, wc->sl);
}
@@ -1713,7 +1801,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
if (!cm_req_get_alt_subnet_local(req_msg)) {
if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
- req_msg->alt_local_lid = cpu_to_be16(wc->slid);
+ req_msg->alt_local_lid = ib_lid_be16(wc->slid);
cm_req_set_alt_sl(req_msg, wc->sl);
}
@@ -1784,9 +1872,12 @@ static int cm_req_handler(struct cm_work *work)
dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
} else {
- work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ &work->path[0],
+ &req_msg->primary_local_gid);
}
- if (req_msg->alt_local_lid)
+ if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
cm_format_paths_from_req(req_msg, &work->path[0],
&work->path[1]);
@@ -1811,16 +1902,19 @@ static int cm_req_handler(struct cm_work *work)
dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
} else {
- work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ &work->path[0],
+ &req_msg->primary_local_gid);
}
- if (req_msg->alt_local_lid)
+ if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
goto rejected;
}
- if (req_msg->alt_local_lid) {
+ if (cm_req_has_alt_path(req_msg)) {
ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
cm_id_priv);
if (ret) {
@@ -2424,7 +2518,8 @@ static int cm_dreq_handler(struct cm_work *work)
case IB_CM_TIMEWAIT:
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
- if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+ msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
+ if (IS_ERR(msg))
goto unlock;
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
@@ -2432,7 +2527,8 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irq(&cm_id_priv->lock);
- if (ib_post_send_mad(msg, NULL))
+ if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
+ ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
case IB_CM_DREQ_RCVD:
@@ -2843,6 +2939,11 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
const void *private_data,
u8 private_data_len)
{
+ bool alt_ext = false;
+
+ if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
+ alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
+ alternate_path->opa.slid);
cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
lap_msg->local_comm_id = cm_id_priv->id.local_id;
@@ -2856,6 +2957,12 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
htons(ntohl(sa_path_get_dlid(alternate_path)));
lap_msg->alt_local_gid = alternate_path->sgid;
lap_msg->alt_remote_gid = alternate_path->dgid;
+ if (alt_ext) {
+ lap_msg->alt_local_gid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
+ lap_msg->alt_remote_gid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
+ }
cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
lap_msg->alt_hop_limit = alternate_path->hop_limit;
@@ -2924,16 +3031,29 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
EXPORT_SYMBOL(ib_send_cm_lap);
+static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
+ struct sa_path_rec *path)
+{
+ u32 lid;
+
+ if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
+ sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
+ sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
+ } else {
+ lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
+ sa_path_set_dlid(path, cpu_to_be32(lid));
+
+ lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
+ sa_path_set_slid(path, cpu_to_be32(lid));
+ }
+}
+
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
- memset(path, 0, sizeof *path);
- path->rec_type = SA_PATH_REC_TYPE_IB;
path->dgid = lap_msg->alt_local_gid;
path->sgid = lap_msg->alt_remote_gid;
- sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
- sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
path->flow_label = cm_lap_get_flow_label(lap_msg);
path->hop_limit = lap_msg->alt_hop_limit;
path->traffic_class = cm_lap_get_traffic_class(lap_msg);
@@ -2947,6 +3067,7 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
path->packet_life_time_selector = IB_SA_EQ;
path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
path->packet_life_time -= (path->packet_life_time > 0);
+ cm_format_path_lid_from_lap(lap_msg, path);
}
static int cm_lap_handler(struct cm_work *work)
@@ -2965,6 +3086,11 @@ static int cm_lap_handler(struct cm_work *work)
return -EINVAL;
param = &work->cm_event.param.lap_rcvd;
+ memset(&work->path[0], 0, sizeof(work->path[1]));
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ &work->path[0],
+ &lap_msg->alt_local_gid);
param->alternate_path = &work->path[0];
cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
work->cm_event.private_data = &lap_msg->private_data;
@@ -2980,7 +3106,8 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_MRA_LAP_SENT:
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_LAP_COUNTER]);
- if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+ msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
+ if (IS_ERR(msg))
goto unlock;
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
@@ -2990,7 +3117,8 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irq(&cm_id_priv->lock);
- if (ib_post_send_mad(msg, NULL))
+ if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
+ ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
case IB_CM_LAP_RCVD:
@@ -4201,7 +4329,7 @@ static int __init ib_cm_init(void)
goto error1;
}
- cm.wq = create_workqueue("ib_cm");
+ cm.wq = alloc_workqueue("ib_cm", 0, 1);
if (!cm.wq) {
ret = -ENOMEM;
goto error2;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0eb393237ba2..852c8fec8088 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -72,6 +72,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
+#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
static const char * const cma_events[] = {
[RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved",
@@ -3998,7 +3999,8 @@ static void iboe_mcast_work_handler(struct work_struct *work)
kfree(mw);
}
-static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type)
{
struct sockaddr_in *sin = (struct sockaddr_in *)addr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
@@ -4008,8 +4010,8 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- mgid->raw[0] = 0xff;
- mgid->raw[1] = 0x0e;
+ mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
+ mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@@ -4050,7 +4052,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
goto out1;
}
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -4066,8 +4070,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
- gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
@@ -4280,8 +4282,12 @@ static void cma_add_one(struct ib_device *device)
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
- cma_dev->default_gid_type[i - rdma_start_port(device)] =
- find_first_bit(&supported_gids, BITS_PER_LONG);
+ if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ CMA_PREFERRED_ROCE_GID_TYPE;
+ else
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ find_first_bit(&supported_gids, BITS_PER_LONG);
cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
@@ -4452,9 +4458,8 @@ out:
return skb->len;
}
-static const struct ibnl_client_cbs cma_cb_table[] = {
- [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats,
- .module = THIS_MODULE },
+static const struct rdma_nl_cbs cma_cb_table[] = {
+ [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};
static int cma_init_net(struct net *net)
@@ -4506,9 +4511,7 @@ static int __init cma_init(void)
if (ret)
goto err;
- if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
- cma_cb_table))
- pr_warn("RDMA CMA: failed to add netlink callback\n");
+ rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
cma_configfs_init();
return 0;
@@ -4525,7 +4528,7 @@ err_wq:
static void __exit cma_cleanup(void)
{
cma_configfs_exit();
- ibnl_remove_client(RDMA_NL_RDMA_CM);
+ rdma_nl_unregister(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
@@ -4534,5 +4537,7 @@ static void __exit cma_cleanup(void)
destroy_workqueue(cma_wq);
}
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);
+
module_init(cma_init);
module_exit(cma_cleanup);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 11ae67514e13..a1d687a664f8 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,6 +38,7 @@
#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
+#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
#include "mad_priv.h"
@@ -102,6 +103,14 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
roce_netdev_callback cb,
void *cookie);
+typedef int (*nldev_callback)(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx);
+
+int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
+ struct netlink_callback *cb);
+
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
@@ -179,8 +188,8 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
-int ibnl_init(void);
-void ibnl_cleanup(void);
+int rdma_nl_init(void);
+void rdma_nl_exit(void);
/**
* Check if there are any listeners to the netlink group
@@ -190,11 +199,14 @@ void ibnl_cleanup(void);
int ibnl_chk_listeners(unsigned int group);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
- struct netlink_callback *cb);
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
- struct netlink_callback *cb);
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
- struct netlink_callback *cb);
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
int ib_get_cached_subnet_prefix(struct ib_device *device,
u8 port_num,
@@ -301,4 +313,9 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
return 0;
}
#endif
+
+struct ib_device *__ib_device_get_by_index(u32 ifindex);
+/* RDMA device netlink */
+void nldev_init(void);
+void nldev_exit(void);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 221468f77128..84fc32a2c8b3 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -134,6 +134,17 @@ static int ib_device_check_mandatory(struct ib_device *device)
return 0;
}
+struct ib_device *__ib_device_get_by_index(u32 index)
+{
+ struct ib_device *device;
+
+ list_for_each_entry(device, &device_list, core_list)
+ if (device->index == index)
+ return device;
+
+ return NULL;
+}
+
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
@@ -145,7 +156,6 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
return NULL;
}
-
static int alloc_name(char *name)
{
unsigned long *inuse;
@@ -326,10 +336,10 @@ static int read_port_immutable(struct ib_device *device)
return 0;
}
-void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
+void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->get_dev_fw_str)
- dev->get_dev_fw_str(dev, str, str_len);
+ dev->get_dev_fw_str(dev, str);
else
str[0] = '\0';
}
@@ -395,6 +405,30 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event,
}
/**
+ * __dev_new_index - allocate an device index
+ *
+ * Returns a suitable unique value for a new device interface
+ * number. It assumes that there are less than 2^32-1 ib devices
+ * will be present in the system.
+ */
+static u32 __dev_new_index(void)
+{
+ /*
+ * The device index to allow stable naming.
+ * Similar to struct net -> ifindex.
+ */
+ static u32 index;
+
+ for (;;) {
+ if (!(++index))
+ index = 1;
+
+ if (!__ib_device_get_by_index(index))
+ return index;
+ }
+}
+
+/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
@@ -489,9 +523,10 @@ int ib_register_device(struct ib_device *device,
device->reg_state = IB_DEV_REGISTERED;
list_for_each_entry(client, &client_list, list)
- if (client->add && !add_client_context(device, client))
+ if (!add_client_context(device, client) && client->add)
client->add(device);
+ device->index = __dev_new_index();
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
@@ -578,7 +613,7 @@ int ib_register_client(struct ib_client *client)
mutex_lock(&device_mutex);
list_for_each_entry(device, &device_list, core_list)
- if (client->add && !add_client_context(device, client))
+ if (!add_client_context(device, client) && client->add)
client->add(device);
down_write(&lists_rwsem);
@@ -712,7 +747,7 @@ EXPORT_SYMBOL(ib_set_client_data);
* chapter 11 of the InfiniBand Architecture Specification). This
* callback may occur in interrupt context.
*/
-int ib_register_event_handler (struct ib_event_handler *event_handler)
+void ib_register_event_handler(struct ib_event_handler *event_handler)
{
unsigned long flags;
@@ -720,8 +755,6 @@ int ib_register_event_handler (struct ib_event_handler *event_handler)
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_register_event_handler);
@@ -732,15 +765,13 @@ EXPORT_SYMBOL(ib_register_event_handler);
* Unregister an event handler registered with
* ib_register_event_handler().
*/
-int ib_unregister_event_handler(struct ib_event_handler *event_handler)
+void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_del(&event_handler->list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_unregister_event_handler);
@@ -894,6 +925,31 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
}
/**
+ * ib_enum_all_devs - enumerate all ib_devices
+ * @cb: Callback to call for each found ib_device
+ *
+ * Enumerates all ib_devices and calls callback() on each device.
+ */
+int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct ib_device *dev;
+ unsigned int idx = 0;
+ int ret = 0;
+
+ down_read(&lists_rwsem);
+ list_for_each_entry(dev, &device_list, core_list) {
+ ret = nldev_cb(dev, skb, cb, idx);
+ if (ret)
+ break;
+ idx++;
+ }
+
+ up_read(&lists_rwsem);
+ return ret;
+}
+
+/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
* @port_num:Port number to query
@@ -945,14 +1001,17 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
- if (!device->modify_port)
- return -ENOSYS;
+ int rc;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- return device->modify_port(device, port_num, port_modify_mask,
- port_modify);
+ if (device->modify_port)
+ rc = device->modify_port(device, port_num, port_modify_mask,
+ port_modify);
+ else
+ rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
+ return rc;
}
EXPORT_SYMBOL(ib_modify_port);
@@ -1087,29 +1146,21 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
-static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
+static const struct rdma_nl_cbs ibnl_ls_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
- .dump = ib_nl_handle_resolve_resp,
- .module = THIS_MODULE },
+ .doit = ib_nl_handle_resolve_resp,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
- .dump = ib_nl_handle_set_timeout,
- .module = THIS_MODULE },
+ .doit = ib_nl_handle_set_timeout,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
[RDMA_NL_LS_OP_IP_RESOLVE] = {
- .dump = ib_nl_handle_ip_res_resp,
- .module = THIS_MODULE },
+ .doit = ib_nl_handle_ip_res_resp,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
};
-static int ib_add_ibnl_clients(void)
-{
- return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
- ibnl_ls_cb_table);
-}
-
-static void ib_remove_ibnl_clients(void)
-{
- ibnl_remove_client(RDMA_NL_LS);
-}
-
static int __init ib_core_init(void)
{
int ret;
@@ -1131,9 +1182,9 @@ static int __init ib_core_init(void)
goto err_comp;
}
- ret = ibnl_init();
+ ret = rdma_nl_init();
if (ret) {
- pr_warn("Couldn't init IB netlink interface\n");
+ pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
goto err_sysfs;
}
@@ -1155,24 +1206,18 @@ static int __init ib_core_init(void)
goto err_mad;
}
- ret = ib_add_ibnl_clients();
- if (ret) {
- pr_warn("Couldn't register ibnl clients\n");
- goto err_sa;
- }
-
ret = register_lsm_notifier(&ibdev_lsm_nb);
if (ret) {
pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
- goto err_ibnl_clients;
+ goto err_sa;
}
+ nldev_init();
+ rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
ib_cache_setup();
return 0;
-err_ibnl_clients:
- ib_remove_ibnl_clients();
err_sa:
ib_sa_cleanup();
err_mad:
@@ -1180,7 +1225,7 @@ err_mad:
err_addr:
addr_cleanup();
err_ibnl:
- ibnl_cleanup();
+ rdma_nl_exit();
err_sysfs:
class_unregister(&ib_class);
err_comp:
@@ -1192,18 +1237,21 @@ err:
static void __exit ib_core_cleanup(void)
{
- unregister_lsm_notifier(&ibdev_lsm_nb);
ib_cache_cleanup();
- ib_remove_ibnl_clients();
+ nldev_exit();
+ rdma_nl_unregister(RDMA_NL_LS);
+ unregister_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
- ibnl_cleanup();
+ rdma_nl_exit();
class_unregister(&ib_class);
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
+
module_init(ib_core_init);
module_exit(ib_core_cleanup);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 31661b5c1743..fcf42f6bb82a 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason)
}
EXPORT_SYMBOL(iwcm_reject_msg);
-static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
+static struct rdma_nl_cbs iwcm_nl_cb_table[] = {
[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
@@ -1175,13 +1175,9 @@ static int __init iw_cm_init(void)
ret = iwpm_init(RDMA_NL_IWCM);
if (ret)
pr_err("iw_cm: couldn't init iwpm\n");
-
- ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table),
- iwcm_nl_cb_table);
- if (ret)
- pr_err("iw_cm: couldn't register netlink callbacks\n");
-
- iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
+ else
+ rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
+ iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
if (!iwcm_wq)
return -ENOMEM;
@@ -1200,9 +1196,11 @@ static void __exit iw_cm_cleanup(void)
{
unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
- ibnl_remove_client(RDMA_NL_IWCM);
+ rdma_nl_unregister(RDMA_NL_IWCM);
iwpm_exit(RDMA_NL_IWCM);
}
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2);
+
module_init(iw_cm_init);
module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index a0e7c16d8bd8..30825bb9b8e9 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -42,7 +42,6 @@ int iwpm_valid_pid(void)
{
return iwpm_user_pid > 0;
}
-EXPORT_SYMBOL(iwpm_valid_pid);
/*
* iwpm_register_pid - Send a netlink query to user space
@@ -104,7 +103,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
- ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+ ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@@ -122,7 +121,6 @@ pid_query_error:
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
-EXPORT_SYMBOL(iwpm_register_pid);
/*
* iwpm_add_mapping - Send a netlink add mapping message
@@ -174,7 +172,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
goto add_mapping_error;
nlmsg_request->req_buffer = pm_msg;
- ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -191,7 +189,6 @@ add_mapping_error:
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
-EXPORT_SYMBOL(iwpm_add_mapping);
/*
* iwpm_add_and_query_mapping - Send a netlink add and query
@@ -251,7 +248,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
goto query_mapping_error;
nlmsg_request->req_buffer = pm_msg;
- ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
err_str = "Unable to send a nlmsg";
@@ -267,7 +264,6 @@ query_mapping_error:
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
-EXPORT_SYMBOL(iwpm_add_and_query_mapping);
/*
* iwpm_remove_mapping - Send a netlink remove mapping message
@@ -312,7 +308,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
if (ret)
goto remove_mapping_error;
- ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -328,7 +324,6 @@ remove_mapping_error:
dev_kfree_skb_any(skb);
return ret;
}
-EXPORT_SYMBOL(iwpm_remove_mapping);
/* netlink attribute policy for the received response to register pid request */
static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
@@ -397,7 +392,6 @@ register_pid_response_exit:
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_register_pid_cb);
/* netlink attribute policy for the received response to add mapping request */
static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
@@ -466,7 +460,6 @@ add_mapping_response_exit:
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_add_mapping_cb);
/* netlink attribute policy for the response to add and query mapping request
* and response with remote address info */
@@ -558,7 +551,6 @@ query_mapping_response_exit:
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
/*
* iwpm_remote_info_cb - Process a port mapper message, containing
@@ -627,7 +619,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
"remote_info: Mapped remote sockaddr:");
return ret;
}
-EXPORT_SYMBOL(iwpm_remote_info_cb);
/* netlink attribute policy for the received request for mapping info */
static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
@@ -677,7 +668,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid);
return ret;
}
-EXPORT_SYMBOL(iwpm_mapping_info_cb);
/* netlink attribute policy for the received mapping info ack */
static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
@@ -707,7 +697,6 @@ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
return 0;
}
-EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
/* netlink attribute policy for the received port mapper error message */
static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
@@ -751,4 +740,3 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_mapping_error_cb);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index f13870e69ccd..c81c55942626 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -54,8 +54,6 @@ static struct iwpm_admin_data iwpm_admin;
int iwpm_init(u8 nl_client)
{
int ret = 0;
- if (iwpm_valid_client(nl_client))
- return -EINVAL;
mutex_lock(&iwpm_admin_lock);
if (atomic_read(&iwpm_admin.refcount) == 0) {
iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE *
@@ -83,7 +81,6 @@ init_exit:
}
return ret;
}
-EXPORT_SYMBOL(iwpm_init);
static void free_hash_bucket(void);
static void free_reminfo_bucket(void);
@@ -109,7 +106,6 @@ int iwpm_exit(u8 nl_client)
iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
return 0;
}
-EXPORT_SYMBOL(iwpm_exit);
static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
struct sockaddr_storage *);
@@ -148,7 +144,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return ret;
}
-EXPORT_SYMBOL(iwpm_create_mapinfo);
int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_local_addr)
@@ -184,7 +179,6 @@ remove_mapinfo_exit:
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return ret;
}
-EXPORT_SYMBOL(iwpm_remove_mapinfo);
static void free_hash_bucket(void)
{
@@ -297,7 +291,6 @@ get_remote_info_exit:
spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
return ret;
}
-EXPORT_SYMBOL(iwpm_get_remote_info);
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
u8 nl_client, gfp_t gfp)
@@ -383,15 +376,11 @@ int iwpm_get_nlmsg_seq(void)
int iwpm_valid_client(u8 nl_client)
{
- if (nl_client >= RDMA_NL_NUM_CLIENTS)
- return 0;
return iwpm_admin.client_list[nl_client];
}
void iwpm_set_valid(u8 nl_client, int valid)
{
- if (nl_client >= RDMA_NL_NUM_CLIENTS)
- return;
iwpm_admin.client_list[nl_client] = valid;
}
@@ -608,7 +597,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
if (ret)
goto mapinfo_num_error;
- ret = ibnl_unicast(skb, nlh, iwpm_pid);
+ ret = rdma_nl_unicast(skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
@@ -637,7 +626,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
- ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+ ret = rdma_nl_unicast(skb, iwpm_pid);
if (ret)
pr_warn("%s Unable to send a nlmsg\n", __func__);
return ret;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 0d3cca0a8890..e5cf09c66fe6 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -64,7 +64,7 @@ struct mad_rmpp_recv {
__be64 tid;
u32 src_qp;
- u16 slid;
+ u32 slid;
u8 mgmt_class;
u8 class_version;
u8 method;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 94931c474d41..e685148dd3e6 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved.
* Copyright (c) 2010 Voltaire Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -37,239 +38,267 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
+#include <linux/module.h>
#include "core_priv.h"
-struct ibnl_client {
- struct list_head list;
- int index;
- int nops;
- const struct ibnl_client_cbs *cb_table;
-};
+#include "core_priv.h"
-static DEFINE_MUTEX(ibnl_mutex);
+static DEFINE_MUTEX(rdma_nl_mutex);
static struct sock *nls;
-static LIST_HEAD(client_list);
+static struct {
+ const struct rdma_nl_cbs *cb_table;
+} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
-int ibnl_chk_listeners(unsigned int group)
+int rdma_nl_chk_listeners(unsigned int group)
{
- if (netlink_has_listeners(nls, group) == 0)
- return -1;
- return 0;
+ return (netlink_has_listeners(nls, group)) ? 0 : -1;
}
+EXPORT_SYMBOL(rdma_nl_chk_listeners);
-int ibnl_add_client(int index, int nops,
- const struct ibnl_client_cbs cb_table[])
+static bool is_nl_msg_valid(unsigned int type, unsigned int op)
{
- struct ibnl_client *cur;
- struct ibnl_client *nl_client;
+ static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS - 1] = {
+ RDMA_NL_RDMA_CM_NUM_OPS,
+ RDMA_NL_IWPM_NUM_OPS,
+ 0,
+ RDMA_NL_LS_NUM_OPS,
+ RDMA_NLDEV_NUM_OPS };
- nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL);
- if (!nl_client)
- return -ENOMEM;
+ /*
+ * This BUILD_BUG_ON is intended to catch addition of new
+ * RDMA netlink protocol without updating the array above.
+ */
+ BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6);
- nl_client->index = index;
- nl_client->nops = nops;
- nl_client->cb_table = cb_table;
+ if (type > RDMA_NL_NUM_CLIENTS - 1)
+ return false;
- mutex_lock(&ibnl_mutex);
+ return (op < max_num_ops[type - 1]) ? true : false;
+}
- list_for_each_entry(cur, &client_list, list) {
- if (cur->index == index) {
- pr_warn("Client for %d already exists\n", index);
- mutex_unlock(&ibnl_mutex);
- kfree(nl_client);
- return -EINVAL;
- }
+static bool is_nl_valid(unsigned int type, unsigned int op)
+{
+ const struct rdma_nl_cbs *cb_table;
+
+ if (!is_nl_msg_valid(type, op))
+ return false;
+
+ cb_table = rdma_nl_types[type].cb_table;
+#ifdef CONFIG_MODULES
+ if (!cb_table) {
+ mutex_unlock(&rdma_nl_mutex);
+ request_module("rdma-netlink-subsys-%d", type);
+ mutex_lock(&rdma_nl_mutex);
+ cb_table = rdma_nl_types[type].cb_table;
}
+#endif
- list_add_tail(&nl_client->list, &client_list);
-
- mutex_unlock(&ibnl_mutex);
-
- return 0;
+ if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
+ return false;
+ return true;
}
-EXPORT_SYMBOL(ibnl_add_client);
-int ibnl_remove_client(int index)
+void rdma_nl_register(unsigned int index,
+ const struct rdma_nl_cbs cb_table[])
{
- struct ibnl_client *cur, *next;
-
- mutex_lock(&ibnl_mutex);
- list_for_each_entry_safe(cur, next, &client_list, list) {
- if (cur->index == index) {
- list_del(&(cur->list));
- mutex_unlock(&ibnl_mutex);
- kfree(cur);
- return 0;
- }
+ mutex_lock(&rdma_nl_mutex);
+ if (!is_nl_msg_valid(index, 0)) {
+ /*
+ * All clients are not interesting in success/failure of
+ * this call. They want to see the print to error log and
+ * continue their initialization. Print warning for them,
+ * because it is programmer's error to be here.
+ */
+ mutex_unlock(&rdma_nl_mutex);
+ WARN(true,
+ "The not-valid %u index was supplied to RDMA netlink\n",
+ index);
+ return;
}
- pr_warn("Can't remove callback for client idx %d. Not found\n", index);
- mutex_unlock(&ibnl_mutex);
- return -EINVAL;
+ if (rdma_nl_types[index].cb_table) {
+ mutex_unlock(&rdma_nl_mutex);
+ WARN(true,
+ "The %u index is already registered in RDMA netlink\n",
+ index);
+ return;
+ }
+
+ rdma_nl_types[index].cb_table = cb_table;
+ mutex_unlock(&rdma_nl_mutex);
+}
+EXPORT_SYMBOL(rdma_nl_register);
+
+void rdma_nl_unregister(unsigned int index)
+{
+ mutex_lock(&rdma_nl_mutex);
+ rdma_nl_types[index].cb_table = NULL;
+ mutex_unlock(&rdma_nl_mutex);
}
-EXPORT_SYMBOL(ibnl_remove_client);
+EXPORT_SYMBOL(rdma_nl_unregister);
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
int len, int client, int op, int flags)
{
- unsigned char *prev_tail;
-
- prev_tail = skb_tail_pointer(skb);
- *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
- len, flags);
+ *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), len, flags);
if (!*nlh)
- goto out_nlmsg_trim;
- (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
+ return NULL;
return nlmsg_data(*nlh);
-
-out_nlmsg_trim:
- nlmsg_trim(skb, prev_tail);
- return NULL;
}
EXPORT_SYMBOL(ibnl_put_msg);
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
int len, void *data, int type)
{
- unsigned char *prev_tail;
-
- prev_tail = skb_tail_pointer(skb);
- if (nla_put(skb, type, len, data))
- goto nla_put_failure;
- nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
+ if (nla_put(skb, type, len, data)) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
return 0;
-
-nla_put_failure:
- nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
- return -EMSGSIZE;
}
EXPORT_SYMBOL(ibnl_put_attr);
-static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct ibnl_client *client;
int type = nlh->nlmsg_type;
- int index = RDMA_NL_GET_CLIENT(type);
+ unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
+ const struct rdma_nl_cbs *cb_table;
+
+ if (!is_nl_valid(index, op))
+ return -EINVAL;
+
+ cb_table = rdma_nl_types[index].cb_table;
- list_for_each_entry(client, &client_list, list) {
- if (client->index == index) {
- if (op >= client->nops || !client->cb_table[op].dump)
- return -EINVAL;
-
- /*
- * For response or local service set_timeout request,
- * there is no need to use netlink_dump_start.
- */
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
- (index == RDMA_NL_LS &&
- op == RDMA_NL_LS_OP_SET_TIMEOUT)) {
- struct netlink_callback cb = {
- .skb = skb,
- .nlh = nlh,
- .dump = client->cb_table[op].dump,
- .module = client->cb_table[op].module,
- };
-
- return cb.dump(skb, &cb);
- }
-
- {
- struct netlink_dump_control c = {
- .dump = client->cb_table[op].dump,
- .module = client->cb_table[op].module,
- };
- return netlink_dump_start(nls, skb, nlh, &c);
- }
- }
+ if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
+ !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+
+ /* FIXME: Convert IWCM to properly handle doit callbacks */
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM ||
+ index == RDMA_NL_IWCM) {
+ struct netlink_dump_control c = {
+ .dump = cb_table[op].dump,
+ };
+ return netlink_dump_start(nls, skb, nlh, &c);
}
- pr_info("Index %d wasn't found in client list\n", index);
- return -EINVAL;
+ if (cb_table[op].doit)
+ return cb_table[op].doit(skb, nlh, extack);
+
+ return 0;
}
-static void ibnl_rcv_reply_skb(struct sk_buff *skb)
+/*
+ * This function is similar to netlink_rcv_skb with one exception:
+ * It calls to the callback for the netlink messages without NLM_F_REQUEST
+ * flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed
+ * for that consumer only.
+ */
+static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
+ struct nlmsghdr *,
+ struct netlink_ext_ack *))
{
+ struct netlink_ext_ack extack = {};
struct nlmsghdr *nlh;
- int msglen;
+ int err;
- /*
- * Process responses until there is no more message or the first
- * request. Generally speaking, it is not recommended to mix responses
- * with requests.
- */
while (skb->len >= nlmsg_total_size(0)) {
+ int msglen;
+
nlh = nlmsg_hdr(skb);
+ err = 0;
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
- return;
-
- /* Handle response only */
- if (nlh->nlmsg_flags & NLM_F_REQUEST)
- return;
-
- ibnl_rcv_msg(skb, nlh, NULL);
+ return 0;
+ /*
+ * Generally speaking, the only requests are handled
+ * by the kernel, but RDMA_NL_LS is different, because it
+ * runs backward netlink scheme. Kernel initiates messages
+ * and waits for reply with data to keep pathrecord cache
+ * in sync.
+ */
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST) &&
+ (RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS))
+ goto ack;
+
+ /* Skip control messages */
+ if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+ goto ack;
+
+ err = cb(skb, nlh, &extack);
+ if (err == -EINTR)
+ goto skip;
+
+ack:
+ if (nlh->nlmsg_flags & NLM_F_ACK || err)
+ netlink_ack(skb, nlh, err, &extack);
+
+skip:
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
+
+ return 0;
}
-static void ibnl_rcv(struct sk_buff *skb)
+static void rdma_nl_rcv(struct sk_buff *skb)
{
- mutex_lock(&ibnl_mutex);
- ibnl_rcv_reply_skb(skb);
- netlink_rcv_skb(skb, &ibnl_rcv_msg);
- mutex_unlock(&ibnl_mutex);
+ mutex_lock(&rdma_nl_mutex);
+ rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
+ mutex_unlock(&rdma_nl_mutex);
}
-int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
- __u32 pid)
+int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
+{
+ int err;
+
+ err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
+ return (err < 0) ? err : 0;
+}
+EXPORT_SYMBOL(rdma_nl_unicast);
+
+int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
{
int err;
err = netlink_unicast(nls, skb, pid, 0);
return (err < 0) ? err : 0;
}
-EXPORT_SYMBOL(ibnl_unicast);
+EXPORT_SYMBOL(rdma_nl_unicast_wait);
-int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
- unsigned int group, gfp_t flags)
+int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
{
return nlmsg_multicast(nls, skb, 0, group, flags);
}
-EXPORT_SYMBOL(ibnl_multicast);
+EXPORT_SYMBOL(rdma_nl_multicast);
-int __init ibnl_init(void)
+int __init rdma_nl_init(void)
{
struct netlink_kernel_cfg cfg = {
- .input = ibnl_rcv,
+ .input = rdma_nl_rcv,
};
nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
- if (!nls) {
- pr_warn("Failed to create netlink socket\n");
+ if (!nls)
return -ENOMEM;
- }
nls->sk_sndtimeo = 10 * HZ;
return 0;
}
-void ibnl_cleanup(void)
+void rdma_nl_exit(void)
{
- struct ibnl_client *cur, *next;
+ int idx;
- mutex_lock(&ibnl_mutex);
- list_for_each_entry_safe(cur, next, &client_list, list) {
- list_del(&(cur->list));
- kfree(cur);
- }
- mutex_unlock(&ibnl_mutex);
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ rdma_nl_unregister(idx);
netlink_kernel_release(nls);
}
+
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
new file mode 100644
index 000000000000..3ba24c428c3b
--- /dev/null
+++ b/drivers/infiniband/core/nldev.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <net/netlink.h>
+#include <rdma/rdma_netlink.h>
+
+#include "core_priv.h"
+
+static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
+ [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IB_DEVICE_NAME_MAX - 1},
+ [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
+ .len = IB_FW_VERSION_NAME_MAX - 1},
+ [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+};
+
+static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+{
+ char fw[IB_FW_VERSION_NAME_MAX];
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
+ return -EMSGSIZE;
+
+ BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+ device->attrs.device_cap_flags, 0))
+ return -EMSGSIZE;
+
+ ib_get_device_fw_str(device, fw);
+ /* Device without FW has strlen(fw) */
+ if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
+ be64_to_cpu(device->node_guid), 0))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
+ be64_to_cpu(device->attrs.sys_image_guid), 0))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int fill_port_info(struct sk_buff *msg,
+ struct ib_device *device, u32 port)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+ return -EMSGSIZE;
+
+ ret = ib_query_port(device, port, &attr);
+ if (ret)
+ return ret;
+
+ BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+ (u64)attr.port_cap_flags, 0))
+ return -EMSGSIZE;
+ if (rdma_protocol_ib(device, port) &&
+ nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
+ attr.subnet_prefix, 0))
+ return -EMSGSIZE;
+ if (rdma_protocol_ib(device, port)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
+ return -EMSGSIZE;
+ }
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+
+ device = __ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
+ 0, 0);
+
+ err = fill_dev_info(msg, device);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ nlmsg_end(msg, nlh);
+
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+}
+
+static int _nldev_get_dumpit(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx)
+{
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+
+ if (idx < start)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_dev_info(skb, device)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ nlmsg_end(skb, nlh);
+
+ idx++;
+
+out: cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ /*
+ * There is no need to take lock, because
+ * we are relying on ib_core's lists_rwsem
+ */
+ return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
+}
+
+static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ u32 port;
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = __ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port))
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
+ 0, 0);
+
+ err = fill_port_info(msg, device, port);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ nlmsg_end(msg, nlh);
+
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+}
+
+static int nldev_port_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+ u32 idx = 0;
+ u32 ifindex;
+ int err;
+ u32 p;
+
+ err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = __ib_device_get_by_index(ifindex);
+ if (!device)
+ return -EINVAL;
+
+ for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
+ /*
+ * The dumpit function returns all information from specific
+ * index. This specific index is taken from the netlink
+ * messages request sent by user and it is available
+ * in cb->args[0].
+ *
+ * Usually, the user doesn't fill this field and it causes
+ * to return everything.
+ *
+ */
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_PORT_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_port_info(skb, device, p)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+ idx++;
+ nlmsg_end(skb, nlh);
+ }
+
+out: cb->args[0] = idx;
+ return skb->len;
+}
+
+static const struct rdma_nl_cbs nldev_cb_table[] = {
+ [RDMA_NLDEV_CMD_GET] = {
+ .doit = nldev_get_doit,
+ .dump = nldev_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_PORT_GET] = {
+ .doit = nldev_port_get_doit,
+ .dump = nldev_port_get_dumpit,
+ },
+};
+
+void __init nldev_init(void)
+{
+ rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
+}
+
+void __exit nldev_exit(void)
+{
+ rdma_nl_unregister(RDMA_NL_NLDEV);
+}
+
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 41c31a2bf093..85b5ee4defa4 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -35,10 +35,57 @@
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"
+int uverbs_ns_idx(u16 *id, unsigned int ns_count)
+{
+ int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
+
+ if (ret >= ns_count)
+ return -EINVAL;
+
+ *id &= ~UVERBS_ID_NS_MASK;
+ return ret;
+}
+
+const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
+ uint16_t object)
+{
+ const struct uverbs_root_spec *object_hash = ibdev->specs_root;
+ const struct uverbs_object_spec_hash *objects;
+ int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
+
+ if (ret < 0)
+ return NULL;
+
+ objects = object_hash->object_buckets[ret];
+
+ if (object >= objects->num_objects)
+ return NULL;
+
+ return objects->objects[object];
+}
+
+const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
+ uint16_t method)
+{
+ const struct uverbs_method_spec_hash *methods;
+ int ret = uverbs_ns_idx(&method, object->num_buckets);
+
+ if (ret < 0)
+ return NULL;
+
+ methods = object->method_buckets[ret];
+ if (method >= methods->num_methods)
+ return NULL;
+
+ return methods->methods[method];
+}
+
void uverbs_uobject_get(struct ib_uobject *uobject)
{
kref_get(&uobject->ref);
@@ -404,6 +451,41 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
return ret;
}
+static int null_obj_type_class_remove_commit(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ return 0;
+}
+
+static const struct uverbs_obj_type null_obj_type = {
+ .type_class = &((const struct uverbs_obj_type_class){
+ .remove_commit = null_obj_type_class_remove_commit,
+ /* be cautious */
+ .needs_kfree_rcu = true}),
+};
+
+int rdma_explicit_destroy(struct ib_uobject *uobject)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobject->context;
+
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
+ return 0;
+ }
+ lockdep_check(uobject, true);
+ ret = uobject->type->type_class->remove_commit(uobject,
+ RDMA_REMOVE_DESTROY);
+ if (ret)
+ return ret;
+
+ uobject->type = &null_obj_type;
+
+ up_read(&ucontext->cleanup_rwsem);
+ return 0;
+}
+
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
uverbs_uobject_add(uobj);
@@ -625,3 +707,100 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
.needs_kfree_rcu = false,
};
+struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
+ struct ib_ucontext *ucontext,
+ enum uverbs_obj_access access,
+ int id)
+{
+ switch (access) {
+ case UVERBS_ACCESS_READ:
+ return rdma_lookup_get_uobject(type_attrs, ucontext, id, false);
+ case UVERBS_ACCESS_DESTROY:
+ case UVERBS_ACCESS_WRITE:
+ return rdma_lookup_get_uobject(type_attrs, ucontext, id, true);
+ case UVERBS_ACCESS_NEW:
+ return rdma_alloc_begin_uobject(type_attrs, ucontext);
+ default:
+ WARN_ON(true);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+int uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access,
+ bool commit)
+{
+ int ret = 0;
+
+ /*
+ * refcounts should be handled at the object level and not at the
+ * uobject level. Refcounts of the objects themselves are done in
+ * handlers.
+ */
+
+ switch (access) {
+ case UVERBS_ACCESS_READ:
+ rdma_lookup_put_uobject(uobj, false);
+ break;
+ case UVERBS_ACCESS_WRITE:
+ rdma_lookup_put_uobject(uobj, true);
+ break;
+ case UVERBS_ACCESS_DESTROY:
+ if (commit)
+ ret = rdma_remove_commit_uobject(uobj);
+ else
+ rdma_lookup_put_uobject(uobj, true);
+ break;
+ case UVERBS_ACCESS_NEW:
+ if (commit)
+ ret = rdma_alloc_commit_uobject(uobj);
+ else
+ rdma_alloc_abort_uobject(uobj);
+ break;
+ default:
+ WARN_ON(true);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
+ struct uverbs_attr_spec_hash * const *spec_hash,
+ size_t num,
+ bool commit)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < num; i++) {
+ struct uverbs_attr_bundle_hash *curr_bundle =
+ &attrs_bundle->hash[i];
+ const struct uverbs_attr_spec_hash *curr_spec_bucket =
+ spec_hash[i];
+ unsigned int j;
+
+ for (j = 0; j < curr_bundle->num_attrs; j++) {
+ struct uverbs_attr *attr;
+ const struct uverbs_attr_spec *spec;
+
+ if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
+ continue;
+
+ attr = &curr_bundle->attrs[j];
+ spec = &curr_spec_bucket->attrs[j];
+
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD) {
+ int current_ret;
+
+ current_ret = uverbs_finalize_object(attr->obj_attr.uobject,
+ spec->obj.access,
+ commit);
+ if (!ret)
+ ret = current_ret;
+ }
+ }
+ }
+ return ret;
+}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 1b82e7ff7fe8..1efcf93238dd 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -39,9 +39,15 @@
#include <linux/idr.h>
#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_verbs.h>
#include <linux/mutex.h>
+int uverbs_ns_idx(u16 *id, unsigned int ns_count);
+const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
+ uint16_t object);
+const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
+ uint16_t method);
/*
* These functions initialize the context and cleanups its uobjects.
* The context has a list of objects which is protected by a mutex
@@ -75,4 +81,40 @@ void uverbs_uobject_put(struct ib_uobject *uobject);
*/
void uverbs_close_fd(struct file *f);
+/*
+ * Get an ib_uobject that corresponds to the given id from ucontext, assuming
+ * the object is from the given type. Lock it to the required access when
+ * applicable.
+ * This function could create (access == NEW), destroy (access == DESTROY)
+ * or unlock (access == READ || access == WRITE) objects if required.
+ * The action will be finalized only when uverbs_finalize_object or
+ * uverbs_finalize_objects are called.
+ */
+struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
+ struct ib_ucontext *ucontext,
+ enum uverbs_obj_access access,
+ int id);
+int uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access,
+ bool commit);
+/*
+ * Note that certain finalize stages could return a status:
+ * (a) alloc_commit could return a failure if the object is committed at the
+ * same time when the context is destroyed.
+ * (b) remove_commit could fail if the object wasn't destroyed successfully.
+ * Since multiple objects could be finalized in one transaction, it is very NOT
+ * recommended to have several finalize actions which have side effects.
+ * For example, it's NOT recommended to have a certain action which has both
+ * a commit action and a destroy action or two destroy objects in the same
+ * action. The rule of thumb is to have one destroy or commit action with
+ * multiple lookups.
+ * The first non zero return value of finalize_object is returned from this
+ * function. For example, this could happen when we couldn't destroy an
+ * object.
+ */
+int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
+ struct uverbs_attr_spec_hash * const *spec_hash,
+ size_t num,
+ bool commit);
+
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 94a9eefb3cfc..90e3889b7fbe 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -44,6 +44,8 @@
static struct workqueue_struct *gid_cache_wq;
+static struct workqueue_struct *gid_cache_wq;
+
enum gid_op_type {
GID_DEL = 0,
GID_ADD
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 70fa4cabe48e..ab5e1024fea9 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -50,6 +50,7 @@
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
+#include <rdma/opa_addr.h>
#include "sa.h"
#include "core_priv.h"
@@ -861,7 +862,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
+ ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret)
ret = len;
else
@@ -1021,9 +1022,9 @@ static void ib_nl_request_timeout(struct work_struct *work)
}
int ib_nl_handle_set_timeout(struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta;
const struct nlattr *attr;
unsigned long flags;
@@ -1033,8 +1034,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
int ret;
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
- !(NETLINK_CB(skb).sk) ||
- !netlink_capable(skb, CAP_NET_ADMIN))
+ !(NETLINK_CB(skb).sk))
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -1098,9 +1098,9 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
}
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags;
struct ib_sa_query *query;
struct ib_mad_send_buf *send_buf;
@@ -1109,8 +1109,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
- !(NETLINK_CB(skb).sk) ||
- !netlink_capable(skb, CAP_NET_ADMIN))
+ !(NETLINK_CB(skb).sk))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -1241,6 +1240,11 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->type = rdma_ah_find_type(device, port_num);
rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+
+ if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
+ (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)))
+ rdma_ah_set_make_grd(ah_attr, true);
+
rdma_ah_set_sl(ah_attr, rec->sl);
rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
get_src_path_mask(device, port_num));
@@ -1420,7 +1424,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
(!(query->flags & IB_SA_QUERY_OPA))) {
- if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
+ if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
}
@@ -2290,12 +2294,15 @@ static void update_sm_ah(struct work_struct *work)
rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
rdma_ah_set_port_num(&ah_attr, port->port_num);
if (port_attr.grh_required) {
- rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
-
- rdma_ah_set_subnet_prefix(&ah_attr,
- cpu_to_be64(port_attr.subnet_prefix));
- rdma_ah_set_interface_id(&ah_attr,
- cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) {
+ rdma_ah_set_make_grd(&ah_attr, true);
+ } else {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ }
}
new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
@@ -2410,8 +2417,7 @@ static void ib_sa_add_one(struct ib_device *device)
*/
INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
- if (ib_register_event_handler(&sa_dev->event_handler))
- goto err;
+ ib_register_event_handler(&sa_dev->event_handler);
for (i = 0; i <= e - s; ++i) {
if (rdma_cap_ib_sa(device, i + 1))
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7ebe1ef23652..abc5ab581f82 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1210,8 +1210,8 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_get_device_fw_str(dev, buf, PAGE_SIZE);
- strlcat(buf, "\n", PAGE_SIZE);
+ ib_get_device_fw_str(dev, buf);
+ strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
return strlen(buf);
}
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 112099c86a19..f2a7f62c2834 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -618,7 +618,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
if (result)
goto out;
- ib_copy_qp_attr_to_user(&resp, &qp_attr);
+ ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp)))
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 276f0ef835bd..eb85b546e223 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -248,14 +248,15 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
dst->qp_num = src->qp_num;
}
-static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
+static void ucma_copy_ud_event(struct ib_device *device,
+ struct rdma_ucm_ud_param *dst,
struct rdma_ud_param *src)
{
if (src->private_data_len)
memcpy(dst->private_data, src->private_data,
src->private_data_len);
dst->private_data_len = src->private_data_len;
- ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
+ ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
dst->qp_num = src->qp_num;
dst->qkey = src->qkey;
}
@@ -335,7 +336,8 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
uevent->resp.event = event->event;
uevent->resp.status = event->status;
if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
+ ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
@@ -1157,7 +1159,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
if (ret)
goto out;
- ib_copy_qp_attr_to_user(&resp, &qp_attr);
+ ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp)))
ret = -EFAULT;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 36a6f5c8914c..c1696e6084b2 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.status = 0;
packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
- packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
+ packet->mad.hdr.lid = ib_lid_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.sl = mad_recv_wc->wc->sl;
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 64d494a64daf..37c8903e7fd0 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -100,6 +100,7 @@ struct ib_uverbs_device {
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
struct list_head uverbs_events_file_list;
+ struct uverbs_root_spec *specs_root;
};
struct ib_uverbs_event_queue {
@@ -218,6 +219,8 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
+long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+
struct ib_uverbs_flow_spec {
union {
union {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 739bd69ef1d4..e0cb99860934 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -91,9 +91,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err;
}
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
if (ret)
@@ -275,8 +276,14 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.bad_pkey_cntr = attr.bad_pkey_cntr;
resp.qkey_viol_cntr = attr.qkey_viol_cntr;
resp.pkey_tbl_len = attr.pkey_tbl_len;
- resp.lid = attr.lid;
- resp.sm_lid = attr.sm_lid;
+
+ if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
+ resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
+ resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
+ } else {
+ resp.lid = ib_lid_cpu16(attr.lid);
+ resp.sm_lid = ib_lid_cpu16(attr.sm_lid);
+ }
resp.lmc = attr.lmc;
resp.max_vl_num = attr.max_vl_num;
resp.sm_sl = attr.sm_sl;
@@ -313,9 +320,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
if (IS_ERR(uobj))
@@ -482,9 +490,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
mutex_lock(&file->device->xrcd_tree_mutex);
@@ -646,9 +655,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
@@ -740,7 +750,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof(cmd),
(unsigned long) cmd.response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
return -EINVAL;
@@ -1080,7 +1091,8 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
INIT_UDATA(&uhw, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1161,9 +1173,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
@@ -1185,7 +1198,8 @@ out:
return ret ? ret : in_len;
}
-static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
+static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
+ struct ib_wc *wc)
{
struct ib_uverbs_wc tmp;
@@ -1199,7 +1213,10 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
tmp.pkey_index = wc->pkey_index;
- tmp.slid = wc->slid;
+ if (rdma_cap_opa_ah(ib_dev, wc->port_num))
+ tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid);
+ else
+ tmp.slid = ib_lid_cpu16(wc->slid);
tmp.sl = wc->sl;
tmp.dlid_path_bits = wc->dlid_path_bits;
tmp.port_num = wc->port_num;
@@ -1243,7 +1260,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (!ret)
break;
- ret = copy_wc_to_user(data_ptr, &wc);
+ ret = copy_wc_to_user(ib_dev, data_ptr, &wc);
if (ret)
goto out_put;
@@ -1383,8 +1400,9 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
- if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
- sizeof(cmd->reserved1)) && cmd->reserved1) {
+ if (cmd_sz > sizeof(*cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(*cmd),
+ cmd_sz - sizeof(*cmd))) {
ret = -EOPNOTSUPP;
goto err_put;
}
@@ -1420,7 +1438,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->is_srq) {
srq = uobj_get_obj_read(srq, cmd->srq_handle,
file->ucontext);
- if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+ if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
}
@@ -1482,11 +1500,21 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
IB_QP_CREATE_SCATTER_FCS |
- IB_QP_CREATE_CVLAN_STRIPPING)) {
+ IB_QP_CREATE_CVLAN_STRIPPING |
+ IB_QP_CREATE_SOURCE_QPN)) {
ret = -EINVAL;
goto err_put;
}
+ if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (!capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ goto err_put;
+ }
+
+ attr.source_qpn = cmd->source_qpn;
+ }
+
buf = (void *)cmd + sizeof(*cmd);
if (cmd_sz > sizeof(*cmd))
if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
@@ -1722,9 +1750,10 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
file->ucontext);
@@ -1791,6 +1820,28 @@ err_put:
return ret;
}
+static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
+ struct rdma_ah_attr *rdma_attr)
+{
+ const struct ib_global_route *grh;
+
+ uverb_attr->dlid = rdma_ah_get_dlid(rdma_attr);
+ uverb_attr->sl = rdma_ah_get_sl(rdma_attr);
+ uverb_attr->src_path_bits = rdma_ah_get_path_bits(rdma_attr);
+ uverb_attr->static_rate = rdma_ah_get_static_rate(rdma_attr);
+ uverb_attr->is_global = !!(rdma_ah_get_ah_flags(rdma_attr) &
+ IB_AH_GRH);
+ if (uverb_attr->is_global) {
+ grh = rdma_ah_read_grh(rdma_attr);
+ memcpy(uverb_attr->dgid, grh->dgid.raw, 16);
+ uverb_attr->flow_label = grh->flow_label;
+ uverb_attr->sgid_index = grh->sgid_index;
+ uverb_attr->hop_limit = grh->hop_limit;
+ uverb_attr->traffic_class = grh->traffic_class;
+ }
+ uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr);
+}
+
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
@@ -1801,7 +1852,6 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_qp_attr *attr;
struct ib_qp_init_attr *init_attr;
- const struct ib_global_route *grh;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1851,39 +1901,8 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr);
- resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr);
- resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr);
- resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr);
- resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) &
- IB_AH_GRH);
- if (resp.dest.is_global) {
- grh = rdma_ah_read_grh(&attr->ah_attr);
- memcpy(resp.dest.dgid, grh->dgid.raw, 16);
- resp.dest.flow_label = grh->flow_label;
- resp.dest.sgid_index = grh->sgid_index;
- resp.dest.hop_limit = grh->hop_limit;
- resp.dest.traffic_class = grh->traffic_class;
- }
- resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr);
-
- resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr);
- resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr);
- resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr);
- resp.alt_dest.static_rate
- = rdma_ah_get_static_rate(&attr->alt_ah_attr);
- resp.alt_dest.is_global
- = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) &
- IB_AH_GRH);
- if (resp.alt_dest.is_global) {
- grh = rdma_ah_read_grh(&attr->alt_ah_attr);
- memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16);
- resp.alt_dest.flow_label = grh->flow_label;
- resp.alt_dest.sgid_index = grh->sgid_index;
- resp.alt_dest.hop_limit = grh->hop_limit;
- resp.alt_dest.traffic_class = grh->traffic_class;
- }
- resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr);
+ copy_ah_attr_to_uverbs(&resp.dest, &attr->ah_attr);
+ copy_ah_attr_to_uverbs(&resp.alt_dest, &attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
@@ -1917,6 +1936,29 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
}
}
+static void copy_ah_attr_from_uverbs(struct ib_device *dev,
+ struct rdma_ah_attr *rdma_attr,
+ struct ib_uverbs_qp_dest *uverb_attr)
+{
+ rdma_attr->type = rdma_ah_find_type(dev, uverb_attr->port_num);
+ if (uverb_attr->is_global) {
+ rdma_ah_set_grh(rdma_attr, NULL,
+ uverb_attr->flow_label,
+ uverb_attr->sgid_index,
+ uverb_attr->hop_limit,
+ uverb_attr->traffic_class);
+ rdma_ah_set_dgid_raw(rdma_attr, uverb_attr->dgid);
+ } else {
+ rdma_ah_set_ah_flags(rdma_attr, 0);
+ }
+ rdma_ah_set_dlid(rdma_attr, uverb_attr->dlid);
+ rdma_ah_set_sl(rdma_attr, uverb_attr->sl);
+ rdma_ah_set_path_bits(rdma_attr, uverb_attr->src_path_bits);
+ rdma_ah_set_static_rate(rdma_attr, uverb_attr->static_rate);
+ rdma_ah_set_port_num(rdma_attr, uverb_attr->port_num);
+ rdma_ah_set_make_grd(rdma_attr, false);
+}
+
static int modify_qp(struct ib_uverbs_file *file,
struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
{
@@ -1964,48 +2006,12 @@ static int modify_qp(struct ib_uverbs_file *file,
attr->rate_limit = cmd->rate_limit;
if (cmd->base.attr_mask & IB_QP_AV)
- attr->ah_attr.type = rdma_ah_find_type(qp->device,
- cmd->base.dest.port_num);
- if (cmd->base.dest.is_global) {
- rdma_ah_set_grh(&attr->ah_attr, NULL,
- cmd->base.dest.flow_label,
- cmd->base.dest.sgid_index,
- cmd->base.dest.hop_limit,
- cmd->base.dest.traffic_class);
- rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid);
- } else {
- rdma_ah_set_ah_flags(&attr->ah_attr, 0);
- }
- rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid);
- rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl);
- rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits);
- rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate);
- rdma_ah_set_port_num(&attr->ah_attr,
- cmd->base.dest.port_num);
+ copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr,
+ &cmd->base.dest);
if (cmd->base.attr_mask & IB_QP_ALT_PATH)
- attr->alt_ah_attr.type =
- rdma_ah_find_type(qp->device, cmd->base.dest.port_num);
- if (cmd->base.alt_dest.is_global) {
- rdma_ah_set_grh(&attr->alt_ah_attr, NULL,
- cmd->base.alt_dest.flow_label,
- cmd->base.alt_dest.sgid_index,
- cmd->base.alt_dest.hop_limit,
- cmd->base.alt_dest.traffic_class);
- rdma_ah_set_dgid_raw(&attr->alt_ah_attr,
- cmd->base.alt_dest.dgid);
- } else {
- rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0);
- }
-
- rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid);
- rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl);
- rdma_ah_set_path_bits(&attr->alt_ah_attr,
- cmd->base.alt_dest.src_path_bits);
- rdma_ah_set_static_rate(&attr->alt_ah_attr,
- cmd->base.alt_dest.static_rate);
- rdma_ah_set_port_num(&attr->alt_ah_attr,
- cmd->base.alt_dest.port_num);
+ copy_ah_attr_from_uverbs(qp->device, &attr->alt_ah_attr,
+ &cmd->base.alt_dest);
ret = ib_modify_qp_with_udata(qp, attr,
modify_qp_mask(qp->qp_type,
@@ -2037,7 +2043,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
- in_len - sizeof(cmd.base), out_len);
+ in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len);
ret = modify_qp(file, &cmd, &udata);
if (ret)
@@ -2543,7 +2550,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
if (IS_ERR(uobj))
@@ -2556,6 +2564,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
}
attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num);
+ rdma_ah_set_make_grd(&attr, false);
rdma_ah_set_dlid(&attr, cmd.attr.dlid);
rdma_ah_set_sl(&attr, cmd.attr.sl);
rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits);
@@ -3472,6 +3481,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (IS_ERR(obj))
return PTR_ERR(obj);
+ if (cmd->srq_type == IB_SRQT_TM)
+ attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
+
if (cmd->srq_type == IB_SRQT_XRC) {
xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
file->ucontext);
@@ -3488,10 +3500,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
+ }
- attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle,
- file->ucontext);
- if (!attr.ext.xrc.cq) {
+ if (ib_srq_has_cq(cmd->srq_type)) {
+ attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ file->ucontext);
+ if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
@@ -3526,10 +3540,13 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
+ if (ib_srq_has_cq(cmd->srq_type)) {
+ srq->ext.cq = attr.ext.cq;
+ atomic_inc(&attr.ext.cq->usecnt);
+ }
+
if (cmd->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.cq = attr.ext.xrc.cq;
srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
- atomic_inc(&attr.ext.xrc.cq->usecnt);
atomic_inc(&attr.ext.xrc.xrcd->usecnt);
}
@@ -3552,10 +3569,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
goto err_copy;
}
- if (cmd->srq_type == IB_SRQT_XRC) {
+ if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
- uobj_put_obj_read(attr.ext.xrc.cq);
- }
+
+ if (ib_srq_has_cq(cmd->srq_type))
+ uobj_put_obj_read(attr.ext.cq);
+
uobj_put_obj_read(pd);
uobj_alloc_commit(&obj->uevent.uobject);
@@ -3568,8 +3587,8 @@ err_put:
uobj_put_obj_read(pd);
err_put_cq:
- if (cmd->srq_type == IB_SRQT_XRC)
- uobj_put_obj_read(attr.ext.xrc.cq);
+ if (ib_srq_has_cq(cmd->srq_type))
+ uobj_put_obj_read(attr.ext.cq);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
@@ -3599,6 +3618,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ memset(&xcmd, 0, sizeof(xcmd));
xcmd.response = cmd.response;
xcmd.user_handle = cmd.user_handle;
xcmd.srq_type = IB_SRQT_BASIC;
@@ -3607,10 +3627,10 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
if (ret)
@@ -3634,10 +3654,10 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof resp);
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof(resp));
ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
if (ret)
@@ -3848,6 +3868,16 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.raw_packet_caps = attr.raw_packet_caps;
resp.response_length += sizeof(resp.raw_packet_caps);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps))
+ goto end;
+
+ resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size;
+ resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags;
+ resp.xrq_caps.max_ops = attr.xrq_caps.max_ops;
+ resp.xrq_caps.max_sge = attr.xrq_caps.max_sge;
+ resp.xrq_caps.flags = attr.xrq_caps.flags;
+ resp.response_length += sizeof(resp.xrq_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
new file mode 100644
index 000000000000..5286ad57d903
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/rdma_user_ioctl.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_process_attr(struct ib_device *ibdev,
+ struct ib_ucontext *ucontext,
+ const struct ib_uverbs_attr *uattr,
+ u16 attr_id,
+ const struct uverbs_attr_spec_hash *attr_spec_bucket,
+ struct uverbs_attr_bundle_hash *attr_bundle_h,
+ struct ib_uverbs_attr __user *uattr_ptr)
+{
+ const struct uverbs_attr_spec *spec;
+ struct uverbs_attr *e;
+ const struct uverbs_object_spec *object;
+ struct uverbs_obj_attr *o_attr;
+ struct uverbs_attr *elements = attr_bundle_h->attrs;
+
+ if (uattr->reserved)
+ return -EINVAL;
+
+ if (attr_id >= attr_spec_bucket->num_attrs) {
+ if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
+ return -EINVAL;
+ else
+ return 0;
+ }
+
+ spec = &attr_spec_bucket->attrs[attr_id];
+ e = &elements[attr_id];
+ e->uattr = uattr_ptr;
+
+ switch (spec->type) {
+ case UVERBS_ATTR_TYPE_PTR_IN:
+ case UVERBS_ATTR_TYPE_PTR_OUT:
+ if (uattr->len < spec->len ||
+ (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) &&
+ uattr->len > spec->len))
+ return -EINVAL;
+
+ e->ptr_attr.data = uattr->data;
+ e->ptr_attr.len = uattr->len;
+ e->ptr_attr.flags = uattr->flags;
+ break;
+
+ case UVERBS_ATTR_TYPE_IDR:
+ if (uattr->data >> 32)
+ return -EINVAL;
+ /* fall through */
+ case UVERBS_ATTR_TYPE_FD:
+ if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
+ return -EINVAL;
+
+ o_attr = &e->obj_attr;
+ object = uverbs_get_object(ibdev, spec->obj.obj_type);
+ if (!object)
+ return -EINVAL;
+ o_attr->type = object->type_attrs;
+
+ o_attr->id = (int)uattr->data;
+ o_attr->uobject = uverbs_get_uobject_from_context(
+ o_attr->type,
+ ucontext,
+ spec->obj.access,
+ o_attr->id);
+
+ if (IS_ERR(o_attr->uobject))
+ return PTR_ERR(o_attr->uobject);
+
+ if (spec->obj.access == UVERBS_ACCESS_NEW) {
+ u64 id = o_attr->uobject->id;
+
+ /* Copy the allocated id to the user-space */
+ if (put_user(id, &e->uattr->data)) {
+ uverbs_finalize_object(o_attr->uobject,
+ UVERBS_ACCESS_NEW,
+ false);
+ return -EFAULT;
+ }
+ }
+
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ set_bit(attr_id, attr_bundle_h->valid_bitmap);
+ return 0;
+}
+
+static int uverbs_uattrs_process(struct ib_device *ibdev,
+ struct ib_ucontext *ucontext,
+ const struct ib_uverbs_attr *uattrs,
+ size_t num_uattrs,
+ const struct uverbs_method_spec *method,
+ struct uverbs_attr_bundle *attr_bundle,
+ struct ib_uverbs_attr __user *uattr_ptr)
+{
+ size_t i;
+ int ret = 0;
+ int num_given_buckets = 0;
+
+ for (i = 0; i < num_uattrs; i++) {
+ const struct ib_uverbs_attr *uattr = &uattrs[i];
+ u16 attr_id = uattr->attr_id;
+ struct uverbs_attr_spec_hash *attr_spec_bucket;
+
+ ret = uverbs_ns_idx(&attr_id, method->num_buckets);
+ if (ret < 0) {
+ if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
+ uverbs_finalize_objects(attr_bundle,
+ method->attr_buckets,
+ num_given_buckets,
+ false);
+ return ret;
+ }
+ continue;
+ }
+
+ /*
+ * ret is the found ns, so increase num_given_buckets if
+ * necessary.
+ */
+ if (ret >= num_given_buckets)
+ num_given_buckets = ret + 1;
+
+ attr_spec_bucket = method->attr_buckets[ret];
+ ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id,
+ attr_spec_bucket, &attr_bundle->hash[ret],
+ uattr_ptr++);
+ if (ret) {
+ uverbs_finalize_objects(attr_bundle,
+ method->attr_buckets,
+ num_given_buckets,
+ false);
+ return ret;
+ }
+ }
+
+ return num_given_buckets;
+}
+
+static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec,
+ struct uverbs_attr_bundle *attr_bundle)
+{
+ unsigned int i;
+
+ for (i = 0; i < attr_bundle->num_buckets; i++) {
+ struct uverbs_attr_spec_hash *attr_spec_bucket =
+ method_spec->attr_buckets[i];
+
+ if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask,
+ attr_bundle->hash[i].valid_bitmap,
+ attr_spec_bucket->num_attrs))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
+ const struct ib_uverbs_attr *uattrs,
+ size_t num_uattrs,
+ struct ib_device *ibdev,
+ struct ib_uverbs_file *ufile,
+ const struct uverbs_method_spec *method_spec,
+ struct uverbs_attr_bundle *attr_bundle)
+{
+ int ret;
+ int finalize_ret;
+ int num_given_buckets;
+
+ num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs,
+ num_uattrs, method_spec,
+ attr_bundle, uattr_ptr);
+ if (num_given_buckets <= 0)
+ return -EINVAL;
+
+ attr_bundle->num_buckets = num_given_buckets;
+ ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle);
+ if (ret)
+ goto cleanup;
+
+ ret = method_spec->handler(ibdev, ufile, attr_bundle);
+cleanup:
+ finalize_ret = uverbs_finalize_objects(attr_bundle,
+ method_spec->attr_buckets,
+ attr_bundle->num_buckets,
+ !ret);
+
+ return ret ? ret : finalize_ret;
+}
+
+#define UVERBS_OPTIMIZE_USING_STACK_SZ 256
+static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct ib_uverbs_ioctl_hdr *hdr,
+ void __user *buf)
+{
+ const struct uverbs_object_spec *object_spec;
+ const struct uverbs_method_spec *method_spec;
+ long err = 0;
+ unsigned int i;
+ struct {
+ struct ib_uverbs_attr *uattrs;
+ struct uverbs_attr_bundle *uverbs_attr_bundle;
+ } *ctx = NULL;
+ struct uverbs_attr *curr_attr;
+ unsigned long *curr_bitmap;
+ size_t ctx_size;
+#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ
+ uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
+#endif
+
+ if (hdr->reserved)
+ return -EINVAL;
+
+ object_spec = uverbs_get_object(ib_dev, hdr->object_id);
+ if (!object_spec)
+ return -EOPNOTSUPP;
+
+ method_spec = uverbs_get_method(object_spec, hdr->method_id);
+ if (!method_spec)
+ return -EOPNOTSUPP;
+
+ if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
+ return -EINVAL;
+
+ ctx_size = sizeof(*ctx) +
+ sizeof(struct uverbs_attr_bundle) +
+ sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
+ sizeof(*ctx->uattrs) * hdr->num_attrs +
+ sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) *
+ method_spec->num_child_attrs +
+ sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) *
+ (method_spec->num_child_attrs / BITS_PER_LONG +
+ method_spec->num_buckets);
+
+#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ
+ if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ)
+ ctx = (void *)data;
+
+ if (!ctx)
+#endif
+ ctx = kmalloc(ctx_size, GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx);
+ ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) +
+ (sizeof(ctx->uverbs_attr_bundle->hash[0]) *
+ method_spec->num_buckets);
+ curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
+ curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
+
+ /*
+ * We just fill the pointers and num_attrs here. The data itself will be
+ * filled at a later stage (uverbs_process_attr)
+ */
+ for (i = 0; i < method_spec->num_buckets; i++) {
+ unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
+
+ ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr;
+ curr_attr += curr_num_attrs;
+ ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs;
+ ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap;
+ bitmap_zero(curr_bitmap, curr_num_attrs);
+ curr_bitmap += BITS_TO_LONGS(curr_num_attrs);
+ }
+
+ err = copy_from_user(ctx->uattrs, buf,
+ sizeof(*ctx->uattrs) * hdr->num_attrs);
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
+ file, method_spec, ctx->uverbs_attr_bundle);
+out:
+#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ
+ if (ctx_size > UVERBS_OPTIMIZE_USING_STACK_SZ)
+#endif
+ kfree(ctx);
+ return err;
+}
+
+#define IB_UVERBS_MAX_CMD_SZ 4096
+
+long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_ioctl_hdr __user *user_hdr =
+ (struct ib_uverbs_ioctl_hdr __user *)arg;
+ struct ib_uverbs_ioctl_hdr hdr;
+ struct ib_device *ib_dev;
+ int srcu_key;
+ long err;
+
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ ib_dev = srcu_dereference(file->device->ib_dev,
+ &file->device->disassociate_srcu);
+ if (!ib_dev) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (cmd == RDMA_VERBS_IOCTL) {
+ err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
+
+ if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ ||
+ hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (hdr.reserved) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr,
+ (__user void *)arg + sizeof(hdr));
+ } else {
+ err = -ENOIOCTLCMD;
+ }
+out:
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+
+ return err;
+}
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
new file mode 100644
index 000000000000..76ddb6564578
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <linux/bitops.h>
+#include "uverbs.h"
+
+#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT)
+#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT)
+#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK)
+
+#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \
+ buckets_offset) \
+ for (tmpj = 0, \
+ elem = (*(const void ***)((hashes)[tmpi] + \
+ (buckets_offset)))[0]; \
+ tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \
+ tmpj++) \
+ if ((elem = ((*(const void ***)(hashes[tmpi] + \
+ (buckets_offset)))[tmpj])))
+
+/*
+ * Iterate all elements of a few @hashes. The number of given hashes is
+ * indicated by @num_hashes. The offset of the number of buckets in the hash is
+ * represented by @num_buckets_offset, while the offset of the buckets array in
+ * the hash structure is represented by @buckets_offset. tmpi and tmpj are two
+ * short (or int) based indices that are given by the user. tmpi iterates over
+ * the different hashes. @elem points the current element in the hashes[tmpi]
+ * bucket we are looping on. To be honest, @hashes representation isn't exactly
+ * a hash, but more a collection of elements. These elements' ids are treated
+ * in a hash like manner, where the first upper bits are the bucket number.
+ * These elements are later mapped into a perfect-hash.
+ */
+#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \
+ num_buckets_offset, buckets_offset) \
+ for (tmpi = 0; tmpi < (num_hashes); tmpi++) \
+ _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\
+ buckets_offset)
+
+#define get_elements_iterators_entry_above(iters, num_elements, elements, \
+ num_objects_fld, objects_fld, bucket,\
+ min_id) \
+ get_elements_above_id((const void **)iters, num_elements, \
+ (const void **)(elements), \
+ offsetof(typeof(**elements), \
+ num_objects_fld), \
+ offsetof(typeof(**elements), objects_fld),\
+ offsetof(typeof(***(*elements)->objects_fld), id),\
+ bucket, min_id)
+
+#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \
+ get_elements_iterators_entry_above(iters, num_trees, trees, \
+ num_objects, objects, bucket, min_id)
+
+#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\
+ get_elements_iterators_entry_above(method_iters, num_iters, iters, \
+ num_methods, methods, bucket, min_id)
+
+#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\
+ get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \
+ num_attrs, attrs, bucket, min_id)
+
+/*
+ * get_elements_above_id get a few hashes represented by @elements and
+ * @num_elements. The hashes fields are described by @num_offset, @data_offset
+ * and @id_offset in the same way as required by for_each_element. The function
+ * returns an array of @iters, represents an array of elements in the hashes
+ * buckets, which their ids are the smallest ids in all hashes but are all
+ * larger than the id given by min_id. Elements are only added to the iters
+ * array if their id belongs to the bucket @bucket. The number of elements in
+ * the returned array is returned by the function. @min_id is also updated to
+ * reflect the new min_id of all elements in iters.
+ */
+static size_t get_elements_above_id(const void **iters,
+ unsigned int num_elements,
+ const void **elements,
+ size_t num_offset,
+ size_t data_offset,
+ size_t id_offset,
+ u16 bucket,
+ short *min_id)
+{
+ size_t num_iters = 0;
+ short min = SHRT_MAX;
+ const void *elem;
+ int i, j, last_stored = -1;
+
+ for_each_element(elem, i, j, elements, num_elements, num_offset,
+ data_offset) {
+ u16 id = *(u16 *)(elem + id_offset);
+
+ if (GET_NS_ID(id) != bucket)
+ continue;
+
+ if (GET_ID(id) < *min_id ||
+ (min != SHRT_MAX && GET_ID(id) > min))
+ continue;
+
+ /*
+ * We first iterate all hashes represented by @elements. When
+ * we do, we try to find an element @elem in the bucket @bucket
+ * which its id is min. Since we can't ensure the user sorted
+ * the elements in increasing order, we override this hash's
+ * minimal id element we found, if a new element with a smaller
+ * id was just found.
+ */
+ iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
+ last_stored = i;
+ min = GET_ID(id);
+ }
+
+ /*
+ * We only insert to our iters array an element, if its id is smaller
+ * than all previous ids. Therefore, the final iters array is sorted so
+ * that smaller ids are in the end of the array.
+ * Therefore, we need to clean the beginning of the array to make sure
+ * all ids of final elements are equal to min.
+ */
+ for (i = num_iters - 1; i >= 0 &&
+ GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--)
+ ;
+
+ num_iters -= i + 1;
+ memmove(iters, iters + i + 1, sizeof(*iters) * num_iters);
+
+ *min_id = min;
+ return num_iters;
+}
+
+#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
+ objects_fld, bucket) \
+ find_max_element_id(num_elements, (const void **)(elements), \
+ offsetof(typeof(**elements), num_objects_fld), \
+ offsetof(typeof(**elements), objects_fld), \
+ offsetof(typeof(***(*elements)->objects_fld), id),\
+ bucket)
+
+static short find_max_element_ns_id(unsigned int num_elements,
+ const void **elements,
+ size_t num_offset,
+ size_t data_offset,
+ size_t id_offset)
+{
+ short max_ns = SHRT_MIN;
+ const void *elem;
+ int i, j;
+
+ for_each_element(elem, i, j, elements, num_elements, num_offset,
+ data_offset) {
+ u16 id = *(u16 *)(elem + id_offset);
+
+ if (GET_NS_ID(id) > max_ns)
+ max_ns = GET_NS_ID(id);
+ }
+
+ return max_ns;
+}
+
+static short find_max_element_id(unsigned int num_elements,
+ const void **elements,
+ size_t num_offset,
+ size_t data_offset,
+ size_t id_offset,
+ u16 bucket)
+{
+ short max_id = SHRT_MIN;
+ const void *elem;
+ int i, j;
+
+ for_each_element(elem, i, j, elements, num_elements, num_offset,
+ data_offset) {
+ u16 id = *(u16 *)(elem + id_offset);
+
+ if (GET_NS_ID(id) == bucket &&
+ GET_ID(id) > max_id)
+ max_id = GET_ID(id);
+ }
+ return max_id;
+}
+
+#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
+ objects_fld, bucket) \
+ find_max_element_id(num_elements, (const void **)(elements), \
+ offsetof(typeof(**elements), num_objects_fld), \
+ offsetof(typeof(**elements), objects_fld), \
+ offsetof(typeof(***(*elements)->objects_fld), id),\
+ bucket)
+
+#define find_max_element_ns_entry_id(num_elements, elements, \
+ num_objects_fld, objects_fld) \
+ find_max_element_ns_id(num_elements, (const void **)(elements), \
+ offsetof(typeof(**elements), num_objects_fld),\
+ offsetof(typeof(**elements), objects_fld), \
+ offsetof(typeof(***(*elements)->objects_fld), id))
+
+/*
+ * find_max_xxxx_ns_id gets a few elements. Each element is described by an id
+ * which its upper bits represents a namespace. It finds the max namespace. This
+ * could be used in order to know how many buckets do we need to allocate. If no
+ * elements exist, SHRT_MIN is returned. Namespace represents here different
+ * buckets. The common example is "common bucket" and "driver bucket".
+ *
+ * find_max_xxxx_id gets a few elements and a bucket. Each element is described
+ * by an id which its upper bits represent a namespace. It returns the max id
+ * which is contained in the same namespace defined in @bucket. This could be
+ * used in order to know how many elements do we need to allocate in the bucket.
+ * If no elements exist, SHRT_MIN is returned.
+ */
+
+#define find_max_object_id(num_trees, trees, bucket) \
+ find_max_element_entry_id(num_trees, trees, num_objects,\
+ objects, bucket)
+#define find_max_object_ns_id(num_trees, trees) \
+ find_max_element_ns_entry_id(num_trees, trees, \
+ num_objects, objects)
+
+#define find_max_method_id(num_iters, iters, bucket) \
+ find_max_element_entry_id(num_iters, iters, num_methods,\
+ methods, bucket)
+#define find_max_method_ns_id(num_iters, iters) \
+ find_max_element_ns_entry_id(num_iters, iters, \
+ num_methods, methods)
+
+#define find_max_attr_id(num_iters, iters, bucket) \
+ find_max_element_entry_id(num_iters, iters, num_attrs, \
+ attrs, bucket)
+#define find_max_attr_ns_id(num_iters, iters) \
+ find_max_element_ns_entry_id(num_iters, iters, \
+ num_attrs, attrs)
+
+static void free_method(struct uverbs_method_spec *method)
+{
+ unsigned int i;
+
+ if (!method)
+ return;
+
+ for (i = 0; i < method->num_buckets; i++)
+ kfree(method->attr_buckets[i]);
+
+ kfree(method);
+}
+
+#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \
+ (attr)->type == UVERBS_ATTR_TYPE_FD)
+
+/*
+ * This function gets array of size @num_method_defs which contains pointers to
+ * method definitions @method_defs. The function allocates an
+ * uverbs_method_spec structure and initializes its number of buckets and the
+ * elements in buckets to the correct attributes. While doing that, it
+ * validates that there aren't conflicts between attributes of different
+ * method_defs.
+ */
+static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs,
+ size_t num_method_defs)
+{
+ int bucket_idx;
+ int max_attr_buckets = 0;
+ size_t num_attr_buckets = 0;
+ int res = 0;
+ struct uverbs_method_spec *method = NULL;
+ const struct uverbs_attr_def **attr_defs;
+ unsigned int num_of_singularities = 0;
+
+ max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs);
+ if (max_attr_buckets >= 0)
+ num_attr_buckets = max_attr_buckets + 1;
+
+ method = kzalloc(sizeof(*method) +
+ num_attr_buckets * sizeof(*method->attr_buckets),
+ GFP_KERNEL);
+ if (!method)
+ return ERR_PTR(-ENOMEM);
+
+ method->num_buckets = num_attr_buckets;
+ attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL);
+ if (!attr_defs) {
+ res = -ENOMEM;
+ goto free_method;
+ }
+ for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) {
+ short min_id = SHRT_MIN;
+ int attr_max_bucket = 0;
+ struct uverbs_attr_spec_hash *hash = NULL;
+
+ attr_max_bucket = find_max_attr_id(num_method_defs, method_defs,
+ bucket_idx);
+ if (attr_max_bucket < 0)
+ continue;
+
+ hash = kzalloc(sizeof(*hash) +
+ ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
+ sizeof(long)) +
+ BITS_TO_LONGS(attr_max_bucket) * sizeof(long),
+ GFP_KERNEL);
+ if (!hash) {
+ res = -ENOMEM;
+ goto free;
+ }
+ hash->num_attrs = attr_max_bucket + 1;
+ method->num_child_attrs += hash->num_attrs;
+ hash->mandatory_attrs_bitmask = (void *)(hash + 1) +
+ ALIGN(sizeof(*hash->attrs) *
+ (attr_max_bucket + 1),
+ sizeof(long));
+
+ method->attr_buckets[bucket_idx] = hash;
+
+ do {
+ size_t num_attr_defs;
+ struct uverbs_attr_spec *attr;
+ bool attr_obj_with_special_access;
+
+ num_attr_defs =
+ get_attrs_above_id(attr_defs,
+ num_method_defs,
+ method_defs,
+ bucket_idx,
+ &min_id);
+ /* Last attr in bucket */
+ if (!num_attr_defs)
+ break;
+
+ if (num_attr_defs > 1) {
+ /*
+ * We don't allow two attribute definitions for
+ * the same attribute. This is usually a
+ * programmer error. If required, it's better to
+ * just add a new attribute to capture the new
+ * semantics.
+ */
+ res = -EEXIST;
+ goto free;
+ }
+
+ attr = &hash->attrs[min_id];
+ memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
+
+ attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
+ (attr->obj.access == UVERBS_ACCESS_NEW ||
+ attr->obj.access == UVERBS_ACCESS_DESTROY);
+ num_of_singularities += !!attr_obj_with_special_access;
+ if (WARN(num_of_singularities > 1,
+ "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
+ min_id) ||
+ WARN(attr_obj_with_special_access &&
+ !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
+ "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy aceess but isn't mandatory\n",
+ min_id) ||
+ WARN(IS_ATTR_OBJECT(attr) &&
+ attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ,
+ "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
+ min_id)) {
+ res = -EINVAL;
+ goto free;
+ }
+
+ if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY)
+ set_bit(min_id, hash->mandatory_attrs_bitmask);
+ min_id++;
+
+ } while (1);
+ }
+ kfree(attr_defs);
+ return method;
+
+free:
+ kfree(attr_defs);
+free_method:
+ free_method(method);
+ return ERR_PTR(res);
+}
+
+static void free_object(struct uverbs_object_spec *object)
+{
+ unsigned int i, j;
+
+ if (!object)
+ return;
+
+ for (i = 0; i < object->num_buckets; i++) {
+ struct uverbs_method_spec_hash *method_buckets =
+ object->method_buckets[i];
+
+ if (!method_buckets)
+ continue;
+
+ for (j = 0; j < method_buckets->num_methods; j++)
+ free_method(method_buckets->methods[j]);
+
+ kfree(method_buckets);
+ }
+
+ kfree(object);
+}
+
+/*
+ * This function gets array of size @num_object_defs which contains pointers to
+ * object definitions @object_defs. The function allocated an
+ * uverbs_object_spec structure and initialize its number of buckets and the
+ * elements in buckets to the correct methods. While doing that, it
+ * sorts out the correct relationship between conflicts in the same method.
+ */
+static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs,
+ size_t num_object_defs)
+{
+ u16 bucket_idx;
+ int max_method_buckets = 0;
+ u16 num_method_buckets = 0;
+ int res = 0;
+ struct uverbs_object_spec *object = NULL;
+ const struct uverbs_method_def **method_defs;
+
+ max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs);
+ if (max_method_buckets >= 0)
+ num_method_buckets = max_method_buckets + 1;
+
+ object = kzalloc(sizeof(*object) +
+ num_method_buckets *
+ sizeof(*object->method_buckets), GFP_KERNEL);
+ if (!object)
+ return ERR_PTR(-ENOMEM);
+
+ object->num_buckets = num_method_buckets;
+ method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL);
+ if (!method_defs) {
+ res = -ENOMEM;
+ goto free_object;
+ }
+
+ for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) {
+ short min_id = SHRT_MIN;
+ int methods_max_bucket = 0;
+ struct uverbs_method_spec_hash *hash = NULL;
+
+ methods_max_bucket = find_max_method_id(num_object_defs, object_defs,
+ bucket_idx);
+ if (methods_max_bucket < 0)
+ continue;
+
+ hash = kzalloc(sizeof(*hash) +
+ sizeof(*hash->methods) * (methods_max_bucket + 1),
+ GFP_KERNEL);
+ if (!hash) {
+ res = -ENOMEM;
+ goto free;
+ }
+
+ hash->num_methods = methods_max_bucket + 1;
+ object->method_buckets[bucket_idx] = hash;
+
+ do {
+ size_t num_method_defs;
+ struct uverbs_method_spec *method;
+ int i;
+
+ num_method_defs =
+ get_methods_above_id(method_defs,
+ num_object_defs,
+ object_defs,
+ bucket_idx,
+ &min_id);
+ /* Last method in bucket */
+ if (!num_method_defs)
+ break;
+
+ method = build_method_with_attrs(method_defs,
+ num_method_defs);
+ if (IS_ERR(method)) {
+ res = PTR_ERR(method);
+ goto free;
+ }
+
+ /*
+ * The last tree which is given as an argument to the
+ * merge overrides previous method handler.
+ * Therefore, we iterate backwards and search for the
+ * first handler which != NULL. This also defines the
+ * set of flags used for this handler.
+ */
+ for (i = num_object_defs - 1;
+ i >= 0 && !method_defs[i]->handler; i--)
+ ;
+ hash->methods[min_id++] = method;
+ /* NULL handler isn't allowed */
+ if (WARN(i < 0,
+ "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n",
+ min_id)) {
+ res = -EINVAL;
+ goto free;
+ }
+ method->handler = method_defs[i]->handler;
+ method->flags = method_defs[i]->flags;
+
+ } while (1);
+ }
+ kfree(method_defs);
+ return object;
+
+free:
+ kfree(method_defs);
+free_object:
+ free_object(object);
+ return ERR_PTR(res);
+}
+
+void uverbs_free_spec_tree(struct uverbs_root_spec *root)
+{
+ unsigned int i, j;
+
+ if (!root)
+ return;
+
+ for (i = 0; i < root->num_buckets; i++) {
+ struct uverbs_object_spec_hash *object_hash =
+ root->object_buckets[i];
+
+ if (!object_hash)
+ continue;
+
+ for (j = 0; j < object_hash->num_objects; j++)
+ free_object(object_hash->objects[j]);
+
+ kfree(object_hash);
+ }
+
+ kfree(root);
+}
+EXPORT_SYMBOL(uverbs_free_spec_tree);
+
+struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
+ const struct uverbs_object_tree_def **trees)
+{
+ u16 bucket_idx;
+ short max_object_buckets = 0;
+ size_t num_objects_buckets = 0;
+ struct uverbs_root_spec *root_spec = NULL;
+ const struct uverbs_object_def **object_defs;
+ int i;
+ int res = 0;
+
+ max_object_buckets = find_max_object_ns_id(num_trees, trees);
+ /*
+ * Devices which don't want to support ib_uverbs, should just allocate
+ * an empty parsing tree. Every user-space command won't hit any valid
+ * entry in the parsing tree and thus will fail.
+ */
+ if (max_object_buckets >= 0)
+ num_objects_buckets = max_object_buckets + 1;
+
+ root_spec = kzalloc(sizeof(*root_spec) +
+ num_objects_buckets * sizeof(*root_spec->object_buckets),
+ GFP_KERNEL);
+ if (!root_spec)
+ return ERR_PTR(-ENOMEM);
+ root_spec->num_buckets = num_objects_buckets;
+
+ object_defs = kcalloc(num_trees, sizeof(*object_defs),
+ GFP_KERNEL);
+ if (!object_defs) {
+ res = -ENOMEM;
+ goto free_root;
+ }
+
+ for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) {
+ short min_id = SHRT_MIN;
+ short objects_max_bucket;
+ struct uverbs_object_spec_hash *hash = NULL;
+
+ objects_max_bucket = find_max_object_id(num_trees, trees,
+ bucket_idx);
+ if (objects_max_bucket < 0)
+ continue;
+
+ hash = kzalloc(sizeof(*hash) +
+ sizeof(*hash->objects) * (objects_max_bucket + 1),
+ GFP_KERNEL);
+ if (!hash) {
+ res = -ENOMEM;
+ goto free;
+ }
+ hash->num_objects = objects_max_bucket + 1;
+ root_spec->object_buckets[bucket_idx] = hash;
+
+ do {
+ size_t num_object_defs;
+ struct uverbs_object_spec *object;
+
+ num_object_defs = get_objects_above_id(object_defs,
+ num_trees,
+ trees,
+ bucket_idx,
+ &min_id);
+ /* Last object in bucket */
+ if (!num_object_defs)
+ break;
+
+ object = build_object_with_methods(object_defs,
+ num_object_defs);
+ if (IS_ERR(object)) {
+ res = PTR_ERR(object);
+ goto free;
+ }
+
+ /*
+ * The last tree which is given as an argument to the
+ * merge overrides previous object's type_attrs.
+ * Therefore, we iterate backwards and search for the
+ * first type_attrs which != NULL.
+ */
+ for (i = num_object_defs - 1;
+ i >= 0 && !object_defs[i]->type_attrs; i--)
+ ;
+ /*
+ * NULL is a valid type_attrs. It means an object we
+ * can't instantiate (like DEVICE).
+ */
+ object->type_attrs = i < 0 ? NULL :
+ object_defs[i]->type_attrs;
+
+ hash->objects[min_id++] = object;
+ } while (1);
+ }
+
+ kfree(object_defs);
+ return root_spec;
+
+free:
+ kfree(object_defs);
+free_root:
+ uverbs_free_spec_tree(root_spec);
+ return ERR_PTR(res);
+}
+EXPORT_SYMBOL(uverbs_alloc_spec_tree);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5e530d2bee44..dc2aed6fb21b 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -49,6 +49,7 @@
#include <linux/uaccess.h>
#include <rdma/ib.h>
+#include <rdma/uverbs_std_types.h>
#include "uverbs.h"
#include "core_priv.h"
@@ -595,7 +596,6 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file
{
struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
- int ret;
ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
if (!ev_file)
@@ -621,21 +621,11 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file
INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
ib_dev,
ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
+ ib_register_event_handler(&uverbs_file->event_handler);
/* At that point async file stuff was fully set */
return filp;
-err_put_file:
- fput(filp);
- kref_put(&uverbs_file->async_file->ref,
- ib_uverbs_release_async_event_file);
- uverbs_file->async_file = NULL;
- return ERR_PTR(ret);
-
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
@@ -949,6 +939,9 @@ static const struct file_operations uverbs_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
+#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
+ .unlocked_ioctl = ib_uverbs_ioctl,
+#endif
};
static const struct file_operations uverbs_mmap_fops = {
@@ -958,6 +951,9 @@ static const struct file_operations uverbs_mmap_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
+#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
+ .unlocked_ioctl = ib_uverbs_ioctl,
+#endif
};
static struct ib_client uverbs_client = {
@@ -1108,6 +1104,18 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
+ if (!device->specs_root) {
+ const struct uverbs_object_tree_def *default_root[] = {
+ uverbs_default_get_objects()};
+
+ uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
+ default_root);
+ if (IS_ERR(uverbs_dev->specs_root))
+ goto err_class;
+
+ device->specs_root = uverbs_dev->specs_root;
+ }
+
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
@@ -1239,6 +1247,11 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
+ if (uverbs_dev->specs_root) {
+ uverbs_free_spec_tree(uverbs_dev->specs_root);
+ device->specs_root = NULL;
+ }
+
kobject_put(&uverbs_dev->kobj);
}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 94fd989c9060..bd0acf376af0 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -33,10 +33,47 @@
#include <linux/export.h>
#include <rdma/ib_marshall.h>
-void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct rdma_ah_attr *src)
+#define OPA_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+static int rdma_ah_conv_opa_to_ib(struct ib_device *dev,
+ struct rdma_ah_attr *ib,
+ struct rdma_ah_attr *opa)
{
+ struct ib_port_attr port_attr;
+ int ret = 0;
+
+ /* Do structure copy and the over-write fields */
+ *ib = *opa;
+
+ ib->type = RDMA_AH_ATTR_TYPE_IB;
+ rdma_ah_set_grh(ib, NULL, 0, 0, 1, 0);
+
+ if (ib_query_port(dev, opa->port_num, &port_attr)) {
+ /* Set to default subnet to indicate error */
+ rdma_ah_set_subnet_prefix(ib, OPA_DEFAULT_GID_PREFIX);
+ ret = -EINVAL;
+ } else {
+ rdma_ah_set_subnet_prefix(ib,
+ cpu_to_be64(port_attr.subnet_prefix));
+ }
+ rdma_ah_set_interface_id(ib, OPA_MAKE_ID(rdma_ah_get_dlid(opa)));
+ return ret;
+}
+
+void ib_copy_ah_attr_to_user(struct ib_device *device,
+ struct ib_uverbs_ah_attr *dst,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct rdma_ah_attr *src = ah_attr;
+ struct rdma_ah_attr conv_ah;
+
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
+
+ if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
+ (rdma_ah_get_dlid(ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (!rdma_ah_conv_opa_to_ib(device, &conv_ah, ah_attr)))
+ src = &conv_ah;
+
dst->dlid = rdma_ah_get_dlid(src);
dst->sl = rdma_ah_get_sl(src);
dst->src_path_bits = rdma_ah_get_path_bits(src);
@@ -57,7 +94,8 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
-void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+void ib_copy_qp_attr_to_user(struct ib_device *device,
+ struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
dst->qp_state = src->qp_state;
@@ -76,8 +114,8 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
dst->max_recv_sge = src->cap.max_recv_sge;
dst->max_inline_data = src->cap.max_inline_data;
- ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
- ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
+ ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
+ ib_copy_ah_attr_to_user(device, &dst->alt_ah_attr, &src->alt_ah_attr);
dst->pkey_index = src->pkey_index;
dst->alt_pkey_index = src->alt_pkey_index;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index ef293379f37a..0a98579700ec 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -209,67 +209,244 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
return 0;
};
-const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
- .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
- .context_closed = uverbs_hot_unplug_completion_event_file,
- .fops = &uverbs_event_fops,
- .name = "[infinibandevent]",
- .flags = O_RDONLY,
-};
+/*
+ * This spec is used in order to pass information to the hardware driver in a
+ * legacy way. Every verb that could get driver specific data should get this
+ * spec.
+ */
+static const struct uverbs_attr_def uverbs_uhw_compat_in =
+ UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
+static const struct uverbs_attr_def uverbs_uhw_compat_out =
+ UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
- .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
- .destroy_object = uverbs_free_cq,
-};
+static void create_udata(struct uverbs_attr_bundle *ctx,
+ struct ib_udata *udata)
+{
+ /*
+ * This is for ease of conversion. The purpose is to convert all drivers
+ * to use uverbs_attr_bundle instead of ib_udata.
+ * Assume attr == 0 is input and attr == 1 is output.
+ */
+ void __user *inbuf;
+ size_t inbuf_len = 0;
+ void __user *outbuf;
+ size_t outbuf_len = 0;
+ const struct uverbs_attr *uhw_in =
+ uverbs_attr_get(ctx, UVERBS_UHW_IN);
+ const struct uverbs_attr *uhw_out =
+ uverbs_attr_get(ctx, UVERBS_UHW_OUT);
+
+ if (!IS_ERR(uhw_in)) {
+ inbuf = uhw_in->ptr_attr.ptr;
+ inbuf_len = uhw_in->ptr_attr.len;
+ }
-const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
- .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
- .destroy_object = uverbs_free_qp,
-};
+ if (!IS_ERR(uhw_out)) {
+ outbuf = uhw_out->ptr_attr.ptr;
+ outbuf_len = uhw_out->ptr_attr.len;
+ }
-const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
- .type = UVERBS_TYPE_ALLOC_IDR(0),
- .destroy_object = uverbs_free_mw,
-};
+ INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len);
+}
-const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
- /* 1 is used in order to free the MR after all the MWs */
- .type = UVERBS_TYPE_ALLOC_IDR(1),
- .destroy_object = uverbs_free_mr,
-};
+static int uverbs_create_cq_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_ucq_object *obj;
+ struct ib_udata uhw;
+ int ret;
+ u64 user_handle;
+ struct ib_cq_init_attr attr = {};
+ struct ib_cq *cq;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
+ const struct uverbs_attr *ev_file_attr;
+ struct ib_uobject *ev_file_uobj;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE);
+ if (ret)
+ return ret;
-const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
- .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
- .destroy_object = uverbs_free_srq,
-};
+ /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+ if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT)
+ return -EFAULT;
-const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
- .type = UVERBS_TYPE_ALLOC_IDR(0),
- .destroy_object = uverbs_free_ah,
-};
+ ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_attr)) {
+ ev_file_uobj = ev_file_attr->obj_attr.uobject;
-const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
- .type = UVERBS_TYPE_ALLOC_IDR(0),
- .destroy_object = uverbs_free_flow,
-};
+ ev_file = container_of(ev_file_uobj,
+ struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ uverbs_uobject_get(ev_file_uobj);
+ }
-const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
- .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
- .destroy_object = uverbs_free_wq,
-};
+ if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
-const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
- .type = UVERBS_TYPE_ALLOC_IDR(0),
- .destroy_object = uverbs_free_rwq_ind_tbl,
-};
+ obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject,
+ typeof(*obj), uobject);
+ obj->uverbs_file = ucontext->ufile;
+ obj->comp_events_reported = 0;
+ obj->async_events_reported = 0;
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->async_list);
+
+ /* Temporary, only until drivers get the new uverbs_attr_bundle */
+ create_udata(attrs, &uhw);
+
+ cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err_event_file;
+ }
-const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
- .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
- .destroy_object = uverbs_free_xrcd,
-};
+ cq->device = ib_dev;
+ cq->uobject = &obj->uobject;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = &ev_file->ev_queue;
+ obj->uobject.object = cq;
+ obj->uobject.user_handle = user_handle;
+ atomic_set(&cq->usecnt, 0);
+
+ ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
+ if (ret)
+ goto err_cq;
-const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
- /* 2 is used in order to free the PD after MRs */
- .type = UVERBS_TYPE_ALLOC_IDR(2),
- .destroy_object = uverbs_free_pd,
+ return 0;
+err_cq:
+ ib_destroy_cq(cq);
+
+err_event_file:
+ if (ev_file)
+ uverbs_uobject_put(ev_file_uobj);
+ return ret;
};
+
+static DECLARE_UVERBS_METHOD(
+ uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler,
+ &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ),
+ &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32),
+ &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+
+static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject;
+ struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
+ uobject);
+ int ret;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
+ return -EOPNOTSUPP;
+
+ ret = rdma_explicit_destroy(uobj);
+ if (ret)
+ return ret;
+
+ resp.comp_events_reported = obj->comp_events_reported;
+ resp.async_events_reported = obj->async_events_reported;
+
+ return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp);
+}
+
+static DECLARE_UVERBS_METHOD(
+ uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler,
+ &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ &UVERBS_TYPE_ALLOC_FD(0,
+ sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_hot_unplug_completion_event_file,
+ &uverbs_event_fops,
+ "[infinibandevent]", O_RDONLY));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
+ uverbs_free_cq),
+ &uverbs_method_cq_create,
+ &uverbs_method_cq_destroy);
+
+DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
+ uverbs_free_qp));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR,
+ /* 1 is used in order to free the MR after all the MWs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
+ uverbs_free_srq));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
+ uverbs_free_wq));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
+ uverbs_free_xrcd));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD,
+ /* 2 is used in order to free the PD after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+
+DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL);
+
+DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+ &uverbs_object_device,
+ &uverbs_object_pd,
+ &uverbs_object_mr,
+ &uverbs_object_comp_channel,
+ &uverbs_object_cq,
+ &uverbs_object_qp,
+ &uverbs_object_ah,
+ &uverbs_object_mw,
+ &uverbs_object_srq,
+ &uverbs_object_flow,
+ &uverbs_object_wq,
+ &uverbs_object_rwq_ind_table,
+ &uverbs_object_xrcd);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b456e3ca1876..ee9e27dc799b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -180,39 +180,29 @@ EXPORT_SYMBOL(ib_rate_to_mbps);
__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
- switch (node_type) {
- case RDMA_NODE_IB_CA:
- case RDMA_NODE_IB_SWITCH:
- case RDMA_NODE_IB_ROUTER:
- return RDMA_TRANSPORT_IB;
- case RDMA_NODE_RNIC:
- return RDMA_TRANSPORT_IWARP;
- case RDMA_NODE_USNIC:
+
+ if (node_type == RDMA_NODE_USNIC)
return RDMA_TRANSPORT_USNIC;
- case RDMA_NODE_USNIC_UDP:
+ if (node_type == RDMA_NODE_USNIC_UDP)
return RDMA_TRANSPORT_USNIC_UDP;
- default:
- BUG();
- return 0;
- }
+ if (node_type == RDMA_NODE_RNIC)
+ return RDMA_TRANSPORT_IWARP;
+
+ return RDMA_TRANSPORT_IB;
}
EXPORT_SYMBOL(rdma_node_get_transport);
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
{
+ enum rdma_transport_type lt;
if (device->get_link_layer)
return device->get_link_layer(device, port_num);
- switch (rdma_node_get_transport(device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ lt = rdma_node_get_transport(device->node_type);
+ if (lt == RDMA_TRANSPORT_IB)
return IB_LINK_LAYER_INFINIBAND;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_USNIC:
- case RDMA_TRANSPORT_USNIC_UDP:
- return IB_LINK_LAYER_ETHERNET;
- default:
- return IB_LINK_LAYER_UNSPECIFIED;
- }
+
+ return IB_LINK_LAYER_ETHERNET;
}
EXPORT_SYMBOL(rdma_port_get_link_layer);
@@ -478,6 +468,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
union ib_gid dgid;
union ib_gid sgid;
+ might_sleep();
+
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->type = rdma_ah_find_type(device, port_num);
if (rdma_cap_eth_ah(device, port_num)) {
@@ -632,11 +624,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->srq_type = srq_init_attr->srq_type;
+ if (ib_srq_has_cq(srq->srq_type)) {
+ srq->ext.cq = srq_init_attr->ext.cq;
+ atomic_inc(&srq->ext.cq->usecnt);
+ }
if (srq->srq_type == IB_SRQT_XRC) {
srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
- srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
atomic_inc(&srq->ext.xrc.xrcd->usecnt);
- atomic_inc(&srq->ext.xrc.cq->usecnt);
}
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
@@ -677,18 +671,18 @@ int ib_destroy_srq(struct ib_srq *srq)
pd = srq->pd;
srq_type = srq->srq_type;
- if (srq_type == IB_SRQT_XRC) {
+ if (ib_srq_has_cq(srq_type))
+ cq = srq->ext.cq;
+ if (srq_type == IB_SRQT_XRC)
xrcd = srq->ext.xrc.xrcd;
- cq = srq->ext.xrc.cq;
- }
ret = srq->device->destroy_srq(srq);
if (!ret) {
atomic_dec(&pd->usecnt);
- if (srq_type == IB_SRQT_XRC) {
+ if (srq_type == IB_SRQT_XRC)
atomic_dec(&xrcd->usecnt);
+ if (ib_srq_has_cq(srq_type))
atomic_dec(&cq->usecnt);
- }
}
return ret;
@@ -1244,6 +1238,18 @@ int ib_resolve_eth_dmac(struct ib_device *device,
if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
ah_attr->roce.dmac);
+ return 0;
+ }
+ if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ __be32 addr = 0;
+
+ memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
+ ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
+ } else {
+ ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
+ (char *)ah_attr->roce.dmac);
+ }
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1306,6 +1312,61 @@ int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
+{
+ int rc;
+ u32 netdev_speed;
+ struct net_device *netdev;
+ struct ethtool_link_ksettings lksettings;
+
+ if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ if (!dev->get_netdev)
+ return -EOPNOTSUPP;
+
+ netdev = dev->get_netdev(dev, port_num);
+ if (!netdev)
+ return -ENODEV;
+
+ rtnl_lock();
+ rc = __ethtool_get_link_ksettings(netdev, &lksettings);
+ rtnl_unlock();
+
+ dev_put(netdev);
+
+ if (!rc) {
+ netdev_speed = lksettings.base.speed;
+ } else {
+ netdev_speed = SPEED_1000;
+ pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name,
+ netdev_speed);
+ }
+
+ if (netdev_speed <= SPEED_1000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_SDR;
+ } else if (netdev_speed <= SPEED_10000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_FDR10;
+ } else if (netdev_speed <= SPEED_20000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_DDR;
+ } else if (netdev_speed <= SPEED_25000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_40000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_get_eth_speed);
+
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
@@ -1573,15 +1634,53 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
/* Multicast groups */
+static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
+{
+ struct ib_qp_init_attr init_attr = {};
+ struct ib_qp_attr attr = {};
+ int num_eth_ports = 0;
+ int port;
+
+ /* If QP state >= init, it is assigned to a port and we can check this
+ * port only.
+ */
+ if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
+ if (attr.qp_state >= IB_QPS_INIT) {
+ if (qp->device->get_link_layer(qp->device, attr.port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return true;
+ goto lid_check;
+ }
+ }
+
+ /* Can't get a quick answer, iterate over all ports */
+ for (port = 0; port < qp->device->phys_port_cnt; port++)
+ if (qp->device->get_link_layer(qp->device, port) !=
+ IB_LINK_LAYER_INFINIBAND)
+ num_eth_ports++;
+
+ /* If we have at lease one Ethernet port, RoCE annex declares that
+ * multicast LID should be ignored. We can't tell at this step if the
+ * QP belongs to an IB or Ethernet port.
+ */
+ if (num_eth_ports)
+ return true;
+
+ /* If all the ports are IB, we can check according to IB spec. */
+lid_check:
+ return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE));
+}
+
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
- lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- lid == be16_to_cpu(IB_LID_PERMISSIVE))
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1597,9 +1696,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
- lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- lid == be16_to_cpu(IB_LID_PERMISSIVE))
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);