Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/Makefile              3
-rw-r--r--  drivers/infiniband/core/addr.c                6
-rw-r--r--  drivers/infiniband/core/agent.c               4
-rw-r--r--  drivers/infiniband/core/cm.c                119
-rw-r--r--  drivers/infiniband/core/cma.c               132
-rw-r--r--  drivers/infiniband/core/device.c             33
-rw-r--r--  drivers/infiniband/core/fmr_pool.c           49
-rw-r--r--  drivers/infiniband/core/mad.c                28
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c           16
-rw-r--r--  drivers/infiniband/core/multicast.c          27
-rw-r--r--  drivers/infiniband/core/rdma_core.c         627
-rw-r--r--  drivers/infiniband/core/rdma_core.h          78
-rw-r--r--  drivers/infiniband/core/sa_query.c          886
-rw-r--r--  drivers/infiniband/core/sysfs.c               6
-rw-r--r--  drivers/infiniband/core/ucm.c                41
-rw-r--r--  drivers/infiniband/core/ucma.c               24
-rw-r--r--  drivers/infiniband/core/umem.c               17
-rw-r--r--  drivers/infiniband/core/umem_odp.c           81
-rw-r--r--  drivers/infiniband/core/user_mad.c           48
-rw-r--r--  drivers/infiniband/core/uverbs.h             69
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c       1582
-rw-r--r--  drivers/infiniband/core/uverbs_main.c       510
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c    81
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c  275
-rw-r--r--  drivers/infiniband/core/verbs.c              86
25 files changed, 2824 insertions(+), 2004 deletions(-)
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index e426ac877d19..6ebd9ad95010 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -29,4 +29,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
+ rdma_core.o uverbs_std_types.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 329d08c884f6..02971e239a18 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -444,9 +444,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(addr->net, NULL, &fl6);
- if ((ret = dst->error))
- goto put;
+ ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+ if (ret < 0)
+ return ret;
rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 11dacd97a667..324ef85a13b6 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 6535f09dc575..1844770f3ae8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -228,7 +228,7 @@ struct cm_device {
struct cm_av {
struct cm_port *port;
union ib_gid dgid;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
};
@@ -241,7 +241,7 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct ib_sa_path_rec path[0];
+ struct sa_path_rec path[0];
};
struct cm_timewait_info {
@@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
ret = -ENODEV;
goto out;
}
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
ret = PTR_ERR(m);
goto out;
}
@@ -390,7 +390,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return PTR_ERR(m);
}
m->ah = ah;
@@ -400,7 +400,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
@@ -440,7 +440,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
@@ -453,7 +453,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- path->gid_type, ndev, &p, NULL)) {
+ sa_conv_pathrec_to_gid_type(path),
+ ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -1172,8 +1173,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
{
- struct ib_sa_path_rec *pri_path = param->primary_path;
- struct ib_sa_path_rec *alt_path = param->alternate_path;
+ struct sa_path_rec *pri_path = param->primary_path;
+ struct sa_path_rec *alt_path = param->alternate_path;
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
@@ -1202,8 +1203,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
}
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_path->slid;
- req_msg->primary_remote_lid = pri_path->dlid;
+ req_msg->primary_local_lid =
+ htons(ntohl(sa_path_get_slid(pri_path)));
+ req_msg->primary_remote_lid =
+ htons(ntohl(sa_path_get_dlid(pri_path)));
} else {
/* Work-around until there's a way to obtain remote LID info */
req_msg->primary_local_lid = IB_LID_PERMISSIVE;
@@ -1223,8 +1226,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
if (alt_path) {
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_path->slid;
- req_msg->alt_remote_lid = alt_path->dlid;
+ req_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alt_path)));
+ req_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alt_path)));
} else {
req_msg->alt_local_lid = IB_LID_PERMISSIVE;
req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
@@ -1401,14 +1406,15 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
}
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
- struct ib_sa_path_rec *primary_path,
- struct ib_sa_path_rec *alt_path)
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path)
{
- memset(primary_path, 0, sizeof *primary_path);
primary_path->dgid = req_msg->primary_local_gid;
primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->dlid = req_msg->primary_local_lid;
- primary_path->slid = req_msg->primary_remote_lid;
+ sa_path_set_dlid(primary_path,
+ htonl(ntohs(req_msg->primary_local_lid)));
+ sa_path_set_slid(primary_path,
+ htonl(ntohs(req_msg->primary_remote_lid)));
primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
primary_path->hop_limit = req_msg->primary_hop_limit;
primary_path->traffic_class = req_msg->primary_traffic_class;
@@ -1423,14 +1429,15 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
primary_path->packet_life_time =
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
- primary_path->service_id = req_msg->service_id;
+ sa_path_set_service_id(primary_path, req_msg->service_id);
if (req_msg->alt_local_lid) {
- memset(alt_path, 0, sizeof *alt_path);
alt_path->dgid = req_msg->alt_local_gid;
alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->dlid = req_msg->alt_local_lid;
- alt_path->slid = req_msg->alt_remote_lid;
+ sa_path_set_dlid(alt_path,
+ htonl(ntohs(req_msg->alt_local_lid)));
+ sa_path_set_slid(alt_path,
+ htonl(ntohs(req_msg->alt_remote_lid)));
alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
alt_path->hop_limit = req_msg->alt_hop_limit;
alt_path->traffic_class = req_msg->alt_traffic_class;
@@ -1445,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
alt_path->packet_life_time =
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
- alt_path->service_id = req_msg->service_id;
+ sa_path_set_service_id(alt_path, req_msg->service_id);
}
}
@@ -1722,6 +1729,7 @@ static int cm_req_handler(struct cm_work *work)
struct cm_req_msg *req_msg;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1758,21 +1766,34 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
- cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
- memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ memset(&work->path[0], 0, sizeof(work->path[0]));
+ memset(&work->path[1], 0, sizeof(work->path[1]));
+ grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num,
- cm_id_priv->av.ah_attr.grh.sgid_index,
+ grh->sgid_index,
&gid, &gid_attr);
if (!ret) {
if (gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
cm_id_priv);
}
@@ -1782,11 +1803,18 @@ static int cm_req_handler(struct cm_work *work)
&work->path[0].sgid,
&gid_attr);
if (!err && gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -2811,7 +2839,7 @@ out:
static void cm_format_lap(struct cm_lap_msg *lap_msg,
struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2822,8 +2850,10 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
/* todo: need remote CM response timeout */
cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid = alternate_path->slid;
- lap_msg->alt_remote_lid = alternate_path->dlid;
+ lap_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alternate_path)));
+ lap_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alternate_path)));
lap_msg->alt_local_gid = alternate_path->sgid;
lap_msg->alt_remote_gid = alternate_path->dgid;
cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
@@ -2841,7 +2871,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
}
int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2895,14 +2925,15 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
EXPORT_SYMBOL(ib_send_cm_lap);
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *path,
+ struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
memset(path, 0, sizeof *path);
+ path->rec_type = SA_PATH_REC_TYPE_IB;
path->dgid = lap_msg->alt_local_gid;
path->sgid = lap_msg->alt_remote_gid;
- path->dlid = lap_msg->alt_local_lid;
- path->slid = lap_msg->alt_remote_lid;
+ sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
+ sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
path->flow_label = cm_lap_get_flow_label(lap_msg);
path->hop_limit = lap_msg->alt_hop_limit;
path->traffic_class = cm_lap_get_traffic_class(lap_msg);
@@ -3708,7 +3739,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
atomic_long_inc(&port->counter_group[CM_RECV].
counter[attr_id - CM_ATTR_ID_OFFSET]);
- work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
+ work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
@@ -3800,7 +3831,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -3854,7 +3885,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
default:
break;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
}
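
A note on the LID handling in the cm.c hunks above: the new struct sa_path_rec stores LIDs as 32-bit big-endian values (wide enough for OPA), while CM MADs still carry 16-bit LIDs on the wire, so the htons(ntohl(...)) and htonl(ntohs(...)) pairs are pure width conversions. The helpers below are a hypothetical sketch, not part of this patch, that spells out the two directions:

/* Hypothetical helpers, for illustration only. */
static inline __be16 cm_lid_to_wire(__be32 sa_lid)
{
	/* sa_path_rec LID -> 16-bit CM wire LID (assumes an IB-sized LID) */
	return htons((u16)ntohl(sa_lid));
}

static inline __be32 cm_lid_from_wire(__be16 wire_lid)
{
	/* 16-bit CM wire LID -> sa_path_rec LID */
	return htonl(ntohs(wire_lid));
}
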
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index acd10d666f1c..91b7a2fe5a55 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -929,7 +929,8 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
+ rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
+ &sgid, NULL);
if (ret)
goto out;
@@ -1127,7 +1128,7 @@ static inline int cma_any_port(struct sockaddr *addr)
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct rdma_cm_id *listen_id,
- struct ib_sa_path_rec *path)
+ struct sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
@@ -1139,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
ib->sib_pkey = path->pkey;
ib->sib_flowinfo = path->flow_label;
memcpy(&ib->sib_addr, &path->sgid, 16);
- ib->sib_sid = path->service_id;
+ ib->sib_sid = sa_path_get_service_id(path);
ib->sib_scope_id = 0;
} else {
ib->sib_pkey = listen_ib->sib_pkey;
@@ -1273,7 +1274,8 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
memcpy(&req->local_gid, &req_param->primary_path->sgid,
sizeof(req->local_gid));
req->has_gid = true;
- req->service_id = req_param->primary_path->service_id;
+ req->service_id =
+ sa_path_get_service_id(req_param->primary_path);
req->pkey = be16_to_cpu(req_param->primary_path->pkey);
if (req->pkey != req_param->bth_pkey)
pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1755,6 +1757,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
+ if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ (id_priv->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1821,8 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct rdma_cm_id *id;
struct rdma_route *rt;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
- const __be64 service_id =
- ib_event->param.req_rcvd.primary_path->service_id;
+ struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
+ const __be64 service_id = sa_path_get_service_id(path);
int ret;
id = rdma_create_id(listen_id->route.addr.dev_addr.net,
@@ -1844,7 +1849,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (!rt->path_rec)
goto err;
- rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
+ rt->path_rec[0] = *path;
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
@@ -2297,7 +2302,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
}
EXPORT_SYMBOL(rdma_set_service_type);
-static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
+static void cma_query_handler(int status, struct sa_path_rec *path_rec,
void *context)
{
struct cma_work *work = context;
@@ -2324,18 +2329,25 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct ib_sa_path_rec path_rec;
+ struct sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
+
+ if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
+ path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path_rec.rec_type = SA_PATH_REC_TYPE_IB;
rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
+ sa_path_set_service_id(&path_rec,
+ rdma_get_service_id(&id_priv->id,
+ cma_dst_addr(id_priv)));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
@@ -2449,7 +2461,7 @@ err1:
}
int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct ib_sa_path_rec *path_rec, int num_paths)
+ struct sa_path_rec *path_rec, int num_paths)
{
struct rdma_id_private *id_priv;
int ret;
@@ -2528,6 +2540,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
@@ -2572,21 +2585,22 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
}
}
- route->path_rec->net = &init_net;
- route->path_rec->ifindex = ndev->ifindex;
supported_gids = roce_gid_type_mask_support(id_priv->id.device,
id_priv->id.port_num);
- route->path_rec->gid_type =
- cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ route->path_rec->rec_type =
+ sa_conv_gid_to_pathrec_type(gid_type);
+ sa_path_set_ndev(route->path_rec, &init_net);
+ sa_path_set_ifindex(route->path_rec, ndev->ifindex);
}
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
@@ -2594,8 +2608,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
&route->path_rec->dgid);
/* Use the hint from IP Stack to select GID Type */
- if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -3941,63 +3957,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
-static void cma_query_sa_classport_info_cb(int status,
- struct ib_class_port_info *rec,
- void *context)
-{
- struct class_port_info_context *cb_ctx = context;
-
- WARN_ON(!context);
-
- if (status || !rec) {
- pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
- cb_ctx->device->name, cb_ctx->port_num, status);
- goto out;
- }
-
- memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
-
-out:
- complete(&cb_ctx->done);
-}
-
-static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
- struct ib_class_port_info *class_port_info)
-{
- struct class_port_info_context *cb_ctx;
- int ret;
-
- cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
- if (!cb_ctx)
- return -ENOMEM;
-
- cb_ctx->device = device;
- cb_ctx->class_port_info = class_port_info;
- cb_ctx->port_num = port_num;
- init_completion(&cb_ctx->done);
-
- ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
- CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
- GFP_KERNEL, cma_query_sa_classport_info_cb,
- cb_ctx, &cb_ctx->sa_query);
- if (ret < 0) {
- pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
- device->name, port_num, ret);
- goto out;
- }
-
- wait_for_completion(&cb_ctx->done);
-
-out:
- kfree(cb_ctx);
- return ret;
-}
-
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
- struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@@ -4018,21 +3981,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
- ret = cma_query_sa_classport_info(id_priv->id.device,
- id_priv->id.port_num,
- &class_port_info);
-
- if (ret)
- return ret;
-
- if (!(ib_get_cpi_capmask2(&class_port_info) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
+ if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
+ (!ib_sa_sendonly_fullmem_support(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num))) {
+ pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+ "RDMA CM: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.device->name, id_priv->id.port_num);
+ return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
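
With the gid_type field gone from the path record, the RoCE route-resolution code above records the flavour in path_rec->rec_type via sa_conv_gid_to_pathrec_type(). The sketch below shows the mapping this caller relies on, roughly as defined in rdma/ib_sa.h; treat it as illustrative rather than authoritative:

/* Approximate behaviour of sa_conv_gid_to_pathrec_type() (sketch only). */
static inline enum sa_path_rec_type
example_gid_to_pathrec_type(enum ib_gid_type type)
{
	switch (type) {
	case IB_GID_TYPE_ROCE:			/* RoCE v1 GIDs */
		return SA_PATH_REC_TYPE_ROCE_V1;
	case IB_GID_TYPE_ROCE_UDP_ENCAP:	/* RoCE v2 GIDs */
		return SA_PATH_REC_TYPE_ROCE_V2;
	default:				/* plain IB GIDs */
		return SA_PATH_REC_TYPE_IB;
	}
}
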
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7c9e34d679d3..81d447da0048 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -172,8 +172,16 @@ static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
+ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
+ if (dev->reg_state == IB_DEV_UNREGISTERED) {
+ /*
+ * In IB_DEV_UNINITIALIZED state, cache or port table
+ * is not even created. Free cache and port table only when
+ * device reaches UNREGISTERED state.
+ */
+ ib_cache_release_one(dev);
+ kfree(dev->port_immutable);
+ }
kfree(dev);
}
@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device,
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
- goto out;
+ goto port_cleanup;
}
ret = ib_device_register_rdmacg(device);
if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n");
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
- ib_device_unregister_rdmacg(device);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
- ib_device_unregister_rdmacg(device);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device,
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
+ mutex_unlock(&device_mutex);
+ return 0;
+
+cache_cleanup:
+ ib_cache_cleanup_one(device);
+ ib_cache_release_one(device);
+port_cleanup:
+ kfree(device->port_immutable);
out:
mutex_unlock(&device_mutex);
return ret;
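
The ib_register_device() error paths above are reworked into the usual ordered-label unwind, so each failure point jumps to the label that undoes exactly the steps completed so far. A generic sketch of the pattern follows; the setup_*/teardown_* names are made up, standing in for the port-table, cache and sysfs steps:

int example_register(void)
{
	int ret;

	ret = setup_ports();		/* analogous to setting up port_immutable */
	if (ret)
		goto out;
	ret = setup_cache();		/* analogous to ib_cache_setup_one() */
	if (ret)
		goto port_cleanup;
	ret = setup_sysfs();		/* analogous to ib_device_register_sysfs() */
	if (ret)
		goto cache_cleanup;
	return 0;

cache_cleanup:
	teardown_cache();
port_cleanup:
	teardown_ports();
out:
	return ret;
}
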
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdfad5f26212..84d2615b5d4b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -96,7 +96,8 @@ struct ib_fmr_pool {
void * arg);
void *flush_arg;
- struct task_struct *thread;
+ struct kthread_worker *worker;
+ struct kthread_work work;
atomic_t req_ser;
atomic_t flush_ser;
@@ -174,29 +175,19 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
}
-static int ib_fmr_cleanup_thread(void *pool_ptr)
+static void ib_fmr_cleanup_func(struct kthread_work *work)
{
- struct ib_fmr_pool *pool = pool_ptr;
+ struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
- do {
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
+ ib_fmr_batch_release(pool);
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (!kthread_should_stop());
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
- return 0;
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
+ kthread_queue_work(pool->worker, &pool->work);
}
/**
@@ -265,15 +256,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
- pool->thread = kthread_run(ib_fmr_cleanup_thread,
- pool,
- "ib_fmr(%s)",
- device->name);
- if (IS_ERR(pool->thread)) {
- pr_warn(PFX "couldn't start cleanup thread\n");
- ret = PTR_ERR(pool->thread);
+ pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+ if (IS_ERR(pool->worker)) {
+ pr_warn(PFX "couldn't start cleanup kthread worker\n");
+ ret = PTR_ERR(pool->worker);
goto out_free_pool;
}
+ kthread_init_work(&pool->work, ib_fmr_cleanup_func);
{
struct ib_pool_fmr *fmr;
@@ -338,7 +327,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
LIST_HEAD(fmr_list);
int i;
- kthread_stop(pool->thread);
+ kthread_destroy_worker(pool->worker);
ib_fmr_batch_release(pool);
i = 0;
@@ -388,7 +377,7 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
@@ -502,7 +491,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
}
}
}
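
The flush logic above moves from a hand-rolled kthread loop to the kthread_worker API: producers bump req_ser and call kthread_queue_work(), and the work function re-queues itself while flush_ser still trails req_ser. A minimal, self-contained sketch of that API, illustrative and outside the FMR code:

#include <linux/err.h>
#include <linux/kthread.h>

struct flush_ctx {
	struct kthread_worker *worker;
	struct kthread_work work;
};

static void flush_func(struct kthread_work *work)
{
	struct flush_ctx *ctx = container_of(work, struct flush_ctx, work);

	/* do one batch of work here; re-queue ctx->work if more is pending */
	(void)ctx;
}

static int flush_ctx_init(struct flush_ctx *ctx, const char *name)
{
	ctx->worker = kthread_create_worker(0, "%s", name);
	if (IS_ERR(ctx->worker))
		return PTR_ERR(ctx->worker);
	kthread_init_work(&ctx->work, flush_func);
	return 0;
}

static void flush_ctx_kick(struct flush_ctx *ctx)
{
	kthread_queue_work(ctx->worker, &ctx->work);	/* no-op if already queued */
}

static void flush_ctx_destroy(struct flush_ctx *ctx)
{
	kthread_destroy_worker(ctx->worker);	/* flushes pending work first */
}
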
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 57f231f1c721..192ee3dafb80 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -605,7 +605,7 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
@@ -622,7 +622,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
- return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -1834,12 +1833,13 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
const struct ib_mad_send_wr_private *wr,
const struct ib_mad_recv_wc *rwc )
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
+ bool has_grh;
send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
@@ -1848,36 +1848,40 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
/* both requests, or both responses. GIDs different */
return 0;
- if (ib_query_ah(wr->send_buf.ah, &attr))
+ if (rdma_query_ah(wr->send_buf.ah, &attr))
/* Assume not equal, to avoid false positives. */
return 0;
- if (!!(attr.ah_flags & IB_AH_GRH) !=
- !!(rwc->wc->wc_flags & IB_WC_GRH))
+ has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
+ if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
/* one has GID, other does not. Assume different */
return 0;
if (!send_resp && rcv_resp) {
/* is request/response. */
- if (!(attr.ah_flags & IB_AH_GRH)) {
+ if (!has_grh) {
if (ib_get_cached_lmc(device, port_num, &lmc))
return 0;
- return (!lmc || !((attr.src_path_bits ^
+ return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
rwc->wc->dlid_path_bits) &
((1 << lmc) - 1)));
} else {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&attr);
+
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid, NULL))
+ grh->sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
}
}
- if (!(attr.ah_flags & IB_AH_GRH))
- return attr.dlid == rwc->wc->slid;
+ if (!has_grh)
+ return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
else
- return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+ return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
+ rwc->recv_buf.grh->sgid.raw,
16);
}
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 382941b46e43..0d3cca0a8890 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- ib_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah);
kfree(rmpp_recv);
}
@@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -209,7 +209,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +237,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -852,7 +852,7 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
@@ -867,10 +867,10 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
- if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+ if (rdma_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
- if (rmpp_recv->slid == ah_attr.dlid) {
+ if (rmpp_recv->slid == rdma_ah_get_dlid(&ah_attr)) {
newwin = rmpp_recv->repwin;
break;
}
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 322cb67b07a9..45f2f095f793 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -720,7 +720,7 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -743,19 +743,18 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
return ret;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->mlid);
- ah_attr->sl = rec->sl;
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->mgid;
-
- ah_attr->grh.sgid_index = (u8) gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
+
+ rdma_ah_set_grh(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ (u8)gid_index,
+ rec->hop_limit,
+ rec->traffic_class);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
new file mode 100644
index 000000000000..41c31a2bf093
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+ kref_get(&uobject->ref);
+}
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ struct ib_uobject *uobj =
+ container_of(ref, struct ib_uobject, ref);
+
+ if (uobj->type->type_class->needs_kfree_rcu)
+ kfree_rcu(uobj, rcu);
+ else
+ kfree(uobj);
+}
+
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+{
+ /*
+ * When a shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increments it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ if (!exclusive)
+ return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+
+ /* lock is either WRITE or DESTROY - should be exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+}
+
+static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+ const struct uverbs_obj_type *type)
+{
+ struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler; the object is added
+ * to the list in the commit stage.
+ */
+ uobj->context = context;
+ uobj->type = type;
+ atomic_set(&uobj->usecnt, 0);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ int ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&uobj->context->ufile->idr_lock);
+
+ /*
+ * We start with allocating an idr pointing to NULL. This represents an
+ * object which isn't initialized yet. We'll replace it later on with
+ * the real object once we commit.
+ */
+ ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
+ if (ret >= 0)
+ uobj->id = ret;
+
+ spin_unlock(&uobj->context->ufile->idr_lock);
+ idr_preload_end();
+
+ return ret < 0 ? ret : 0;
+}
+
+/*
+ * This only removes the uobject from the idr; uverbs_uobject_put() is still
+ * required.
+ */
+static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
+{
+ spin_lock(&uobj->context->ufile->idr_lock);
+ idr_remove(&uobj->context->ufile->idr, uobj->id);
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+
+ rcu_read_lock();
+ /* object won't be released as we're protected by rcu */
+ uobj = idr_find(&ucontext->ufile->idr, id);
+ if (!uobj) {
+ uobj = ERR_PTR(-ENOENT);
+ goto free;
+ }
+
+ uverbs_uobject_get(uobj);
+free:
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct file *f;
+ struct ib_uobject *uobject;
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+
+ if (exclusive)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ f = fget(id);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running uverbs_close_fd,
+ * and the caller is expected to ensure that uverbs_close_fd is never
+ * done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->type != type) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, exclusive);
+ if (ret) {
+ WARN(ucontext->cleanup_reason,
+ "ib_uverbs: Trying to lookup_get while cleanup context\n");
+ goto free;
+ }
+
+ return uobj;
+free:
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto idr_remove;
+
+ return uobj;
+
+idr_remove:
+ uverbs_idr_remove_uobj(uobj);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+ int new_fd;
+ struct ib_uobject *uobj;
+ struct ib_uobject_file *uobj_file;
+ struct file *filp;
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0)
+ return ERR_PTR(new_fd);
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj)) {
+ put_unused_fd(new_fd);
+ return uobj;
+ }
+
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ filp = anon_inode_getfile(fd_type->name,
+ fd_type->fops,
+ uobj_file,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ uverbs_uobject_put(uobj);
+ return (void *)filp;
+ }
+
+ uobj_file->uobj.id = new_fd;
+ uobj_file->uobj.object = filp;
+ uobj_file->ufile = ucontext->ufile;
+ INIT_LIST_HEAD(&uobj->list);
+ kref_get(&uobj_file->ufile->ref);
+
+ return uobj;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return type->type_class->alloc_begin(type, ucontext);
+}
+
+static void uverbs_uobject_add(struct ib_uobject *uobject)
+{
+ mutex_lock(&uobject->context->uobjects_lock);
+ list_add(&uobject->list, &uobject->context->uobjects);
+ mutex_unlock(&uobject->context->uobjects_lock);
+}
+
+static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->type, struct uverbs_obj_idr_type,
+ type);
+ int ret = idr_type->destroy_object(uobj, why);
+
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object. In the rest of the cases, just remove whatever you can.
+ */
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_idr_remove_uobj(uobj);
+
+ return ret;
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ struct file *filp = uobj->object;
+ int id = uobj_file->uobj.id;
+
+ /* Unsuccessful NEW */
+ fput(filp);
+ put_unused_fd(id);
+}
+
+static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(uobj->type, struct uverbs_obj_fd_type, type);
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ int ret = fd_type->context_closed(uobj_file, why);
+
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ if (why == RDMA_REMOVE_DURING_CLEANUP) {
+ alloc_abort_fd_uobject(uobj);
+ return ret;
+ }
+
+ uobj_file->uobj.context = NULL;
+ return ret;
+}
+
+static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+{
+#ifdef CONFIG_LOCKDEP
+ if (exclusive)
+ WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ else
+ WARN_ON(atomic_read(&uobj->usecnt) == -1);
+#endif
+}
+
+static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ ret = uobj->type->type_class->remove_commit(uobj, why);
+ if (ret && why == RDMA_REMOVE_DESTROY) {
+ /* We couldn't remove the object, so just unlock the uobject */
+ atomic_set(&uobj->usecnt, 0);
+ uobj->type->type_class->lookup_put(uobj, true);
+ } else {
+ mutex_lock(&ucontext->uobjects_lock);
+ list_del(&uobj->list);
+ mutex_unlock(&ucontext->uobjects_lock);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(uobj);
+ }
+
+ return ret;
+}
+
+/* This is called only for user requested DESTROY reasons */
+int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ /* put the ref count we took at lookup_get */
+ uverbs_uobject_put(uobj);
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
+ return 0;
+ }
+ lockdep_check(uobj, true);
+ ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
+
+ up_read(&ucontext->cleanup_rwsem);
+ return ret;
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_uobject_add(uobj);
+ spin_lock(&uobj->context->ufile->idr_lock);
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ */
+ WARN_ON(idr_replace(&uobj->context->ufile->idr,
+ uobj, uobj->id));
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+
+ uverbs_uobject_add(&uobj_file->uobj);
+ fd_install(uobj_file->uobj.id, uobj->object);
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj_file->uobj.id = 0;
+ /* Get another reference as we export this to the fops */
+ uverbs_uobject_get(&uobj_file->uobj);
+}
+
+int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+{
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+ int ret;
+
+ WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
+ ret = uobj->type->type_class->remove_commit(uobj,
+ RDMA_REMOVE_DURING_CLEANUP);
+ if (ret)
+ pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+ uobj->id);
+ return ret;
+ }
+
+ uobj->type->type_class->alloc_commit(uobj);
+ up_read(&uobj->context->cleanup_rwsem);
+
+ return 0;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_idr_remove_uobj(uobj);
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_uobject_put(uobj);
+}
+
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
+{
+ uobj->type->type_class->alloc_abort(uobj);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(exclusive);
+ /* This indirectly calls uverbs_close_fd and frees the object */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ lockdep_check(uobj, exclusive);
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ if (!exclusive)
+ atomic_dec(&uobj->usecnt);
+ else
+ atomic_set(&uobj->usecnt, 0);
+
+ uverbs_uobject_put(uobj);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .remove_commit = remove_commit_idr_uobject,
+ /*
+ * When we destroy an object, we first just lock it for WRITE and
+ * actually DESTROY it in the finalize stage. So, the problematic
+ * scenario is when we just started the finalize stage of the
+ * destruction (nothing was executed yet). Now, the other thread
+ * fetched the object for READ access, but it didn't lock it yet.
+ * The DESTROY thread continues and starts destroying the object.
+ * When the other thread continues - without the RCU, it would
+ * access freed memory. However, the rcu_read_lock delays the free
+ * until the rcu_read_lock of the READ operation quits. Since the
+ * exclusive lock of the object is still taken by the DESTROY flow, the
+ * READ operation will get -EBUSY and it'll just bail out.
+ */
+ .needs_kfree_rcu = true,
+};
+
+static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+{
+ struct ib_ucontext *ucontext;
+ struct ib_uverbs_file *ufile = uobj_file->ufile;
+ int ret;
+
+ mutex_lock(&uobj_file->ufile->cleanup_mutex);
+
+ /* uobject was either already cleaned up or is cleaned up right now anyway */
+ if (!uobj_file->uobj.context ||
+ !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+ goto unlock;
+
+ ucontext = uobj_file->uobj.context;
+ ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ucontext->cleanup_rwsem);
+ if (ret)
+ pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
+unlock:
+ mutex_unlock(&ufile->cleanup_mutex);
+}
+
+void uverbs_close_fd(struct file *f)
+{
+ struct ib_uobject_file *uobj_file = f->private_data;
+ struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+
+ _uverbs_close_fd(uobj_file);
+ uverbs_uobject_put(&uobj_file->uobj);
+ kref_put(uverbs_file_ref, ib_uverbs_release_file);
+}
+
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+{
+ enum rdma_remove_reason reason = device_removed ?
+ RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
+ unsigned int cur_order = 0;
+
+ ucontext->cleanup_reason = reason;
+ /*
+ * Waits for all remove_commit and alloc_commit to finish. Logically, we
+ * want to hold this forever as the context is going to be destroyed,
+ * but we'll release it since it causes a "held lock freed" BUG message.
+ */
+ down_write(&ucontext->cleanup_rwsem);
+
+ while (!list_empty(&ucontext->uobjects)) {
+ struct ib_uobject *obj, *next_obj;
+ unsigned int next_order = UINT_MAX;
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per order traversal in order
+ * to let other threads (which might still use the FDs) chance
+ * to run.
+ */
+ mutex_lock(&ucontext->uobjects_lock);
+ list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
+ list) {
+ if (obj->type->destroy_order == cur_order) {
+ int ret;
+
+ /*
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, true));
+ ret = obj->type->type_class->remove_commit(obj,
+ reason);
+ list_del(&obj->list);
+ if (ret)
+ pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
+ obj->id, cur_order);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(obj);
+ } else {
+ next_order = min(next_order,
+ obj->type->destroy_order);
+ }
+ }
+ mutex_unlock(&ucontext->uobjects_lock);
+ cur_order = next_order;
+ }
+ up_write(&ucontext->cleanup_rwsem);
+}
+
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+{
+ ucontext->cleanup_reason = 0;
+ mutex_init(&ucontext->uobjects_lock);
+ INIT_LIST_HEAD(&ucontext->uobjects);
+ init_rwsem(&ucontext->cleanup_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .remove_commit = remove_commit_fd_uobject,
+ .needs_kfree_rcu = false,
+};
+
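
Taken together, the functions in this new file give uverbs handlers a begin/commit/abort lifecycle for object creation and a lookup_get/lookup_put pair for access. The schematic handlers below use only the entry points defined above; create_hw_object()/use_hw_object() are hypothetical driver calls and error handling is trimmed:

static int example_create_handler(struct ib_ucontext *ucontext,
				  const struct uverbs_obj_type *type)
{
	struct ib_uobject *uobj;
	int ret;

	uobj = rdma_alloc_begin_uobject(type, ucontext);  /* idr/fd reserved */
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	ret = create_hw_object(uobj);			/* hypothetical driver call */
	if (ret) {
		rdma_alloc_abort_uobject(uobj);		/* undo the reservation */
		return ret;
	}

	return rdma_alloc_commit_uobject(uobj);		/* now visible to userspace */
}

static int example_use_handler(const struct uverbs_obj_type *type,
			       struct ib_ucontext *ucontext, int id)
{
	struct ib_uobject *uobj;

	uobj = rdma_lookup_get_uobject(type, ucontext, id, false); /* shared access */
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	use_hw_object(uobj);				/* hypothetical driver call */

	rdma_lookup_put_uobject(uobj, false);
	return 0;
}
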
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
new file mode 100644
index 000000000000..1b82e7ff7fe8
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+/*
+ * These functions initialize the context and cleanups its uobjects.
+ * The context has a list of objects which is protected by a mutex
+ * on the context. initialize_ucontext should be called when we create
+ * a context.
+ * cleanup_ucontext removes all uobjects from the context and puts them.
+ */
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless if this uobject is still alive in the context's objects
+ * repository. Objects are put via uverbs_uobject_put.
+ */
+void uverbs_uobject_get(struct ib_uobject *uobject);
+
+/*
+ * In order to indicate we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count is decreased, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
+/* Indicate this fd is no longer used by this consumer, but its memory isn't
+ * necessarily released yet. When the last reference is put, we release the
+ * memory. After this call is executed, calling uverbs_uobject_get isn't
+ * allowed.
+ * This must be called from the release file_operations of the file!
+ */
+void uverbs_close_fd(struct file *f);
+
+#endif /* RDMA_CORE_H */
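
As the comments above note, uverbs_uobject_get()/uverbs_uobject_put() manage only the memory lifetime of a uobject, independently of whether it is still live in the context's repository. A short sketch of the intended usage when a handler must keep the memory around across a blocking step (wait_for_something() is a placeholder):

static void example_keep_alive(struct ib_uobject *uobj)
{
	uverbs_uobject_get(uobj);	/* pin the memory, not the object state */

	wait_for_something();		/* placeholder for a blocking operation */

	uverbs_uobject_put(uobj);	/* may free the uobject on last reference */
}
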
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ceae153997d0..e335b09c022e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -56,6 +56,8 @@
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
+#define IB_SA_CPI_MAX_RETRY_CNT 3
+#define IB_SA_CPI_RETRY_WAIT 1000 /* msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
@@ -65,9 +67,23 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
+enum rdma_class_port_info_type {
+ RDMA_CLASS_PORT_INFO_IB,
+ RDMA_CLASS_PORT_INFO_OPA
+};
+
+struct rdma_class_port_info {
+ enum rdma_class_port_info_type type;
+ union {
+ struct ib_class_port_info ib;
+ struct opa_class_port_info opa;
+ };
+};
+
struct ib_sa_classport_cache {
bool valid;
- struct ib_class_port_info data;
+ int retry_cnt;
+ struct rdma_class_port_info data;
};
struct ib_sa_port {
@@ -75,6 +91,7 @@ struct ib_sa_port {
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
struct ib_sa_classport_cache classport_info;
+ struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
u8 port_num;
@@ -103,6 +120,7 @@ struct ib_sa_query {
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
+#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
@@ -111,9 +129,10 @@ struct ib_sa_service_query {
};
struct ib_sa_path_query {
- void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void (*callback)(int, struct sa_path_rec *, void *);
void *context;
struct ib_sa_query sa_query;
+ struct sa_path_rec *conv_pr;
};
struct ib_sa_guidinfo_query {
@@ -123,7 +142,7 @@ struct ib_sa_guidinfo_query {
};
struct ib_sa_classport_info_query {
- void (*callback)(int, struct ib_class_port_info *, void *);
+ void (*callback)(void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -170,12 +189,12 @@ static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
- { PATH_REC_FIELD(service_id),
+ { PATH_REC_FIELD(ib.service_id),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 64 },
@@ -187,15 +206,15 @@ static const struct ib_field path_rec_table[] = {
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 },
- { PATH_REC_FIELD(dlid),
+ { PATH_REC_FIELD(ib.dlid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
- { PATH_REC_FIELD(slid),
+ { PATH_REC_FIELD(ib.slid),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 16 },
- { PATH_REC_FIELD(raw_traffic),
+ { PATH_REC_FIELD(ib.raw_traffic),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 1 },
@@ -269,6 +288,136 @@ static const struct ib_field path_rec_table[] = {
.size_bits = 48 },
};
+#define OPA_PATH_REC_FIELD(field) \
+ .struct_offset_bytes = \
+ offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = \
+ sizeof((struct sa_path_rec *)0)->field, \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field opa_path_rec_table[] = {
+ { OPA_PATH_REC_FIELD(opa.service_id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { OPA_PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(opa.dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.slid),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.raw_traffic),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(flow_label),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { OPA_PATH_REC_FIELD(hop_limit),
+ .offset_words = 12,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(traffic_class),
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(reversible),
+ .offset_words = 13,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(numb_path),
+ .offset_words = 13,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { OPA_PATH_REC_FIELD(pkey),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_PATH_REC_FIELD(opa.l2_8B),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_10B),
+ .offset_words = 14,
+ .offset_bits = 1,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_9B),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_16B),
+ .offset_words = 14,
+ .offset_bits = 3,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 4,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_type),
+ .offset_words = 14,
+ .offset_bits = 6,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_priority),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(sl),
+ .offset_words = 14,
+ .offset_bits = 19,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(mtu_selector),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(mtu),
+ .offset_words = 15,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(rate_selector),
+ .offset_words = 15,
+ .offset_bits = 8,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(rate),
+ .offset_words = 15,
+ .offset_bits = 10,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 15,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(packet_life_time),
+ .offset_words = 15,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(preference),
+ .offset_words = 15,
+ .offset_bits = 24,
+ .size_bits = 8 },
+};
+
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
@@ -406,7 +555,7 @@ static const struct ib_field service_rec_table[] = {
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
-static const struct ib_field classport_info_rec_table[] = {
+static const struct ib_field ib_classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
@@ -477,6 +626,88 @@ static const struct ib_field classport_info_rec_table[] = {
.size_bits = 32 },
};
+#define OPA_CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes =\
+ offsetof(struct opa_class_port_info, field), \
+ .struct_size_bytes = \
+ sizeof((struct opa_class_port_info *)0)->field, \
+ .field_name = "opa_class_port_info:" #field
+
+static const struct ib_field opa_classport_info_rec_table[] = {
+ { OPA_CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 18,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 18,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
+ .offset_words = 19,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 19,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -518,7 +749,7 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
struct ib_sa_query *query)
{
- struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
+ struct sa_path_rec *sa_rec = query->mad_buf->context[1];
struct ib_sa_mad *mad = query->mad_buf->mad;
ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
u16 val16;
@@ -543,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Now build the attributes */
if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
- val64 = be64_to_cpu(sa_rec->service_id);
+ val64 = be64_to_cpu(sa_path_get_service_id(sa_rec));
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
sizeof(val64), &val64);
}
@@ -927,96 +1158,10 @@ static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- ib_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah);
kfree(sm_ah);
}
-static void update_sm_ah(struct work_struct *work)
-{
- struct ib_sa_port *port =
- container_of(work, struct ib_sa_port, update_task);
- struct ib_sa_sm_ah *new_ah;
- struct ib_port_attr port_attr;
- struct ib_ah_attr ah_attr;
-
- if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- pr_warn("Couldn't query port\n");
- return;
- }
-
- new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
- if (!new_ah) {
- return;
- }
-
- kref_init(&new_ah->ref);
- new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
-
- new_ah->pkey_index = 0;
- if (ib_find_pkey(port->agent->device, port->port_num,
- IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- pr_err("Couldn't find index for default PKey\n");
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port_attr.sm_lid;
- ah_attr.sl = port_attr.sm_sl;
- ah_attr.port_num = port->port_num;
- if (port_attr.grh_required) {
- ah_attr.ah_flags = IB_AH_GRH;
- ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
- ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
- }
-
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(new_ah->ah)) {
- pr_warn("Couldn't create new SM AH\n");
- kfree(new_ah);
- return;
- }
-
- spin_lock_irq(&port->ah_lock);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = new_ah;
- spin_unlock_irq(&port->ah_lock);
-
-}
-
-static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
-{
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
- unsigned long flags;
- struct ib_sa_device *sa_dev =
- container_of(handler, typeof(*sa_dev), event_handler);
- struct ib_sa_port *port =
- &sa_dev->port[event->element.port_num - sa_dev->start_port];
-
- if (!rdma_cap_ib_sa(handler->device, port->port_num))
- return;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = NULL;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- if (event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_LID_CHANGE) {
- spin_lock_irqsave(&port->classport_lock, flags);
- port->classport_info.valid = false;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- }
- queue_work(ib_wq, &sa_dev->port[event->element.port_num -
- sa_dev->start_port].update_task);
- }
-}
-
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
@@ -1085,7 +1230,8 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
}
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -1093,21 +1239,26 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->dlid);
- ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
- get_src_path_mask(device, port_num);
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
use_roce = rdma_cap_eth_ah(device, port_num);
if (use_roce) {
struct net_device *idev;
struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
- .net = rec->net ? rec->net :
- &init_net};
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -1128,7 +1279,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
idev = device->get_netdev(device, port_num);
@@ -1159,28 +1310,31 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || use_roce) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->dgid;
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- ret = ib_find_cached_gid_by_port(device, &rec->sgid,
- rec->gid_type, port_num, ndev,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
return ret;
}
- ah_attr->grh.sgid_index = gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
if (ndev)
dev_put(ndev);
}
- if (use_roce)
- memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+ if (use_roce) {
+ u8 *dmac = sa_path_get_dmac(rec);
+
+ if (!dmac)
+ return -EINVAL;
+ memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ }
return 0;
}
@@ -1203,7 +1357,9 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask,
- IB_MGMT_BASE_VERSION);
+ ((query->flags & IB_SA_QUERY_OPA) ?
+ OPA_MGMT_BASE_VERSION :
+ IB_MGMT_BASE_VERSION));
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
@@ -1220,16 +1376,21 @@ static void free_mad(struct ib_sa_query *query)
kref_put(&query->sm_ah->ref, free_sm_ah);
}
-static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
+ struct ib_sa_mad *mad = query->mad_buf->mad;
unsigned long flags;
memset(mad, 0, sizeof *mad);
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ if (query->flags & IB_SA_QUERY_OPA) {
+ mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
+ } else {
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ }
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
-
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
@@ -1258,7 +1419,8 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
query->mad_buf->context[0] = query;
query->id = id;
- if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
+ if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
+ (!(query->flags & IB_SA_QUERY_OPA))) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
@@ -1281,18 +1443,75 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
return ret ? ret : id;
}
-void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
-void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
+static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ unsigned long flags;
+ bool ret = false;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (!port->classport_info.valid)
+ goto ret;
+
+ if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
+ ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
+ OPA_CLASS_PORT_INFO_PR_SUPPORT;
+ret:
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+
+enum opa_pr_supported {
+ PR_NOT_SUPPORTED,
+ PR_OPA_SUPPORTED,
+ PR_IB_SUPPORTED
+};
+
+/**
+ * Check if the current PR query can be an OPA query.
+ * Returns PR_NOT_SUPPORTED if a path record query is not
+ * possible, PR_OPA_SUPPORTED if an OPA path record query
+ * is possible, and PR_IB_SUPPORTED if an IB path record
+ * query is possible.
+ */
+static int opa_pr_query_possible(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num,
+ struct sa_path_rec *rec)
+{
+ struct ib_port_attr port_attr;
+
+ if (ib_query_port(device, port_num, &port_attr))
+ return PR_NOT_SUPPORTED;
+
+ if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ return PR_OPA_SUPPORTED;
+
+ if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ return PR_NOT_SUPPORTED;
+ else
+ return PR_IB_SUPPORTED;
+}
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1301,22 +1520,44 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
container_of(sa_query, struct ib_sa_path_query, sa_query);
if (mad) {
- struct ib_sa_path_rec rec;
-
- ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.net = NULL;
- rec.ifindex = 0;
- rec.gid_type = IB_GID_TYPE_IB;
- eth_zero_addr(rec.dmac);
- query->callback(status, &rec, query->context);
+ struct sa_path_rec rec;
+
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ ib_unpack(opa_path_rec_table,
+ ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, query->context);
+ } else {
+ ib_unpack(path_rec_table,
+ ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_ndev(&rec, NULL);
+ sa_path_set_ifindex(&rec, 0);
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, query->context);
+ } else {
+ query->callback(status, &rec, query->context);
+ }
+ }
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ kfree(query->conv_pr);
+ kfree(query);
}
/**
@@ -1346,11 +1587,11 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
+ struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
- struct ib_sa_path_rec *resp,
+ struct sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
@@ -1360,11 +1601,16 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
+ enum opa_pr_supported status;
int ret;
if (!sa_dev)
return -ENODEV;
+ if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
+ (rec->rec_type != SA_PATH_REC_TYPE_OPA))
+ return -EINVAL;
+
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@@ -1373,9 +1619,26 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
return -ENOMEM;
query->sa_query.port = port;
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
+ status = opa_pr_query_possible(client, device, port_num, rec);
+ if (status == PR_NOT_SUPPORTED) {
+ ret = -EINVAL;
+ goto err1;
+ } else if (status == PR_OPA_SUPPORTED) {
+ query->sa_query.flags |= IB_SA_QUERY_OPA;
+ } else {
+ query->conv_pr =
+ kmalloc(sizeof(*query->conv_pr), gfp_mask);
+ if (!query->conv_pr) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ }
+ }
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err2;
ib_sa_client_get(client);
query->sa_query.client = client;
@@ -1383,7 +1646,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
@@ -1391,24 +1654,36 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+ if (query->sa_query.flags & IB_SA_QUERY_OPA) {
+ ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ rec, mad->data);
+ } else if (query->conv_pr) {
+ sa_convert_path_opa_to_ib(query->conv_pr, rec);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ query->conv_pr, mad->data);
+ } else {
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec, mad->data);
+ }
*sa_query = &query->sa_query;
query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
- query->sa_query.mad_buf->context[1] = rec;
+ query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
+ query->conv_pr : rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err3;
return ret;
-err2:
+err3:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
+err2:
+ kfree(query->conv_pr);
err1:
kfree(query);
return ret;
@@ -1508,7 +1783,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
query->sa_query.release = ib_sa_service_rec_release;
@@ -1600,7 +1875,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
@@ -1697,7 +1972,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
@@ -1728,7 +2003,42 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-/* Support get SA ClassPortInfo */
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ bool ret = false;
+ unsigned long flags;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if ((port->classport_info.valid) &&
+ (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
+ ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
+ & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
+
+struct ib_classport_info_context {
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void ib_classportinfo_cb(void *context)
+{
+ struct ib_classport_info_context *cb_ctx = context;
+
+ complete(&cb_ctx->done);
+}
+
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1736,91 +2046,91 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+ struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
if (mad) {
- struct ib_class_port_info rec;
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ struct opa_class_port_info rec;
- ib_unpack(classport_info_rec_table,
- ARRAY_SIZE(classport_info_rec_table),
- mad->data, &rec);
+ ib_unpack(opa_classport_info_rec_table,
+ ARRAY_SIZE(opa_classport_info_rec_table),
+ mad->data, &rec);
- spin_lock_irqsave(&sa_query->port->classport_lock, flags);
- if (!status && !sa_query->port->classport_info.valid) {
- memcpy(&sa_query->port->classport_info.data, &rec,
- sizeof(sa_query->port->classport_info.data));
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.opa, &rec,
+ sizeof(info->data.opa));
- sa_query->port->classport_info.valid = true;
- }
- spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_OPA;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
- query->callback(status, &rec, query->context);
- } else {
- query->callback(status, NULL, query->context);
+ } else {
+ struct ib_class_port_info rec;
+
+ ib_unpack(ib_classport_info_rec_table,
+ ARRAY_SIZE(ib_classport_info_rec_table),
+ mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.ib, &rec,
+ sizeof(info->data.ib));
+
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_IB;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
+ }
}
+ query->callback(query->context);
}
-static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
+ int timeout_ms,
+ void (*callback)(void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
- struct ib_sa_classport_info_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
struct ib_mad_agent *agent;
+ struct ib_sa_classport_info_query *query;
struct ib_sa_mad *mad;
- struct ib_class_port_info cached_class_port_info;
+ gfp_t gfp_mask = GFP_KERNEL;
int ret;
- unsigned long flags;
- if (!sa_dev)
- return -ENODEV;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- /* Use cached ClassPortInfo attribute if valid instead of sending mad */
- spin_lock_irqsave(&port->classport_lock, flags);
- if (port->classport_info.valid && callback) {
- memcpy(&cached_class_port_info, &port->classport_info.data,
- sizeof(cached_class_port_info));
- spin_unlock_irqrestore(&port->classport_lock, flags);
- callback(0, &cached_class_port_info, context);
- return 0;
- }
- spin_unlock_irqrestore(&port->classport_lock, flags);
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
+ query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
+ port->port_num) ?
+ IB_SA_QUERY_OPA : 0;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err_free;
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
-
- query->sa_query.release = ib_sa_portclass_info_rec_release;
- /* support GET only */
+ query->sa_query.callback = ib_sa_classport_info_rec_callback;
+ query->sa_query.release = ib_sa_classport_info_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
@@ -1828,20 +2138,71 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err_free_mad;
return ret;
-err2:
+err_free_mad:
*sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-err1:
+err_free:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
+static void update_ib_cpi(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, ib_cpi_work.work);
+ struct ib_classport_info_context *cb_context;
+ unsigned long flags;
+ int ret;
+
+ /* If the classport info is valid, nothing
+ * to do here.
+ */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid) {
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+ cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
+ if (!cb_context)
+ goto err_nomem;
+
+ init_completion(&cb_context->done);
+
+ ret = ib_sa_classport_info_rec_query(port, 3000,
+ ib_classportinfo_cb, cb_context,
+ &cb_context->sa_query);
+ if (ret < 0)
+ goto free_cb_err;
+ wait_for_completion(&cb_context->done);
+free_cb_err:
+ kfree(cb_context);
+ spin_lock_irqsave(&port->classport_lock, flags);
+
+	/* If the classport info is still not valid, the query should have
+	 * failed for some reason. Retry issuing the query.
+	 */
+ if (!port->classport_info.valid) {
+ port->classport_info.retry_cnt++;
+ if (port->classport_info.retry_cnt <=
+ IB_SA_CPI_MAX_RETRY_CNT) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
+ }
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+err_nomem:
+ return;
+}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1870,7 +2231,8 @@ static void send_handler(struct ib_mad_agent *agent,
spin_unlock_irqrestore(&idr_lock, flags);
free_mad(query);
- ib_sa_client_put(query->client);
+ if (query->client)
+ ib_sa_client_put(query->client);
query->release(query);
}
@@ -1897,6 +2259,102 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct rdma_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ pr_warn("Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
+ if (!new_ah)
+ return;
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ pr_err("Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(port->agent->device,
+ port->port_num);
+ rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
+ rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ if (port_attr.grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ }
+
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ pr_warn("Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ u8 port_num = event->element.port_num - sa_dev->start_port;
+ struct ib_sa_port *port = &sa_dev->port[port_num];
+
+ if (!rdma_cap_ib_sa(handler->device, port->port_num))
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PORT_ACTIVE) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ port->classport_info.retry_cnt = 0;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ queue_delayed_work(ib_wq,
+ &port->ib_cpi_work, delay);
+ }
+ queue_work(ib_wq, &sa_dev->port[port_num].update_task);
+ }
+}
+
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
@@ -1934,6 +2392,8 @@ static void ib_sa_add_one(struct ib_device *device)
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
+ update_ib_cpi);
count++;
}
@@ -1980,11 +2440,11 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
return;
ib_unregister_event_handler(&sa_dev->event_handler);
-
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_cap_ib_sa(device, i + 1)) {
+ cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
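
A minimal caller-side sketch of the reworked path record API, assuming an
already registered ib_sa_client (my_pr_callback and query_path are
hypothetical; only ib_sa_path_rec_get, struct sa_path_rec and its rec_type
field come from the hunks above):

	static void my_pr_callback(int status, struct sa_path_rec *resp,
				   void *context)
	{
		/* resp->rec_type says whether an IB or OPA record came back */
	}

	static int query_path(struct ib_sa_client *client, struct ib_device *dev,
			      u8 port, struct sa_path_rec *rec,
			      struct ib_sa_query **query)
	{
		rec->rec_type = SA_PATH_REC_TYPE_IB;	/* or SA_PATH_REC_TYPE_OPA */
		return ib_sa_path_rec_get(client, dev, port, rec,
					  IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID,
					  1000, GFP_KERNEL, my_pr_callback,
					  NULL, query);
	}
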
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index daadf3130c9f..7ebe1ef23652 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -253,6 +253,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " EDR";
rate = 250;
break;
+ case IB_SPEED_HDR:
+ speed = " HDR";
+ rate = 500;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
rate = 25;
@@ -1301,7 +1305,7 @@ err_put:
free_port_list_attributes(device);
err_unregister:
- device_unregister(class_dev);
+ device_del(class_dev);
err:
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index cc0d51fb06e3..112099c86a19 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -702,10 +702,10 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
return 0;
}
-static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
+static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
{
struct ib_user_path_rec upath;
- struct ib_sa_path_rec *sa_path;
+ struct sa_path_rec *sa_path;
*path = NULL;
@@ -962,7 +962,7 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
int in_len, int out_len)
{
struct ib_ucm_context *ctx;
- struct ib_sa_path_rec *path = NULL;
+ struct sa_path_rec *path = NULL;
struct ib_ucm_lap cmd;
const void *data = NULL;
int result;
@@ -1205,12 +1205,15 @@ static void ib_ucm_release_dev(struct device *dev)
struct ib_ucm_device *ucm_dev;
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- cdev_del(&ucm_dev->cdev);
+ kfree(ucm_dev);
+}
+
+static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
+{
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(ucm_dev->devnum, dev_map);
else
clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
- kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
@@ -1266,7 +1269,9 @@ static void ib_ucm_add_one(struct ib_device *device)
if (!ucm_dev)
return;
+ device_initialize(&ucm_dev->dev);
ucm_dev->ib_dev = device;
+ ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
if (devnum >= IB_UCM_MAX_DEVICES) {
@@ -1286,16 +1291,14 @@ static void ib_ucm_add_one(struct ib_device *device)
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, base, 1))
- goto err;
ucm_dev->dev.class = &cm_class;
ucm_dev->dev.parent = device->dev.parent;
- ucm_dev->dev.devt = ucm_dev->cdev.dev;
- ucm_dev->dev.release = ib_ucm_release_dev;
+ ucm_dev->dev.devt = base;
+
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (device_register(&ucm_dev->dev))
- goto err_cdev;
+ if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev))
+ goto err_devnum;
if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
goto err_dev;
@@ -1304,15 +1307,11 @@ static void ib_ucm_add_one(struct ib_device *device)
return;
err_dev:
- device_unregister(&ucm_dev->dev);
-err_cdev:
- cdev_del(&ucm_dev->cdev);
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+err_devnum:
+ ib_ucm_free_dev(ucm_dev);
err:
- kfree(ucm_dev);
+ put_device(&ucm_dev->dev);
return;
}
@@ -1323,7 +1322,9 @@ static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
if (!ucm_dev)
return;
- device_unregister(&ucm_dev->dev);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+ ib_ucm_free_dev(ucm_dev);
+ put_device(&ucm_dev->dev);
}
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e12f8faf8c23..276f0ef835bd 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -898,11 +898,18 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
+ struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
- &resp->path_data[i].path_rec);
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB) {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
+ } else {
+ struct sa_path_rec ib;
+
+ sa_convert_path_opa_to_ib(&ib, rec);
+ ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+ }
}
if (copy_to_user(response, resp,
@@ -1197,7 +1204,7 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
- struct ib_sa_path_rec sa_path;
+ struct sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
@@ -1215,8 +1222,17 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
memset(&sa_path, 0, sizeof(sa_path));
+ sa_path.rec_type = SA_PATH_REC_TYPE_IB;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+
+ if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ struct sa_path_rec opa;
+
+ sa_convert_path_ib_to_opa(&opa, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1);
+ } else {
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ }
if (ret)
return ret;
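
The conversion helpers used above operate on caller-provided records in both
directions; a minimal sketch (the variable names are hypothetical, the helpers
are the ones called in the hunks above):

	struct sa_path_rec ib_rec = { .rec_type = SA_PATH_REC_TYPE_IB };
	struct sa_path_rec opa_rec;

	/* Translate an IB path record to its OPA form and back. */
	sa_convert_path_ib_to_opa(&opa_rec, &ib_rec);
	sa_convert_path_opa_to_ib(&ib_rec, &opa_rec);
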
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 27f155d2df8d..3dbf811d3c51 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -115,11 +115,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
@@ -133,7 +133,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
put_pid(umem->pid);
- ret = ib_umem_odp_get(context, umem);
+ ret = ib_umem_odp_get(context, umem, access);
if (ret) {
kfree(umem);
return ERR_PTR(ret);
@@ -315,7 +315,6 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- int shift;
int i;
int n;
struct scatterlist *sg;
@@ -323,11 +322,9 @@ int ib_umem_page_count(struct ib_umem *umem)
if (umem->odp_data)
return ib_umem_num_pages(umem);
- shift = ilog2(umem->page_size);
-
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> shift;
+ n += sg_dma_len(sg) >> umem->page_shift;
return n;
}
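
The page_size-to-page_shift switch turns divisions into shifts; a small
illustrative helper mirroring the arithmetic used in these hunks (the helper
name is hypothetical):

	static inline size_t umem_npages(u64 len, unsigned int page_shift)
	{
		/* Round up to whole pages, as (bcnt + BIT(shift) - 1) >> shift does. */
		return (len + BIT(page_shift) - 1) >> page_shift;
	}
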
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cb2742b548bb..0780b1afefa9 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -254,11 +255,11 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->writable = 1;
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->writable = 1;
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data) {
@@ -306,7 +307,8 @@ out_umem:
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access)
{
int ret_val;
struct pid *our_pid;
@@ -315,6 +317,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
if (!mm)
return -EINVAL;
+ if (access & IB_ACCESS_HUGETLB) {
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ vma = find_vma(mm, ib_umem_start(umem));
+ if (!vma || !is_vm_hugetlb_page(vma))
+ return -EINVAL;
+ h = hstate_vma(vma);
+ umem->page_shift = huge_page_shift(h);
+ umem->hugetlb = 1;
+ } else {
+ umem->hugetlb = 0;
+ }
+
/* Prevent creating ODP MRs in child processes */
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
@@ -325,7 +341,6 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
goto out_mm;
}
- umem->hugetlb = 0;
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
if (!umem->odp_data) {
ret_val = -ENOMEM;
@@ -504,7 +519,6 @@ out:
static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem,
int page_index,
- u64 base_virt_addr,
struct page *page,
u64 access_mask,
unsigned long current_seq)
@@ -527,7 +541,7 @@ static int ib_umem_odp_map_dma_single_page(
if (!(umem->odp_data->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
- 0, PAGE_SIZE,
+ 0, BIT(umem->page_shift),
DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(dev, dma_addr)) {
ret = -EFAULT;
@@ -555,8 +569,9 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
umem,
- base_virt_addr + (page_index * PAGE_SIZE),
- base_virt_addr + ((page_index+1)*PAGE_SIZE),
+ ib_umem_start(umem) + (page_index >> umem->page_shift),
+ ib_umem_start(umem) + ((page_index + 1) >>
+ umem->page_shift),
NULL);
ret = -EAGAIN;
}
@@ -595,10 +610,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct page **local_page_list = NULL;
- u64 off;
- int j, k, ret = 0, start_idx, npages = 0;
- u64 base_virt_addr;
+ u64 page_mask, off;
+ int j, k, ret = 0, start_idx, npages = 0, page_shift;
unsigned int flags = 0;
+ phys_addr_t p = 0;
if (access_mask == 0)
return -EINVAL;
@@ -611,9 +626,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (!local_page_list)
return -ENOMEM;
- off = user_virt & (~PAGE_MASK);
- user_virt = user_virt & PAGE_MASK;
- base_virt_addr = user_virt;
+ page_shift = umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ off = user_virt & (~page_mask);
+ user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -631,13 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (access_mask & ODP_WRITE_ALLOWED_BIT)
flags |= FOLL_WRITE;
- start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+ start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
k = start_idx;
while (bcnt > 0) {
- const size_t gup_num_pages =
- min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ const size_t gup_num_pages = min_t(size_t,
+ (bcnt + BIT(page_shift) - 1) >> page_shift,
+ PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
/*
@@ -656,14 +672,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- user_virt += npages << PAGE_SHIFT;
mutex_lock(&umem->odp_data->umem_mutex);
- for (j = 0; j < npages; ++j) {
+ for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
+ if (user_virt & ~page_mask) {
+ p += PAGE_SIZE;
+ if (page_to_phys(local_page_list[j]) != p) {
+ ret = -EFAULT;
+ break;
+ }
+ put_page(local_page_list[j]);
+ continue;
+ }
+
ret = ib_umem_odp_map_dma_single_page(
- umem, k, base_virt_addr, local_page_list[j],
- access_mask, current_seq);
+ umem, k, local_page_list[j],
+ access_mask, current_seq);
if (ret < 0)
break;
+
+ p = page_to_phys(local_page_list[j]);
k++;
}
mutex_unlock(&umem->odp_data->umem_mutex);
@@ -707,8 +734,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem->odp_data->umem_mutex);
- for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx];
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index aca7ff7abedc..36a6f5c8914c 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -197,7 +197,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- ib_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -235,17 +235,19 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
+ const struct ib_global_route *grh;
ib_init_ah_from_wc(agent->device, agent->port_num,
mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
&ah_attr);
- packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
- packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
- packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
- memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
- packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
+ grh = rdma_ah_read_grh(&ah_attr);
+ packet->mad.hdr.gid_index = grh->sgid_index;
+ packet->mad.hdr.hop_limit = grh->hop_limit;
+ packet->mad.hdr.traffic_class = grh->traffic_class;
+ memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
}
if (queue_packet(file, agent, packet))
@@ -449,7 +451,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_umad_file *file = filp->private_data;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
@@ -489,20 +491,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
- ah_attr.sl = packet->mad.hdr.sl;
- ah_attr.src_path_bits = packet->mad.hdr.path_bits;
- ah_attr.port_num = file->port->port_num;
+ ah_attr.type = rdma_ah_find_type(file->port->ib_dev,
+ file->port->port_num);
+ rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
+ rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
+ rdma_ah_set_path_bits(&ah_attr, packet->mad.hdr.path_bits);
+ rdma_ah_set_port_num(&ah_attr, file->port->port_num);
if (packet->mad.hdr.grh_present) {
- ah_attr.ah_flags = IB_AH_GRH;
- memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
- ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
- ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
+ rdma_ah_set_grh(&ah_attr, NULL,
+ be32_to_cpu(packet->mad.hdr.flow_label),
+ packet->mad.hdr.gid_index,
+ packet->mad.hdr.hop_limit,
+ packet->mad.hdr.traffic_class);
+ rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid);
}
- ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ ah = rdma_create_ah(agent->qp->pd, &ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
@@ -596,7 +600,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -1183,7 +1187,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- port->cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
@@ -1202,7 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
- port->sm_cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
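
Building an address handle with the rdma_ah_* accessors, as the hunks above
do, reduces to a short sequence; a minimal sketch (device, pd, dlid, sl and
port_num are placeholders):

	struct rdma_ah_attr attr = {};
	struct ib_ah *ah;

	attr.type = rdma_ah_find_type(device, port_num);
	rdma_ah_set_dlid(&attr, dlid);
	rdma_ah_set_sl(&attr, sl);
	rdma_ah_set_port_num(&attr, port_num);

	ah = rdma_create_ah(pd, &attr);
	if (!IS_ERR(ah))
		rdma_destroy_ah(ah);
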
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index e1bedf0bac04..64d494a64daf 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
@@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uverbs_event_queue ev_queue;
+ struct ib_uverbs_file *uverbs_file;
+ struct kref ref;
struct list_head list;
};
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject_file uobj_file;
+ struct ib_uverbs_event_queue ev_queue;
+};
+
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
@@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
+
+ struct idr idr;
+ /* spinlock protects write access to idr */
+ spinlock_t idr_lock;
};
struct ib_uverbs_event {
@@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {
@@ -229,6 +233,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
};
};
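The ib_uverbs_event_queue introduced above is embedded in both event-file types, so code that only holds the queue pointer (for example a CQ whose cq_context now points at the embedded ev_queue, as the uverbs_cmd.c hunks below show) can recover the enclosing file with container_of(). A minimal sketch of that pattern, assuming the declarations above and <linux/kernel.h> for container_of(); the helper name here is illustrative and not part of the patch:

	static struct ib_uverbs_completion_event_file *
	ev_queue_to_comp_file(struct ib_uverbs_event_queue *ev_queue)
	{
		/* ev_queue is embedded in struct ib_uverbs_completion_event_file,
		 * so container_of() walks back to the enclosing structure.
		 */
		return container_of(ev_queue,
				    struct ib_uverbs_completion_event_file,
				    ev_queue);
	}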
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7b7a76e1279a..70b7fb156414 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,270 +40,29 @@
#include <linux/uaccess.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+
#include "uverbs.h"
#include "core_priv.h"
-struct uverbs_lock_class {
- struct lock_class_key key;
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
-/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
- *
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
- */
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- int ret;
-
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
-
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
+ struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ fd, context);
+ struct ib_uobject_file *uobj_file;
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
+ if (IS_ERR(uobj))
+ return (void *)uobj;
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
- }
- rcu_read_unlock();
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
- return uobj;
-}
-
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
-}
-
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
-
-static void put_pd_read(struct ib_pd *pd)
-{
- put_uobj_read(pd->uobject);
-}
-
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
-}
-
-static void put_cq_read(struct ib_cq *cq)
-{
- put_uobj_read(cq->uobject);
-}
-
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
-
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
-}
-
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
-}
-
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
-}
-
-static void put_wq_read(struct ib_wq *wq)
-{
- put_uobj_read(wq->uobject);
-}
-
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
-}
-
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
-{
- put_uobj_read(ind_table->uobject);
-}
-
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
-
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
-}
-
-static void put_qp_write(struct ib_qp *qp)
-{
- put_uobj_write(qp->uobject);
-}
-
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
-
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
-
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
-
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ return container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -348,17 +107,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->device = ib_dev;
ucontext->cg_obj = cg_obj;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
+ /* ufile is required when some objects are released */
+ ucontext->ufile = file;
+ uverbs_initialize_ucontext(ucontext);
+
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
@@ -382,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
+ filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -565,19 +317,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret) {
- kfree(uobj);
- return ret;
- }
-
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
if (IS_ERR(pd)) {
@@ -591,10 +333,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -604,25 +342,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
goto err_copy;
}
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
-err_idr:
ib_dealloc_pd(pd);
err:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -633,45 +361,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
- struct ib_pd *pd;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
-
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
- if (ret)
- goto err_put;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- uobj->live = 0;
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+ ret = uobj_remove_commit(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
-
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return ret ?: in_len;
}
struct xrcd_table_entry {
@@ -808,16 +510,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ file->ucontext);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
if (IS_ERR(xrcd)) {
@@ -835,10 +534,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = obj->uobject.id;
@@ -847,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
/* create new inode/xrcd table entry */
ret = xrcd_table_insert(file->device, inode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
@@ -861,12 +556,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (f.file)
fdput(f);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
@@ -878,14 +568,11 @@ err_copy:
atomic_dec(&xrcd->usecnt);
}
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
-
-err_idr:
+err_dealloc_xrcd:
ib_dealloc_xrcd(xrcd);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
err_tree_mutex_unlock:
if (f.file)
@@ -903,75 +590,41 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
{
struct ib_uverbs_close_xrcd cmd;
struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
- }
-
- xrcd = uobj->object;
- inode = xrcd->inode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj)) {
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return PTR_ERR(uobj);
}
- live = uobj->live;
- if (inode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
-
- if (ret)
- goto out;
-
- if (inode && !live)
- xrcd_table_delete(file->device, inode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
-
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why)
{
struct inode *inode;
+ int ret;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd(xrcd);
- if (inode)
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ atomic_inc(&xrcd->usecnt);
+ else if (inode)
xrcd_table_delete(dev, inode);
+
+ return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
@@ -1004,14 +657,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1025,10 +675,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_put;
}
}
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
@@ -1043,9 +689,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
@@ -1058,32 +701,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(pd);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
ib_dereg_mr(mr);
err_put:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1119,11 +750,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
-
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
mr = uobj->object;
@@ -1134,7 +764,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -1167,11 +797,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ uobj_put_write(uobj);
return ret;
}
@@ -1182,39 +811,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mr = uobj->object;
-
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ ret = uobj_remove_commit(uobj);
- put_uobj(uobj);
-
- return in_len;
+ return ret ?: in_len;
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -1236,14 +846,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1254,11 +861,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@@ -1271,9 +873,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
- if (ret)
- goto err_unalloc;
memset(&resp, 0, sizeof(resp));
resp.rkey = mw->rkey;
@@ -1285,32 +884,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
-err_unalloc:
uverbs_dealloc_mw(mw);
-
err_put:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1320,39 +904,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
-
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
+ uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -1362,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
- int ret;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1371,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- return ret;
- resp.fd = ret;
+ uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ resp.fd = uobj->id;
+
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
+ uobj_alloc_abort(uobj);
return -EFAULT;
}
- fd_install(resp.fd, filp);
+ uobj_alloc_commit(uobj);
return in_len;
}
@@ -1407,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
void *context)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
@@ -1416,21 +978,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
-
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
+ file->ucontext);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
+ obj->uobject.user_handle = cmd->user_handle;
obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
@@ -1443,13 +1005,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
- ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1459,14 +1015,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = &ev_file->ev_queue;
atomic_set(&cq->usecnt, 0);
obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
- if (ret)
- goto err_free;
-
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
@@ -1478,32 +1030,19 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (ret)
goto err_cb;
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
return obj;
err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
-
-err_free:
ib_destroy_cq(cq);
err_file:
- ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
return ERR_PTR(ret);
}
@@ -1626,7 +1165,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1641,7 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
ret = -EFAULT;
out:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret ? ret : in_len;
}
@@ -1688,7 +1227,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1720,7 +1259,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
ret = in_len;
out_put:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret;
}
@@ -1735,14 +1274,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return in_len;
}
@@ -1757,44 +1296,38 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
+ struct ib_uverbs_event_queue *ev_queue;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
cq = uobj->object;
- ev_file = cq->cq_context;
+ ev_queue = cq->cq_context;
obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
+ }
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- put_uobj(uobj);
-
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
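The CQ destroy path above establishes a pattern repeated below for QPs and WQs: take an extra reference on the uobject before uobj_remove_commit(), so the event counters needed for the response remain readable after the object has been torn down, then drop that reference once the response is filled in. A condensed sketch of the sequence, using the rdma_core helpers this series introduces (error handling trimmed to the essentials):

	uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle, file->ucontext);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	cq  = uobj->object;
	obj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	uverbs_uobject_get(uobj);	/* keep the uobject memory alive for the response */
	ret = uobj_remove_commit(uobj);	/* destroys the CQ and drops its handle */
	if (ret) {
		uverbs_uobject_put(uobj);
		return ret;
	}

	resp.comp_events_reported  = obj->comp_events_reported;
	resp.async_events_reported = obj->async_events_reported;
	uverbs_uobject_put(uobj);	/* the final put may free the uobject */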
@@ -1816,7 +1349,7 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
@@ -1830,18 +1363,20 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ cmd->rwq_ind_tbl_handle,
+ file->ucontext);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1865,8 +1400,15 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ file->ucontext);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1878,8 +1420,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
@@ -1888,8 +1430,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
+ rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ file->ucontext);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1899,10 +1441,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1954,11 +1497,6 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_put;
}
- ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_put;
-
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
@@ -1966,7 +1504,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_create;
+ goto err_put;
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1994,9 +1532,6 @@ static int create_qp(struct ib_uverbs_file *file,
qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -2018,54 +1553,41 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(ind_tbl);
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_qp(qp);
-err_create:
- ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
- RDMACG_RESOURCE_HCA_OBJECT);
-
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2201,17 +1723,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
@@ -2226,15 +1753,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd.user_handle;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
@@ -2243,32 +1766,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_remove;
+ goto err_destroy;
}
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ qp->uobject = &obj->uevent.uobject;
+ uobj_put_read(xrcd_uobj);
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return in_len;
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
err_destroy:
ib_destroy_qp(qp);
-
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2282,6 +1798,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_qp_attr *attr;
struct ib_qp_init_attr *init_attr;
+ const struct ib_global_route *grh;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -2294,7 +1811,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2302,7 +1819,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
if (ret)
goto out;
@@ -2331,29 +1848,39 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
-
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr);
+ resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr);
+ resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr);
+ resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr);
+ resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) &
+ IB_AH_GRH);
+ if (resp.dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->ah_attr);
+ memcpy(resp.dest.dgid, grh->dgid.raw, 16);
+ resp.dest.flow_label = grh->flow_label;
+ resp.dest.sgid_index = grh->sgid_index;
+ resp.dest.hop_limit = grh->hop_limit;
+ resp.dest.traffic_class = grh->traffic_class;
+ }
+ resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr);
+
+ resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr);
+ resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr);
+ resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr);
+ resp.alt_dest.static_rate
+ = rdma_ah_get_static_rate(&attr->alt_ah_attr);
+ resp.alt_dest.is_global
+ = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) &
+ IB_AH_GRH);
+ if (resp.alt_dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->alt_ah_attr);
+ memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16);
+ resp.alt_dest.flow_label = grh->flow_label;
+ resp.alt_dest.sgid_index = grh->sgid_index;
+ resp.alt_dest.hop_limit = grh->hop_limit;
+ resp.alt_dest.traffic_class = grh->traffic_class;
+ }
+ resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
@@ -2398,7 +1925,7 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2427,31 +1954,47 @@ static int modify_qp(struct ib_uverbs_file *file,
attr->alt_timeout = cmd->base.alt_timeout;
attr->rate_limit = cmd->rate_limit;
- memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
- attr->ah_attr.dlid = cmd->base.dest.dlid;
- attr->ah_attr.sl = cmd->base.dest.sl;
- attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd->base.dest.static_rate;
- attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
- IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd->base.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
- IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
+ attr->ah_attr.type = rdma_ah_find_type(qp->device,
+ cmd->base.dest.port_num);
+ if (cmd->base.dest.is_global) {
+ rdma_ah_set_grh(&attr->ah_attr, NULL,
+ cmd->base.dest.flow_label,
+ cmd->base.dest.sgid_index,
+ cmd->base.dest.hop_limit,
+ cmd->base.dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr->ah_attr, 0);
+ }
+ rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid);
+ rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl);
+ rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate);
+ rdma_ah_set_port_num(&attr->ah_attr,
+ cmd->base.dest.port_num);
+
+	attr->alt_ah_attr.type = rdma_ah_find_type(qp->device,
+					      cmd->base.alt_dest.port_num);
+ if (cmd->base.alt_dest.is_global) {
+ rdma_ah_set_grh(&attr->alt_ah_attr, NULL,
+ cmd->base.alt_dest.flow_label,
+ cmd->base.alt_dest.sgid_index,
+ cmd->base.alt_dest.hop_limit,
+ cmd->base.alt_dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->alt_ah_attr,
+ cmd->base.alt_dest.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0);
+ }
+
+ rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid);
+ rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl);
+ rdma_ah_set_path_bits(&attr->alt_ah_attr,
+ cmd->base.alt_dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->alt_ah_attr,
+ cmd->base.alt_dest.static_rate);
+ rdma_ah_set_port_num(&attr->alt_ah_attr,
+ cmd->base.alt_dest.port_num);
if (qp->real_qp == qp) {
if (cmd->base.attr_mask & IB_QP_AV) {
@@ -2470,7 +2013,7 @@ static int modify_qp(struct ib_uverbs_file *file,
}
release_qp:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
out:
kfree(attr);
@@ -2557,42 +2100,27 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
qp = uobj->object;
obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
-
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
+ }
resp.events_reported = obj->uevent.events_reported;
-
- put_uobj(uobj);
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2603,9 +2131,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge))
+ return NULL;
+
return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
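The bound added to alloc_wr() above, mirrored for receive work requests in ib_uverbs_unmarshall_recv() further down, rejects any num_sge for which ALIGN(wr_size, sizeof(struct ib_sge)) + num_sge * sizeof(struct ib_sge) would exceed U32_MAX, so a size computed from userspace-controlled values cannot wrap. A short worked instance, assuming a 16-byte struct ib_sge (u64 addr, u32 length, u32 lkey); this is an illustration, not code from the patch:

	/*
	 * With wr_size already rounded up to a multiple of 16:
	 *
	 *   limit = (U32_MAX - wr_size) / 16;
	 *
	 * alloc_wr() refuses num_sge >= limit, so for any accepted value
	 *
	 *   wr_size + num_sge * 16 <= wr_size + (limit - 1) * 16 < U32_MAX,
	 *
	 * i.e. the total handed to kmalloc() cannot overflow a 32-bit size.
	 */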
@@ -2636,7 +2168,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2672,7 +2204,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ file->ucontext);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2779,11 +2312,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
ret = -EFAULT;
out_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2832,6 +2365,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
goto err;
}
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
@@ -2900,21 +2440,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ uobj_put_obj_read(qp);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2950,14 +2490,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2990,9 +2530,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
int ret;
struct ib_udata udata;
+ u8 *dmac;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -3004,54 +2545,50 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
(unsigned long)cmd.response + sizeof(resp),
in_len - sizeof(cmd), out_len - sizeof(resp));
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
}
- attr.dlid = cmd.attr.dlid;
- attr.sl = cmd.attr.sl;
- attr.src_path_bits = cmd.attr.src_path_bits;
- attr.static_rate = cmd.attr.static_rate;
- attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
- attr.port_num = cmd.attr.port_num;
- attr.grh.flow_label = cmd.attr.grh.flow_label;
- attr.grh.sgid_index = cmd.attr.grh.sgid_index;
- attr.grh.hop_limit = cmd.attr.grh.hop_limit;
- attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- memset(&attr.dmac, 0, sizeof(attr.dmac));
- memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
-
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
+ attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num);
+ rdma_ah_set_dlid(&attr, cmd.attr.dlid);
+ rdma_ah_set_sl(&attr, cmd.attr.sl);
+ rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits);
+ rdma_ah_set_static_rate(&attr, cmd.attr.static_rate);
+ rdma_ah_set_port_num(&attr, cmd.attr.port_num);
+
+ if (cmd.attr.is_global) {
+ rdma_ah_set_grh(&attr, NULL, cmd.attr.grh.flow_label,
+ cmd.attr.grh.sgid_index,
+ cmd.attr.grh.hop_limit,
+ cmd.attr.grh.traffic_class);
+ rdma_ah_set_dgid_raw(&attr, cmd.attr.grh.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr, 0);
+ }
+ dmac = rdma_ah_retrieve_dmac(&attr);
+ if (dmac)
+ memset(dmac, 0, ETH_ALEN);
ah = pd->device->create_ah(pd, &attr, &udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- goto err_create;
+ goto err_put;
}
ah->device = pd->device;
ah->pd = pd;
atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
-
resp.ah_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -3060,32 +2597,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
- ib_destroy_ah(ah);
-
-err_create:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+ rdma_destroy_ah(ah);
-err_charge:
- put_pd_read(pd);
+err_put:
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -3094,38 +2618,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+ uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -3142,12 +2647,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -3171,7 +2677,8 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3186,31 +2693,37 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
+ bool found = false;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
- if (ret)
- goto out_put;
-
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
+ found = true;
break;
}
-out_put:
- put_qp_write(qp);
+ if (!found) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
+out_put:
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3227,6 +2740,13 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
default:
return -EINVAL;
}
@@ -3402,20 +2922,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3450,9 +2968,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
atomic_inc(&cq->usecnt);
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
@@ -3465,27 +2980,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_pd_read(pd);
- put_cq_read(cq);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_put_obj_read(cq);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
ib_destroy_wq(wq);
err_put_cq:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return err;
}
@@ -3526,36 +3033,26 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ib_uverbs_release_uevent(file, &obj->uevent);
+ ret = uobj_remove_commit(uobj);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
-
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ uverbs_uobject_put(uobj);
if (ret)
return ret;
- return 0;
+ return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
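
The comment in ib_uverbs_ex_destroy_wq above (the same trick appears in
ib_uverbs_destroy_srq further down) explains why uverbs_uobject_get() is
called before uobj_remove_commit(): the handler still has to read
events_reported out of the uobject to build the response after the object has
been removed. A self-contained sketch of that take-ref / remove / read /
put-ref ordering with a plain C11 atomic refcount; the names are illustrative
and the teardown is stubbed out:

#include <stdatomic.h>
#include <stdlib.h>

struct uobj {
	atomic_int refcount;		/* 1 == only the "registered" reference */
	unsigned int events_reported;
};

static void uobj_get(struct uobj *u)
{
	atomic_fetch_add(&u->refcount, 1);
}

static void uobj_put(struct uobj *u)
{
	if (atomic_fetch_sub(&u->refcount, 1) == 1)
		free(u);		/* that was the last reference */
}

/* Tears the object down and drops the registered reference. */
static int remove_commit(struct uobj *u)
{
	/* ... destroy the underlying object here ... */
	uobj_put(u);
	return 0;
}

static int destroy_and_report(struct uobj *u, unsigned int *events)
{
	int ret;

	uobj_get(u);			/* keep the memory alive for us */
	ret = remove_commit(u);		/* may drop what was the last other ref */
	*events = u->events_reported;	/* still safe: we hold a reference */
	uobj_put(u);			/* now the memory may really be freed */
	return ret;
}
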
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
@@ -3588,7 +3085,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
@@ -3599,7 +3096,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags_mask = cmd.flags_mask;
}
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ uobj_put_obj_read(wq);
return ret;
}
@@ -3677,7 +3174,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ file->ucontext);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3686,14 +3184,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
@@ -3713,10 +3209,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = required_resp_len;
@@ -3729,26 +3221,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3761,10 +3245,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_uobject *uobj;
int ret;
- struct ib_wq **ind_tbl;
size_t required_cmd_sz;
required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
@@ -3784,31 +3266,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_remove_commit(uobj);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3882,15 +3345,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3931,24 +3392,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
-
- err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (err)
- goto err_free;
-
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
- goto err_create;
+ goto err_free;
}
flow_id->uobject = uobj;
uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
-
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3957,30 +3408,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(qp);
+ uobj_alloc_commit(uobj);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
ib_destroy_flow(flow_id);
-err_create:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
err_free:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
@@ -3993,7 +3434,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
@@ -4007,29 +3447,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret) {
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- uobj->live = 0;
- }
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
+ uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ ret = uobj_remove_commit(uobj);
return ret;
}
@@ -4046,31 +3469,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ file->ucontext);
if (!attr.ext.xrc.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -4086,11 +3515,6 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_put_cq;
-
srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
@@ -4115,9 +3539,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
atomic_set(&srq->usecnt, 0);
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -4133,44 +3555,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_read(xrcd_uobj);
+ uobj_put_obj_read(attr.ext.xrc.cq);
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_srq(srq);
err_put:
- ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_put_cq:
if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_obj_read(attr.ext.xrc.cq);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -4255,7 +3665,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -4264,7 +3674,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
return ret ? ret : in_len;
}
@@ -4286,13 +3696,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
return ret;
@@ -4321,53 +3731,39 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
- struct ib_usrq_object *us;
enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
srq_type = srq->srq_type;
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
}
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.events_reported = obj->events_reported;
+ uverbs_uobject_put(uobj);
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ return -EFAULT;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
- return ret ? ret : in_len;
+ return in_len;
}
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 35c788a32e26..3d2609608f58 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -52,6 +52,7 @@
#include "uverbs.h"
#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -67,19 +68,6 @@ enum {
static struct class *uverbs_class;
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -168,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
-static void ib_uverbs_release_event_file(struct kref *ref)
+static void ib_uverbs_release_async_event_file(struct kref *ref)
{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
+ struct ib_uverbs_async_event_file *file =
+ container_of(ref, struct ib_uverbs_async_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj_file.uobj);
}
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -206,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
{
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -227,138 +215,11 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
+ struct ib_ucontext *context,
+ bool device_removed)
{
- struct ib_uobject *uobj, *tmp;
-
context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
+ uverbs_cleanup_ucontext(context, device_removed);
put_pid(context->tgid);
ib_rdmacg_uncharge(&context->cg_obj, context->device,
@@ -372,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
@@ -392,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref)
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct ib_uverbs_file *uverbs_file,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarantee this will see the null set
* without using RCU
*/
- !file->uverbs_file->device->ib_dev)))
+ !uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If device was disassociated and no event exists set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) &&
+ !uverbs_file->device->ib_dev)
return -EIO;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -457,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue,
+ comp_ev_file->uobj_file.ufile, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static unsigned int ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ return ib_uverbs_event_poll(filp->private_data, filp, wait);
+}
+
+static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+}
+
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
+}
+
+static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+ struct ib_uverbs_file *uverbs_file = file->uverbs_file;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ mutex_lock(&uverbs_file->device->lists_mutex);
+ spin_lock_irq(&file->ev_queue.lock);
+ closed_already = file->ev_queue.is_closed;
+ file->ev_queue.is_closed = 1;
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&file->ev_queue.lock);
if (!closed_already) {
list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
+ ib_unregister_event_handler(&uverbs_file->event_handler);
+ }
+ mutex_unlock(&uverbs_file->device->lists_mutex);
+
+ kref_put(&uverbs_file->ref, ib_uverbs_release_file);
+ kref_put(&file->ref, ib_uverbs_release_async_event_file);
+
+ return 0;
+}
+
+static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
}
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ spin_unlock_irq(&file->ev_queue.lock);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ uverbs_close_fd(filp);
return 0;
}
-static const struct file_operations uverbs_event_fops = {
+const struct file_operations uverbs_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = ib_uverbs_comp_event_close,
+ .fasync = ib_uverbs_comp_event_fasync,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = ib_uverbs_async_event_close,
+ .fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
@@ -546,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -562,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
+ if (file->async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
@@ -579,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.reserved = 0;
entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
+ kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -603,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
struct ib_uevent_object *uobj;
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
uobj = container_of(event->element.qp->uobject,
@@ -648,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
file->async_file = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
- struct ib_uverbs_event_file *ev_file;
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
+}
+
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev)
+{
+ struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
int ret;
@@ -664,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
if (!ev_file)
return ERR_PTR(-ENOMEM);
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
-
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+ kref_init(&ev_file->ref);
+ filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
goto err_put_refs;
@@ -683,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
- }
+ WARN_ON(uverbs_file->async_file);
+ uverbs_file->async_file = ev_file;
+ kref_get(&uverbs_file->async_file->ref);
+ INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
+ ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&uverbs_file->event_handler);
+ if (ret)
+ goto err_put_file;
+
+ /* At this point the async file is fully set up */
return filp;
err_put_file:
fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->async_file->ref,
+ ib_uverbs_release_async_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
return filp;
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
-{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
-
- if (!f.file)
- return NULL;
-
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
-
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
-
- kref_get(&ev_file->ref);
-
-out:
- fdput(f);
- return ev_file;
-}
-
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
u64 mask;
@@ -986,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
+ spin_lock_init(&file->idr_lock);
+ idr_init(&file->idr);
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
@@ -1019,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
+ idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@@ -1032,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref,
+ ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
@@ -1189,7 +1093,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
+ cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
@@ -1231,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
+ struct ib_uverbs_async_event_file *event_file;
struct ib_event event;
/* Pending running commands to terminate */
@@ -1268,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
+ mutex_lock(&file->cleanup_mutex);
+ ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ mutex_unlock(&file->cleanup_mutex);
}
mutex_lock(&uverbs_dev->lists_mutex);
@@ -1278,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
- struct ib_uverbs_event_file,
+ struct ib_uverbs_async_event_file,
list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
+ spin_lock_irq(&event_file->ev_queue.lock);
+ event_file->ev_queue.is_closed = 1;
+ spin_unlock_irq(&event_file->ev_queue.lock);
list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
+ ib_unregister_event_handler(
+ &event_file->uverbs_file->event_handler);
+ event_file->uverbs_file->event_handler.device =
+ NULL;
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&event_file->ev_queue.poll_wait);
+ kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
@@ -1396,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
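
The event-file rework above moves the shared state into struct
ib_uverbs_event_queue and has ib_uverbs_event_read() take the fixed record
size from its two thin wrappers (async vs. completion events). A compact
userspace sketch of that "one generic reader, record size chosen by the
caller" shape, with illustrative names and a trivially simplified queue:

#include <sys/types.h>
#include <stddef.h>
#include <string.h>

struct event_queue {
	const void *head;	/* next pending event, NULL when empty */
};

/* Generic reader: copies one record of 'eventsz' bytes if it fits. */
static ssize_t event_read(struct event_queue *q, void *buf, size_t count,
			  size_t eventsz)
{
	if (!q->head)
		return 0;		/* nothing pending */
	if (eventsz > count)
		return -1;		/* caller's buffer is too small */
	memcpy(buf, q->head, eventsz);
	q->head = NULL;
	return (ssize_t)eventsz;
}

struct async_event { long long element; int type; };	/* larger record */
struct comp_event  { long long cq_handle; };		/* smaller record */

static ssize_t async_read(struct event_queue *q, void *buf, size_t count)
{
	return event_read(q, buf, count, sizeof(struct async_event));
}

static ssize_t comp_read(struct event_queue *q, void *buf, size_t count)
{
	return event_read(q, buf, count, sizeof(struct comp_event));
}
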
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index af020f80d50f..8b9587fe2303 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -34,20 +34,25 @@
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct ib_ah_attr *src)
+ struct rdma_ah_attr *src)
{
- memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
- dst->grh.flow_label = src->grh.flow_label;
- dst->grh.sgid_index = src->grh.sgid_index;
- dst->grh.hop_limit = src->grh.hop_limit;
- dst->grh.traffic_class = src->grh.traffic_class;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
- dst->port_num = src->port_num;
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->is_global = rdma_ah_get_ah_flags(src) &
+ IB_AH_GRH ? 1 : 0;
+ if (dst->is_global) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(src);
+
+ memcpy(dst->grh.dgid, grh->dgid.raw, sizeof(grh->dgid));
+ dst->grh.flow_label = grh->flow_label;
+ dst->grh.sgid_index = grh->sgid_index;
+ dst->grh.hop_limit = grh->hop_limit;
+ dst->grh.traffic_class = grh->traffic_class;
+ }
+ dst->port_num = rdma_ah_get_port_num(src);
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
@@ -91,15 +96,15 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
-void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
- struct ib_sa_path_rec *src)
+void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
{
memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ dst->dlid = htons(ntohl(sa_path_get_dlid(src)));
+ dst->slid = htons(ntohl(sa_path_get_slid(src)));
+ dst->raw_traffic = sa_path_get_raw_traffic(src);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -115,17 +120,43 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
+{
+ struct sa_path_rec rec;
+
+ if (src->rec_type == SA_PATH_REC_TYPE_OPA) {
+ sa_convert_path_opa_to_ib(&rec, src);
+ __ib_copy_path_rec_to_user(dst, &rec);
+ return;
+ }
+ __ib_copy_path_rec_to_user(dst, src);
+}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
struct ib_user_path_rec *src)
{
+ __be32 slid, dlid;
+
+ memset(dst, 0, sizeof(*dst));
+ if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
+ (ib_is_opa_gid((union ib_gid *)src->dgid))) {
+ dst->rec_type = SA_PATH_REC_TYPE_OPA;
+ slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid));
+ dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid));
+ } else {
+ dst->rec_type = SA_PATH_REC_TYPE_IB;
+ slid = htonl(ntohs(src->slid));
+ dlid = htonl(ntohs(src->dlid));
+ }
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ sa_path_set_dlid(dst, dlid);
+ sa_path_set_slid(dst, slid);
+ sa_path_set_raw_traffic(dst, src->raw_traffic);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -141,9 +172,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
- memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->net = NULL;
- dst->ifindex = 0;
- dst->gid_type = IB_GID_TYPE_IB;
+ /* TODO: No need to set this */
+ sa_path_set_dmac_zero(dst);
+ sa_path_set_ndev(dst, NULL);
+ sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
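
The path-record marshalling above now stores LIDs through sa_path_set_slid()
and friends as 32-bit big-endian values, so the 16-bit on-the-wire LID from
userspace is widened with htonl(ntohs(lid16)) and narrowed back with
htons(ntohl(lid32)). A standalone round-trip check of just that conversion
(ordinary userspace C, nothing beyond <arpa/inet.h> assumed):

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t user_lid = htons(0xC001);	/* 16-bit LID in network order */

	/* widen: read as a host value, store again as 32-bit big endian */
	uint32_t sa_lid = htonl(ntohs(user_lid));

	/* narrow again for the copy back to userspace */
	uint16_t back = htons(ntohl(sa_lid));

	assert(back == user_lid);
	printf("0x%04x -> 0x%08x -> 0x%04x\n",
	       (unsigned)ntohs(user_lid), (unsigned)ntohl(sa_lid),
	       (unsigned)ntohs(back));
	return 0;
}
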
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
new file mode 100644
index 000000000000..ef293379f37a
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/bug.h>
+#include <linux/file.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return rdma_destroy_ah((struct ib_ah *)uobject->object);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_destroy_flow((struct ib_flow *)uobject->object);
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp(qp);
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
+ return ret;
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ int ret;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ kfree(ind_tbl);
+ return ret;
+}
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ return ret;
+}
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq(srq);
+
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uevent, struct ib_usrq_object, uevent);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uobject->context->ufile, uevent);
+ return ret;
+}
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
+ mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
+ ret = -EBUSY;
+ else
+ ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
+ xrcd, why);
+ mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_pd *pd = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ ib_dealloc_pd((struct ib_pd *)uobject->object);
+ return 0;
+}
+
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *comp_event_file =
+ container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
+ struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+
+ spin_lock_irq(&event_queue->lock);
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ }
+ return 0;
+};
+
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+ .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+ .context_closed = uverbs_hot_unplug_completion_event_file,
+ .fops = &uverbs_event_fops,
+ .name = "[infinibandevent]",
+ .flags = O_RDONLY,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
+ .destroy_object = uverbs_free_cq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
+ .destroy_object = uverbs_free_qp,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_mw,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
+ /* 1 is used in order to free the MR after all the MWs */
+ .type = UVERBS_TYPE_ALLOC_IDR(1),
+ .destroy_object = uverbs_free_mr,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
+ .destroy_object = uverbs_free_srq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_ah,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_flow,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
+ .destroy_object = uverbs_free_wq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_rwq_ind_tbl,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
+ .destroy_object = uverbs_free_xrcd,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
+ /* 2 is used in order to free the PD after MRs */
+ .type = UVERBS_TYPE_ALLOC_IDR(2),
+ .destroy_object = uverbs_free_pd,
+};
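
The comments at the MR and PD types above ("free the MR after all the MWs",
"free the PD after MRs") encode a destruction order for context cleanup:
objects that pin others get a higher order value and are released later. The
kernel's cleanup iterates by destruction order rather than sorting a flat
array, but the ordering idea can be illustrated with a small userspace sketch
(names and the qsort approach are purely illustrative):

#include <stdio.h>
#include <stdlib.h>

struct uobj_type {
	const char *name;
	int destroy_order;	/* lower value is destroyed earlier */
};

static int by_destroy_order(const void *a, const void *b)
{
	const struct uobj_type *x = a, *y = b;

	return x->destroy_order - y->destroy_order;
}

int main(void)
{
	/* MWs (0) must go before MRs (1), and MRs before the PD (2). */
	struct uobj_type objs[] = {
		{ "pd", 2 }, { "mr", 1 }, { "mw", 0 }, { "cq", 0 },
	};
	size_t i, n = sizeof(objs) / sizeof(objs[0]);

	qsort(objs, n, sizeof(objs[0]), by_destroy_order);
	for (i = 0; i < n; i++)
		printf("destroy %s\n", objs[i].name);
	return 0;
}
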
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 85ed5051fdfd..4792f5209ac2 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -311,7 +311,7 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
struct ib_ah *ah;
@@ -321,12 +321,13 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
ah->device = pd->device;
ah->pd = pd;
ah->uobject = NULL;
+ ah->type = ah_attr->type;
atomic_inc(&pd->usecnt);
}
return ah;
}
-EXPORT_SYMBOL(ib_create_ah);
+EXPORT_SYMBOL(rdma_create_ah);
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
@@ -452,7 +453,7 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -464,6 +465,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->type = rdma_ah_find_type(device, port_num);
if (rdma_cap_eth_ah(device, port_num)) {
if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
net_type = wc->network_hdr_type;
@@ -494,7 +496,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return -ENODEV;
ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->dmac,
+ ah_attr->roce.dmac,
wc->wc_flags & IB_WC_WITH_VLAN ?
NULL : &vlan_id,
&if_index, &hoplimit);
@@ -525,15 +527,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return ret;
}
- ah_attr->dlid = wc->slid;
- ah_attr->sl = wc->sl;
- ah_attr->src_path_bits = wc->dlid_path_bits;
- ah_attr->port_num = port_num;
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
+ rdma_ah_set_port_num(ah_attr, port_num);
if (wc->wc_flags & IB_WC_GRH) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = sgid;
-
if (!rdma_cap_eth_ah(device, port_num)) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
@@ -547,11 +546,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
}
}
- ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
- ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = hoplimit;
- ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+
}
return 0;
}
@@ -560,34 +560,37 @@ EXPORT_SYMBOL(ib_init_ah_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
{
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
int ret;
ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return ib_create_ah(pd, &ah_attr);
+ return rdma_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
-int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ if (ah->type != ah_attr->type)
+ return -EINVAL;
+
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_modify_ah);
+EXPORT_SYMBOL(rdma_modify_ah);
-int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_query_ah);
+EXPORT_SYMBOL(rdma_query_ah);
-int ib_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah(struct ib_ah *ah)
{
struct ib_pd *pd;
int ret;
@@ -599,7 +602,7 @@ int ib_destroy_ah(struct ib_ah *ah)
return ret;
}
-EXPORT_SYMBOL(ib_destroy_ah);
+EXPORT_SYMBOL(rdma_destroy_ah);
/* Shared receive queues */
@@ -1201,19 +1204,22 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
EXPORT_SYMBOL(ib_modify_qp_is_ok);
int ib_resolve_eth_dmac(struct ib_device *device,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret = 0;
+ struct ib_global_route *grh;
- if (!rdma_is_port_valid(device, ah_attr->port_num))
+ if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
return 0;
- if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
- ah_attr->dmac);
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1221,8 +1227,8 @@ int ib_resolve_eth_dmac(struct ib_device *device,
int hop_limit;
ret = ib_query_gid(device,
- ah_attr->port_num,
- ah_attr->grh.sgid_index,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
&sgid, &sgid_attr);
if (ret || !sgid_attr.ndev) {
@@ -1233,14 +1239,14 @@ int ib_resolve_eth_dmac(struct ib_device *device,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &ah_attr->grh.dgid,
- ah_attr->dmac,
- NULL, &ifindex, &hop_limit);
+ ret =
+ rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
- ah_attr->grh.hop_limit = hop_limit;
+ grh->hop_limit = hop_limit;
}
out:
return ret;
@@ -1519,7 +1525,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
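
ib_attach_mcast() and ib_detach_mcast() above now also reject LIDs that cannot
be multicast targets: anything below the multicast base LID or equal to the
permissive LID. A standalone sketch of that extra check; the numeric constants
(0xC000 and 0xFFFF) are written out here for illustration, whereas the kernel
compares against the big-endian IB_MULTICAST_LID_BASE and IB_LID_PERMISSIVE
definitions:

#include <stdbool.h>
#include <stdint.h>

#define MCAST_LID_BASE	0xC000u		/* first LID in the multicast range */
#define LID_PERMISSIVE	0xFFFFu		/* never a valid multicast target */

/* Mirrors the LID portion of the new attach/detach sanity check. */
static bool mcast_lid_ok(uint16_t lid)
{
	return lid >= MCAST_LID_BASE && lid != LID_PERMISSIVE;
}

/* The existing GID check: IB multicast GIDs always start with 0xff. */
static bool mcast_gid_ok(const uint8_t *gid_raw)
{
	return gid_raw[0] == 0xff;
}
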
@@ -1535,7 +1543,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);