diff options
Diffstat (limited to 'drivers/infiniband/core')
25 files changed, 2824 insertions, 2004 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index e426ac877d19..6ebd9ad95010 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -29,4 +29,5 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o -ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ + rdma_core.o uverbs_std_types.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 329d08c884f6..02971e239a18 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -444,9 +444,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - dst = ip6_route_output(addr->net, NULL, &fl6); - if ((ret = dst->error)) - goto put; + ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); + if (ret < 0) + return ret; rt = (struct rt6_info *)dst; if (ipv6_addr_any(&fl6.saddr)) { diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 11dacd97a667..324ef85a13b6 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * err2: ib_free_send_mad(send_buf); err1: - ib_destroy_ah(ah); + rdma_destroy_ah(ah); } static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - ib_destroy_ah(mad_send_wc->send_buf->ah); + rdma_destroy_ah(mad_send_wc->send_buf->ah); ib_free_send_mad(mad_send_wc->send_buf); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 6535f09dc575..1844770f3ae8 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -228,7 +228,7 @@ struct cm_device { struct cm_av { struct cm_port *port; union ib_gid dgid; - struct ib_ah_attr ah_attr; + struct rdma_ah_attr ah_attr; u16 pkey_index; u8 timeout; }; @@ -241,7 +241,7 @@ struct cm_work { __be32 local_id; /* Established / timewait */ __be32 remote_id; struct ib_cm_event cm_event; - struct ib_sa_path_rec path[0]; + struct sa_path_rec path[0]; }; struct cm_timewait_info { @@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, ret = -ENODEV; goto out; } - ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); + ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto out; @@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { - ib_destroy_ah(ah); + rdma_destroy_ah(ah); ret = PTR_ERR(m); goto out; } @@ -390,7 +390,7 @@ static int cm_alloc_response_msg(struct cm_port *port, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { - ib_destroy_ah(ah); + rdma_destroy_ah(ah); return PTR_ERR(m); } m->ah = ah; @@ -400,7 +400,7 @@ static int cm_alloc_response_msg(struct cm_port *port, static void cm_free_msg(struct ib_mad_send_buf *msg) { - ib_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); @@ -440,7 +440,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, +static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, struct cm_id_private *cm_id_priv) { struct cm_device *cm_dev; @@ -453,7 +453,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - path->gid_type, ndev, &p, NULL)) { + sa_conv_pathrec_to_gid_type(path), + ndev, &p, NULL)) { port = cm_dev->port[p-1]; break; } @@ -1172,8 +1173,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) { - struct ib_sa_path_rec *pri_path = param->primary_path; - struct ib_sa_path_rec *alt_path = param->alternate_path; + struct sa_path_rec *pri_path = param->primary_path; + struct sa_path_rec *alt_path = param->alternate_path; cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); @@ -1202,8 +1203,10 @@ static void cm_format_req(struct cm_req_msg *req_msg, } if (pri_path->hop_limit <= 1) { - req_msg->primary_local_lid = pri_path->slid; - req_msg->primary_remote_lid = pri_path->dlid; + req_msg->primary_local_lid = + htons(ntohl(sa_path_get_slid(pri_path))); + req_msg->primary_remote_lid = + htons(ntohl(sa_path_get_dlid(pri_path))); } else { /* Work-around until there's a way to obtain remote LID info */ req_msg->primary_local_lid = IB_LID_PERMISSIVE; @@ -1223,8 +1226,10 @@ static void cm_format_req(struct cm_req_msg *req_msg, if (alt_path) { if (alt_path->hop_limit <= 1) { - req_msg->alt_local_lid = alt_path->slid; - req_msg->alt_remote_lid = alt_path->dlid; + req_msg->alt_local_lid = + htons(ntohl(sa_path_get_slid(alt_path))); + req_msg->alt_remote_lid = + htons(ntohl(sa_path_get_dlid(alt_path))); } else { req_msg->alt_local_lid = IB_LID_PERMISSIVE; req_msg->alt_remote_lid = IB_LID_PERMISSIVE; @@ -1401,14 +1406,15 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, } static void cm_format_paths_from_req(struct cm_req_msg *req_msg, - struct ib_sa_path_rec *primary_path, - struct ib_sa_path_rec *alt_path) + struct sa_path_rec *primary_path, + struct sa_path_rec *alt_path) { - memset(primary_path, 0, sizeof *primary_path); primary_path->dgid = req_msg->primary_local_gid; primary_path->sgid = req_msg->primary_remote_gid; - primary_path->dlid = req_msg->primary_local_lid; - primary_path->slid = req_msg->primary_remote_lid; + sa_path_set_dlid(primary_path, + htonl(ntohs(req_msg->primary_local_lid))); + sa_path_set_slid(primary_path, + htonl(ntohs(req_msg->primary_remote_lid))); primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); primary_path->hop_limit = req_msg->primary_hop_limit; primary_path->traffic_class = req_msg->primary_traffic_class; @@ -1423,14 +1429,15 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); - primary_path->service_id = req_msg->service_id; + sa_path_set_service_id(primary_path, req_msg->service_id); if (req_msg->alt_local_lid) { - memset(alt_path, 0, sizeof *alt_path); alt_path->dgid = req_msg->alt_local_gid; alt_path->sgid = req_msg->alt_remote_gid; - alt_path->dlid = req_msg->alt_local_lid; - alt_path->slid = req_msg->alt_remote_lid; + sa_path_set_dlid(alt_path, + htonl(ntohs(req_msg->alt_local_lid))); + sa_path_set_slid(alt_path, + htonl(ntohs(req_msg->alt_remote_lid))); alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); alt_path->hop_limit = req_msg->alt_hop_limit; alt_path->traffic_class = req_msg->alt_traffic_class; @@ -1445,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); - alt_path->service_id = req_msg->service_id; + sa_path_set_service_id(alt_path, req_msg->service_id); } } @@ -1722,6 +1729,7 @@ static int cm_req_handler(struct cm_work *work) struct cm_req_msg *req_msg; union ib_gid gid; struct ib_gid_attr gid_attr; + const struct ib_global_route *grh; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1758,21 +1766,34 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); - cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); - memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); - work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit; + memset(&work->path[0], 0, sizeof(work->path[0])); + memset(&work->path[1], 0, sizeof(work->path[1])); + grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); ret = ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, - cm_id_priv->av.ah_attr.grh.sgid_index, + grh->sgid_index, &gid, &gid_attr); if (!ret) { if (gid_attr.ndev) { - work->path[0].ifindex = gid_attr.ndev->ifindex; - work->path[0].net = dev_net(gid_attr.ndev); + work->path[0].rec_type = + sa_conv_gid_to_pathrec_type(gid_attr.gid_type); + sa_path_set_ifindex(&work->path[0], + gid_attr.ndev->ifindex); + sa_path_set_ndev(&work->path[0], + dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); + } else { + work->path[0].rec_type = SA_PATH_REC_TYPE_IB; } - work->path[0].gid_type = gid_attr.gid_type; + if (req_msg->alt_local_lid) + work->path[1].rec_type = work->path[0].rec_type; + cm_format_paths_from_req(req_msg, &work->path[0], + &work->path[1]); + if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) + sa_path_set_dmac(&work->path[0], + cm_id_priv->av.ah_attr.roce.dmac); + work->path[0].hop_limit = grh->hop_limit; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, cm_id_priv); } @@ -1782,11 +1803,18 @@ static int cm_req_handler(struct cm_work *work) &work->path[0].sgid, &gid_attr); if (!err && gid_attr.ndev) { - work->path[0].ifindex = gid_attr.ndev->ifindex; - work->path[0].net = dev_net(gid_attr.ndev); + work->path[0].rec_type = + sa_conv_gid_to_pathrec_type(gid_attr.gid_type); + sa_path_set_ifindex(&work->path[0], + gid_attr.ndev->ifindex); + sa_path_set_ndev(&work->path[0], + dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); + } else { + work->path[0].rec_type = SA_PATH_REC_TYPE_IB; } - work->path[0].gid_type = gid_attr.gid_type; + if (req_msg->alt_local_lid) + work->path[1].rec_type = work->path[0].rec_type; ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); @@ -2811,7 +2839,7 @@ out: static void cm_format_lap(struct cm_lap_msg *lap_msg, struct cm_id_private *cm_id_priv, - struct ib_sa_path_rec *alternate_path, + struct sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { @@ -2822,8 +2850,10 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); /* todo: need remote CM response timeout */ cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); - lap_msg->alt_local_lid = alternate_path->slid; - lap_msg->alt_remote_lid = alternate_path->dlid; + lap_msg->alt_local_lid = + htons(ntohl(sa_path_get_slid(alternate_path))); + lap_msg->alt_remote_lid = + htons(ntohl(sa_path_get_dlid(alternate_path))); lap_msg->alt_local_gid = alternate_path->sgid; lap_msg->alt_remote_gid = alternate_path->dgid; cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); @@ -2841,7 +2871,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, } int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct ib_sa_path_rec *alternate_path, + struct sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { @@ -2895,14 +2925,15 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); EXPORT_SYMBOL(ib_send_cm_lap); static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, - struct ib_sa_path_rec *path, + struct sa_path_rec *path, struct cm_lap_msg *lap_msg) { memset(path, 0, sizeof *path); + path->rec_type = SA_PATH_REC_TYPE_IB; path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; - path->dlid = lap_msg->alt_local_lid; - path->slid = lap_msg->alt_remote_lid; + sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); + sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); path->flow_label = cm_lap_get_flow_label(lap_msg); path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); @@ -3708,7 +3739,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, atomic_long_inc(&port->counter_group[CM_RECV]. counter[attr_id - CM_ATTR_ID_OFFSET]); - work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, + work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths, GFP_KERNEL); if (!work) { ib_free_recv_mad(mad_recv_wc); @@ -3800,7 +3831,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } - if (cm_id_priv->alt_av.ah_attr.dlid) { + if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; @@ -3854,7 +3885,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, default: break; } - if (cm_id_priv->alt_av.ah_attr.dlid) { + if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index acd10d666f1c..91b7a2fe5a55 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -929,7 +929,8 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); + rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index, + &sgid, NULL); if (ret) goto out; @@ -1127,7 +1128,7 @@ static inline int cma_any_port(struct sockaddr *addr) static void cma_save_ib_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_cm_id *listen_id, - struct ib_sa_path_rec *path) + struct sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; @@ -1139,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr, ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->sgid, 16); - ib->sib_sid = path->service_id; + ib->sib_sid = sa_path_get_service_id(path); ib->sib_scope_id = 0; } else { ib->sib_pkey = listen_ib->sib_pkey; @@ -1273,7 +1274,8 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, memcpy(&req->local_gid, &req_param->primary_path->sgid, sizeof(req->local_gid)); req->has_gid = true; - req->service_id = req_param->primary_path->service_id; + req->service_id = + sa_path_get_service_id(req_param->primary_path); req->pkey = be16_to_cpu(req_param->primary_path->pkey); if (req->pkey != req_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" @@ -1755,6 +1757,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: + if (cma_comp(id_priv, RDMA_CM_CONNECT) && + (id_priv->id.qp_type != IB_QPT_UD)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : @@ -1821,8 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct rdma_cm_id *id; struct rdma_route *rt; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; - const __be64 service_id = - ib_event->param.req_rcvd.primary_path->service_id; + struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; + const __be64 service_id = sa_path_get_service_id(path); int ret; id = rdma_create_id(listen_id->route.addr.dev_addr.net, @@ -1844,7 +1849,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (!rt->path_rec) goto err; - rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; + rt->path_rec[0] = *path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; @@ -2297,7 +2302,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos) } EXPORT_SYMBOL(rdma_set_service_type); -static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, +static void cma_query_handler(int status, struct sa_path_rec *path_rec, void *context) { struct cma_work *work = context; @@ -2324,18 +2329,25 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct ib_sa_path_rec path_rec; + struct sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; struct sockaddr_ib *sib; memset(&path_rec, 0, sizeof path_rec); + + if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num)) + path_rec.rec_type = SA_PATH_REC_TYPE_OPA; + else + path_rec.rec_type = SA_PATH_REC_TYPE_IB; rdma_addr_get_sgid(dev_addr, &path_rec.sgid); rdma_addr_get_dgid(dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; - path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); + sa_path_set_service_id(&path_rec, + rdma_get_service_id(&id_priv->id, + cma_dst_addr(id_priv))); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | @@ -2449,7 +2461,7 @@ err1: } int rdma_set_ib_paths(struct rdma_cm_id *id, - struct ib_sa_path_rec *path_rec, int num_paths) + struct sa_path_rec *path_rec, int num_paths) { struct rdma_id_private *id_priv; int ret; @@ -2528,6 +2540,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) struct cma_work *work; int ret; struct net_device *ndev = NULL; + enum ib_gid_type gid_type = IB_GID_TYPE_IB; u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos; @@ -2572,21 +2585,22 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) } } - route->path_rec->net = &init_net; - route->path_rec->ifindex = ndev->ifindex; supported_gids = roce_gid_type_mask_support(id_priv->id.device, id_priv->id.port_num); - route->path_rec->gid_type = - cma_route_gid_type(addr->dev_addr.network, - supported_gids, - id_priv->gid_type); + gid_type = cma_route_gid_type(addr->dev_addr.network, + supported_gids, + id_priv->gid_type); + route->path_rec->rec_type = + sa_conv_gid_to_pathrec_type(gid_type); + sa_path_set_ndev(route->path_rec, &init_net); + sa_path_set_ifindex(route->path_rec, ndev->ifindex); } if (!ndev) { ret = -ENODEV; goto err2; } - memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); + sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); @@ -2594,8 +2608,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) &route->path_rec->dgid); /* Use the hint from IP Stack to select GID Type */ - if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) - route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); + if (gid_type < ib_network_to_gid_type(addr->dev_addr.network)) + gid_type = ib_network_to_gid_type(addr->dev_addr.network); + route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); + if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) /* TODO: get the hoplimit from the inet/inet6 device */ route->path_rec->hop_limit = addr->dev_addr.hoplimit; @@ -3941,63 +3957,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, } } -static void cma_query_sa_classport_info_cb(int status, - struct ib_class_port_info *rec, - void *context) -{ - struct class_port_info_context *cb_ctx = context; - - WARN_ON(!context); - - if (status || !rec) { - pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", - cb_ctx->device->name, cb_ctx->port_num, status); - goto out; - } - - memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); - -out: - complete(&cb_ctx->done); -} - -static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, - struct ib_class_port_info *class_port_info) -{ - struct class_port_info_context *cb_ctx; - int ret; - - cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); - if (!cb_ctx) - return -ENOMEM; - - cb_ctx->device = device; - cb_ctx->class_port_info = class_port_info; - cb_ctx->port_num = port_num; - init_completion(&cb_ctx->done); - - ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, - CMA_QUERY_CLASSPORT_INFO_TIMEOUT, - GFP_KERNEL, cma_query_sa_classport_info_cb, - cb_ctx, &cb_ctx->sa_query); - if (ret < 0) { - pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", - device->name, port_num, ret); - goto out; - } - - wait_for_completion(&cb_ctx->done); - -out: - kfree(cb_ctx); - return ret; -} - static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; - struct ib_class_port_info class_port_info; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret; @@ -4018,21 +3981,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = mc->join_state; - if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { - ret = cma_query_sa_classport_info(id_priv->id.device, - id_priv->id.port_num, - &class_port_info); - - if (ret) - return ret; - - if (!(ib_get_cpi_capmask2(&class_port_info) & - IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { - pr_warn("RDMA CM: %s port %u Unable to multicast join\n" - "RDMA CM: SM doesn't support Send Only Full Member option\n", - id_priv->id.device->name, id_priv->id.port_num); - return -EOPNOTSUPP; - } + if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) && + (!ib_sa_sendonly_fullmem_support(&sa_client, + id_priv->id.device, + id_priv->id.port_num))) { + pr_warn("RDMA CM: %s port %u Unable to multicast join\n" + "RDMA CM: SM doesn't support Send Only Full Member option\n", + id_priv->id.device->name, id_priv->id.port_num); + return -EOPNOTSUPP; } comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7c9e34d679d3..81d447da0048 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -172,8 +172,16 @@ static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); - ib_cache_release_one(dev); - kfree(dev->port_immutable); + WARN_ON(dev->reg_state == IB_DEV_REGISTERED); + if (dev->reg_state == IB_DEV_UNREGISTERED) { + /* + * In IB_DEV_UNINITIALIZED state, cache or port table + * is not even created. Free cache and port table only when + * device reaches UNREGISTERED state. + */ + ib_cache_release_one(dev); + kfree(dev->port_immutable); + } kfree(dev); } @@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device, ret = ib_cache_setup_one(device); if (ret) { pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); - goto out; + goto port_cleanup; } ret = ib_device_register_rdmacg(device); if (ret) { pr_warn("Couldn't register device with rdma cgroup\n"); - ib_cache_cleanup_one(device); - goto out; + goto cache_cleanup; } memset(&device->attrs, 0, sizeof(device->attrs)); ret = device->query_device(device, &device->attrs, &uhw); if (ret) { pr_warn("Couldn't query the device attributes\n"); - ib_device_unregister_rdmacg(device); - ib_cache_cleanup_one(device); - goto out; + goto cache_cleanup; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { pr_warn("Couldn't register device %s with driver model\n", device->name); - ib_device_unregister_rdmacg(device); - ib_cache_cleanup_one(device); - goto out; + goto cache_cleanup; } device->reg_state = IB_DEV_REGISTERED; @@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device, down_write(&lists_rwsem); list_add_tail(&device->core_list, &device_list); up_write(&lists_rwsem); + mutex_unlock(&device_mutex); + return 0; + +cache_cleanup: + ib_cache_cleanup_one(device); + ib_cache_release_one(device); +port_cleanup: + kfree(device->port_immutable); out: mutex_unlock(&device_mutex); return ret; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index cdfad5f26212..84d2615b5d4b 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -96,7 +96,8 @@ struct ib_fmr_pool { void * arg); void *flush_arg; - struct task_struct *thread; + struct kthread_worker *worker; + struct kthread_work work; atomic_t req_ser; atomic_t flush_ser; @@ -174,29 +175,19 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) spin_unlock_irq(&pool->pool_lock); } -static int ib_fmr_cleanup_thread(void *pool_ptr) +static void ib_fmr_cleanup_func(struct kthread_work *work) { - struct ib_fmr_pool *pool = pool_ptr; + struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work); - do { - if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { - ib_fmr_batch_release(pool); - - atomic_inc(&pool->flush_ser); - wake_up_interruptible(&pool->force_wait); - - if (pool->flush_function) - pool->flush_function(pool, pool->flush_arg); - } + ib_fmr_batch_release(pool); + atomic_inc(&pool->flush_ser); + wake_up_interruptible(&pool->force_wait); - set_current_state(TASK_INTERRUPTIBLE); - if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && - !kthread_should_stop()) - schedule(); - __set_current_state(TASK_RUNNING); - } while (!kthread_should_stop()); + if (pool->flush_function) + pool->flush_function(pool, pool->flush_arg); - return 0; + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) + kthread_queue_work(pool->worker, &pool->work); } /** @@ -265,15 +256,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, atomic_set(&pool->flush_ser, 0); init_waitqueue_head(&pool->force_wait); - pool->thread = kthread_run(ib_fmr_cleanup_thread, - pool, - "ib_fmr(%s)", - device->name); - if (IS_ERR(pool->thread)) { - pr_warn(PFX "couldn't start cleanup thread\n"); - ret = PTR_ERR(pool->thread); + pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name); + if (IS_ERR(pool->worker)) { + pr_warn(PFX "couldn't start cleanup kthread worker\n"); + ret = PTR_ERR(pool->worker); goto out_free_pool; } + kthread_init_work(&pool->work, ib_fmr_cleanup_func); { struct ib_pool_fmr *fmr; @@ -338,7 +327,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) LIST_HEAD(fmr_list); int i; - kthread_stop(pool->thread); + kthread_destroy_worker(pool->worker); ib_fmr_batch_release(pool); i = 0; @@ -388,7 +377,7 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool) spin_unlock_irq(&pool->pool_lock); serial = atomic_inc_return(&pool->req_ser); - wake_up_process(pool->thread); + kthread_queue_work(pool->worker, &pool->work); if (wait_event_interruptible(pool->force_wait, atomic_read(&pool->flush_ser) - serial >= 0)) @@ -502,7 +491,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) list_add_tail(&fmr->list, &pool->dirty_list); if (++pool->dirty_len >= pool->dirty_watermark) { atomic_inc(&pool->req_ser); - wake_up_process(pool->thread); + kthread_queue_work(pool->worker, &pool->work); } } } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 57f231f1c721..192ee3dafb80 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -605,7 +605,7 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) /* * ib_unregister_mad_agent - Unregisters a client from using MAD services */ -int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) +void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_snoop_private *mad_snoop_priv; @@ -622,7 +622,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) agent); unregister_mad_snoop(mad_snoop_priv); } - return 0; } EXPORT_SYMBOL(ib_unregister_mad_agent); @@ -1834,12 +1833,13 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ const struct ib_mad_send_wr_private *wr, const struct ib_mad_recv_wc *rwc ) { - struct ib_ah_attr attr; + struct rdma_ah_attr attr; u8 send_resp, rcv_resp; union ib_gid sgid; struct ib_device *device = mad_agent_priv->agent.device; u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; + bool has_grh; send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); @@ -1848,36 +1848,40 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ /* both requests, or both responses. GIDs different */ return 0; - if (ib_query_ah(wr->send_buf.ah, &attr)) + if (rdma_query_ah(wr->send_buf.ah, &attr)) /* Assume not equal, to avoid false positives. */ return 0; - if (!!(attr.ah_flags & IB_AH_GRH) != - !!(rwc->wc->wc_flags & IB_WC_GRH)) + has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH); + if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH)) /* one has GID, other does not. Assume different */ return 0; if (!send_resp && rcv_resp) { /* is request/response. */ - if (!(attr.ah_flags & IB_AH_GRH)) { + if (!has_grh) { if (ib_get_cached_lmc(device, port_num, &lmc)) return 0; - return (!lmc || !((attr.src_path_bits ^ + return (!lmc || !((rdma_ah_get_path_bits(&attr) ^ rwc->wc->dlid_path_bits) & ((1 << lmc) - 1))); } else { + const struct ib_global_route *grh = + rdma_ah_read_grh(&attr); + if (ib_get_cached_gid(device, port_num, - attr.grh.sgid_index, &sgid, NULL)) + grh->sgid_index, &sgid, NULL)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); } } - if (!(attr.ah_flags & IB_AH_GRH)) - return attr.dlid == rwc->wc->slid; + if (!has_grh) + return rdma_ah_get_dlid(&attr) == rwc->wc->slid; else - return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, + return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw, + rwc->recv_buf.grh->sgid.raw, 16); } diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 382941b46e43..0d3cca0a8890 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { deref_rmpp_recv(rmpp_recv); wait_for_completion(&rmpp_recv->comp); - ib_destroy_ah(rmpp_recv->ah); + rdma_destroy_ah(rmpp_recv->ah); kfree(rmpp_recv); } @@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, hdr_len, 0, GFP_KERNEL, IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) - ib_destroy_ah(ah); + rdma_destroy_ah(ah); else { msg->ah = ah; msg->context[0] = ah; @@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent, ret = ib_post_send_mad(msg, NULL); if (ret) { - ib_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah); ib_free_send_mad(msg); } } @@ -209,7 +209,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent, void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) - ib_destroy_ah(mad_send_wc->send_buf->ah); + rdma_destroy_ah(mad_send_wc->send_buf->ah); ib_free_send_mad(mad_send_wc->send_buf); } @@ -237,7 +237,7 @@ static void nack_recv(struct ib_mad_agent_private *agent, ret = ib_post_send_mad(msg, NULL); if (ret) { - ib_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah); ib_free_send_mad(msg); } } @@ -852,7 +852,7 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; struct mad_rmpp_recv *rmpp_recv; - struct ib_ah_attr ah_attr; + struct rdma_ah_attr ah_attr; unsigned long flags; int newwin = 1; @@ -867,10 +867,10 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) (rmpp_recv->method & IB_MGMT_METHOD_RESP)) continue; - if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) + if (rdma_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) continue; - if (rmpp_recv->slid == ah_attr.dlid) { + if (rmpp_recv->slid == rdma_ah_get_dlid(&ah_attr)) { newwin = rmpp_recv->repwin; break; } diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 322cb67b07a9..45f2f095f793 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -720,7 +720,7 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, - struct ib_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr) { int ret; u16 gid_index; @@ -743,19 +743,18 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, return ret; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(rec->mlid); - ah_attr->sl = rec->sl; - ah_attr->port_num = port_num; - ah_attr->static_rate = rec->rate; - - ah_attr->ah_flags = IB_AH_GRH; - ah_attr->grh.dgid = rec->mgid; - - ah_attr->grh.sgid_index = (u8) gid_index; - ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); - ah_attr->grh.hop_limit = rec->hop_limit; - ah_attr->grh.traffic_class = rec->traffic_class; - + ah_attr->type = rdma_ah_find_type(device, port_num); + + rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); + rdma_ah_set_sl(ah_attr, rec->sl); + rdma_ah_set_port_num(ah_attr, port_num); + rdma_ah_set_static_rate(ah_attr, rec->rate); + + rdma_ah_set_grh(ah_attr, &rec->mgid, + be32_to_cpu(rec->flow_label), + (u8)gid_index, + rec->hop_limit, + rec->traffic_class); return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c new file mode 100644 index 000000000000..41c31a2bf093 --- /dev/null +++ b/drivers/infiniband/core/rdma_core.c @@ -0,0 +1,627 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/file.h> +#include <linux/anon_inodes.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <linux/rcupdate.h> +#include "uverbs.h" +#include "core_priv.h" +#include "rdma_core.h" + +void uverbs_uobject_get(struct ib_uobject *uobject) +{ + kref_get(&uobject->ref); +} + +static void uverbs_uobject_free(struct kref *ref) +{ + struct ib_uobject *uobj = + container_of(ref, struct ib_uobject, ref); + + if (uobj->type->type_class->needs_kfree_rcu) + kfree_rcu(uobj, rcu); + else + kfree(uobj); +} + +void uverbs_uobject_put(struct ib_uobject *uobject) +{ + kref_put(&uobject->ref, uverbs_uobject_free); +} + +static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) +{ + /* + * When a shared access is required, we use a positive counter. Each + * shared access request checks that the value != -1 and increment it. + * Exclusive access is required for operations like write or destroy. + * In exclusive access mode, we check that the counter is zero (nobody + * claimed this object) and we set it to -1. Releasing a shared access + * lock is done simply by decreasing the counter. As for exclusive + * access locks, since only a single one of them is is allowed + * concurrently, setting the counter to zero is enough for releasing + * this lock. + */ + if (!exclusive) + return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? + -EBUSY : 0; + + /* lock is either WRITE or DESTROY - should be exclusive */ + return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; +} + +static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, + const struct uverbs_obj_type *type) +{ + struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); + + if (!uobj) + return ERR_PTR(-ENOMEM); + /* + * user_handle should be filled by the handler, + * The object is added to the list in the commit stage. + */ + uobj->context = context; + uobj->type = type; + atomic_set(&uobj->usecnt, 0); + kref_init(&uobj->ref); + + return uobj; +} + +static int idr_add_uobj(struct ib_uobject *uobj) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&uobj->context->ufile->idr_lock); + + /* + * We start with allocating an idr pointing to NULL. This represents an + * object which isn't initialized yet. We'll replace it later on with + * the real object once we commit. + */ + ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, + min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; + + spin_unlock(&uobj->context->ufile->idr_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; +} + +/* + * It only removes it from the uobjects list, uverbs_uobject_put() is still + * required. + */ +static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) +{ + spin_lock(&uobj->context->ufile->idr_lock); + idr_remove(&uobj->context->ufile->idr, uobj->id); + spin_unlock(&uobj->context->ufile->idr_lock); +} + +/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ +static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool exclusive) +{ + struct ib_uobject *uobj; + + rcu_read_lock(); + /* object won't be released as we're protected in rcu */ + uobj = idr_find(&ucontext->ufile->idr, id); + if (!uobj) { + uobj = ERR_PTR(-ENOENT); + goto free; + } + + uverbs_uobject_get(uobj); +free: + rcu_read_unlock(); + return uobj; +} + +static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool exclusive) +{ + struct file *f; + struct ib_uobject *uobject; + const struct uverbs_obj_fd_type *fd_type = + container_of(type, struct uverbs_obj_fd_type, type); + + if (exclusive) + return ERR_PTR(-EOPNOTSUPP); + + f = fget(id); + if (!f) + return ERR_PTR(-EBADF); + + uobject = f->private_data; + /* + * fget(id) ensures we are not currently running uverbs_close_fd, + * and the caller is expected to ensure that uverbs_close_fd is never + * done while a call top lookup is possible. + */ + if (f->f_op != fd_type->fops) { + fput(f); + return ERR_PTR(-EBADF); + } + + uverbs_uobject_get(uobject); + return uobject; +} + +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool exclusive) +{ + struct ib_uobject *uobj; + int ret; + + uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); + if (IS_ERR(uobj)) + return uobj; + + if (uobj->type != type) { + ret = -EINVAL; + goto free; + } + + ret = uverbs_try_lock_object(uobj, exclusive); + if (ret) { + WARN(ucontext->cleanup_reason, + "ib_uverbs: Trying to lookup_get while cleanup context\n"); + goto free; + } + + return uobj; +free: + uobj->type->type_class->lookup_put(uobj, exclusive); + uverbs_uobject_put(uobj); + return ERR_PTR(ret); +} + +static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + int ret; + struct ib_uobject *uobj; + + uobj = alloc_uobj(ucontext, type); + if (IS_ERR(uobj)) + return uobj; + + ret = idr_add_uobj(uobj); + if (ret) + goto uobj_put; + + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) + goto idr_remove; + + return uobj; + +idr_remove: + uverbs_idr_remove_uobj(uobj); +uobj_put: + uverbs_uobject_put(uobj); + return ERR_PTR(ret); +} + +static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + const struct uverbs_obj_fd_type *fd_type = + container_of(type, struct uverbs_obj_fd_type, type); + int new_fd; + struct ib_uobject *uobj; + struct ib_uobject_file *uobj_file; + struct file *filp; + + new_fd = get_unused_fd_flags(O_CLOEXEC); + if (new_fd < 0) + return ERR_PTR(new_fd); + + uobj = alloc_uobj(ucontext, type); + if (IS_ERR(uobj)) { + put_unused_fd(new_fd); + return uobj; + } + + uobj_file = container_of(uobj, struct ib_uobject_file, uobj); + filp = anon_inode_getfile(fd_type->name, + fd_type->fops, + uobj_file, + fd_type->flags); + if (IS_ERR(filp)) { + put_unused_fd(new_fd); + uverbs_uobject_put(uobj); + return (void *)filp; + } + + uobj_file->uobj.id = new_fd; + uobj_file->uobj.object = filp; + uobj_file->ufile = ucontext->ufile; + INIT_LIST_HEAD(&uobj->list); + kref_get(&uobj_file->ufile->ref); + + return uobj; +} + +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + return type->type_class->alloc_begin(type, ucontext); +} + +static void uverbs_uobject_add(struct ib_uobject *uobject) +{ + mutex_lock(&uobject->context->uobjects_lock); + list_add(&uobject->list, &uobject->context->uobjects); + mutex_unlock(&uobject->context->uobjects_lock); +} + +static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + const struct uverbs_obj_idr_type *idr_type = + container_of(uobj->type, struct uverbs_obj_idr_type, + type); + int ret = idr_type->destroy_object(uobj, why); + + /* + * We can only fail gracefully if the user requested to destroy the + * object. In the rest of the cases, just remove whatever you can. + */ + if (why == RDMA_REMOVE_DESTROY && ret) + return ret; + + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + uverbs_idr_remove_uobj(uobj); + + return ret; +} + +static void alloc_abort_fd_uobject(struct ib_uobject *uobj) +{ + struct ib_uobject_file *uobj_file = + container_of(uobj, struct ib_uobject_file, uobj); + struct file *filp = uobj->object; + int id = uobj_file->uobj.id; + + /* Unsuccessful NEW */ + fput(filp); + put_unused_fd(id); +} + +static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + const struct uverbs_obj_fd_type *fd_type = + container_of(uobj->type, struct uverbs_obj_fd_type, type); + struct ib_uobject_file *uobj_file = + container_of(uobj, struct ib_uobject_file, uobj); + int ret = fd_type->context_closed(uobj_file, why); + + if (why == RDMA_REMOVE_DESTROY && ret) + return ret; + + if (why == RDMA_REMOVE_DURING_CLEANUP) { + alloc_abort_fd_uobject(uobj); + return ret; + } + + uobj_file->uobj.context = NULL; + return ret; +} + +static void lockdep_check(struct ib_uobject *uobj, bool exclusive) +{ +#ifdef CONFIG_LOCKDEP + if (exclusive) + WARN_ON(atomic_read(&uobj->usecnt) > 0); + else + WARN_ON(atomic_read(&uobj->usecnt) == -1); +#endif +} + +static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + int ret; + struct ib_ucontext *ucontext = uobj->context; + + ret = uobj->type->type_class->remove_commit(uobj, why); + if (ret && why == RDMA_REMOVE_DESTROY) { + /* We couldn't remove the object, so just unlock the uobject */ + atomic_set(&uobj->usecnt, 0); + uobj->type->type_class->lookup_put(uobj, true); + } else { + mutex_lock(&ucontext->uobjects_lock); + list_del(&uobj->list); + mutex_unlock(&ucontext->uobjects_lock); + /* put the ref we took when we created the object */ + uverbs_uobject_put(uobj); + } + + return ret; +} + +/* This is called only for user requested DESTROY reasons */ +int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) +{ + int ret; + struct ib_ucontext *ucontext = uobj->context; + + /* put the ref count we took at lookup_get */ + uverbs_uobject_put(uobj); + /* Cleanup is running. Calling this should have been impossible */ + if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); + return 0; + } + lockdep_check(uobj, true); + ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); + + up_read(&ucontext->cleanup_rwsem); + return ret; +} + +static void alloc_commit_idr_uobject(struct ib_uobject *uobj) +{ + uverbs_uobject_add(uobj); + spin_lock(&uobj->context->ufile->idr_lock); + /* + * We already allocated this IDR with a NULL object, so + * this shouldn't fail. + */ + WARN_ON(idr_replace(&uobj->context->ufile->idr, + uobj, uobj->id)); + spin_unlock(&uobj->context->ufile->idr_lock); +} + +static void alloc_commit_fd_uobject(struct ib_uobject *uobj) +{ + struct ib_uobject_file *uobj_file = + container_of(uobj, struct ib_uobject_file, uobj); + + uverbs_uobject_add(&uobj_file->uobj); + fd_install(uobj_file->uobj.id, uobj->object); + /* This shouldn't be used anymore. Use the file object instead */ + uobj_file->uobj.id = 0; + /* Get another reference as we export this to the fops */ + uverbs_uobject_get(&uobj_file->uobj); +} + +int rdma_alloc_commit_uobject(struct ib_uobject *uobj) +{ + /* Cleanup is running. Calling this should have been impossible */ + if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { + int ret; + + WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); + ret = uobj->type->type_class->remove_commit(uobj, + RDMA_REMOVE_DURING_CLEANUP); + if (ret) + pr_warn("ib_uverbs: cleanup of idr object %d failed\n", + uobj->id); + return ret; + } + + uobj->type->type_class->alloc_commit(uobj); + up_read(&uobj->context->cleanup_rwsem); + + return 0; +} + +static void alloc_abort_idr_uobject(struct ib_uobject *uobj) +{ + uverbs_idr_remove_uobj(uobj); + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + uverbs_uobject_put(uobj); +} + +void rdma_alloc_abort_uobject(struct ib_uobject *uobj) +{ + uobj->type->type_class->alloc_abort(uobj); +} + +static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive) +{ +} + +static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) +{ + struct file *filp = uobj->object; + + WARN_ON(exclusive); + /* This indirectly calls uverbs_close_fd and free the object */ + fput(filp); +} + +void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) +{ + lockdep_check(uobj, exclusive); + uobj->type->type_class->lookup_put(uobj, exclusive); + /* + * In order to unlock an object, either decrease its usecnt for + * read access or zero it in case of exclusive access. See + * uverbs_try_lock_object for locking schema information. + */ + if (!exclusive) + atomic_dec(&uobj->usecnt); + else + atomic_set(&uobj->usecnt, 0); + + uverbs_uobject_put(uobj); +} + +const struct uverbs_obj_type_class uverbs_idr_class = { + .alloc_begin = alloc_begin_idr_uobject, + .lookup_get = lookup_get_idr_uobject, + .alloc_commit = alloc_commit_idr_uobject, + .alloc_abort = alloc_abort_idr_uobject, + .lookup_put = lookup_put_idr_uobject, + .remove_commit = remove_commit_idr_uobject, + /* + * When we destroy an object, we first just lock it for WRITE and + * actually DESTROY it in the finalize stage. So, the problematic + * scenario is when we just started the finalize stage of the + * destruction (nothing was executed yet). Now, the other thread + * fetched the object for READ access, but it didn't lock it yet. + * The DESTROY thread continues and starts destroying the object. + * When the other thread continue - without the RCU, it would + * access freed memory. However, the rcu_read_lock delays the free + * until the rcu_read_lock of the READ operation quits. Since the + * exclusive lock of the object is still taken by the DESTROY flow, the + * READ operation will get -EBUSY and it'll just bail out. + */ + .needs_kfree_rcu = true, +}; + +static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) +{ + struct ib_ucontext *ucontext; + struct ib_uverbs_file *ufile = uobj_file->ufile; + int ret; + + mutex_lock(&uobj_file->ufile->cleanup_mutex); + + /* uobject was either already cleaned up or is cleaned up right now anyway */ + if (!uobj_file->uobj.context || + !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem)) + goto unlock; + + ucontext = uobj_file->uobj.context; + ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); + up_read(&ucontext->cleanup_rwsem); + if (ret) + pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); +unlock: + mutex_unlock(&ufile->cleanup_mutex); +} + +void uverbs_close_fd(struct file *f) +{ + struct ib_uobject_file *uobj_file = f->private_data; + struct kref *uverbs_file_ref = &uobj_file->ufile->ref; + + _uverbs_close_fd(uobj_file); + uverbs_uobject_put(&uobj_file->uobj); + kref_put(uverbs_file_ref, ib_uverbs_release_file); +} + +void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +{ + enum rdma_remove_reason reason = device_removed ? + RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; + unsigned int cur_order = 0; + + ucontext->cleanup_reason = reason; + /* + * Waits for all remove_commit and alloc_commit to finish. Logically, We + * want to hold this forever as the context is going to be destroyed, + * but we'll release it since it causes a "held lock freed" BUG message. + */ + down_write(&ucontext->cleanup_rwsem); + + while (!list_empty(&ucontext->uobjects)) { + struct ib_uobject *obj, *next_obj; + unsigned int next_order = UINT_MAX; + + /* + * This shouldn't run while executing other commands on this + * context. Thus, the only thing we should take care of is + * releasing a FD while traversing this list. The FD could be + * closed and released from the _release fop of this FD. + * In order to mitigate this, we add a lock. + * We take and release the lock per order traversal in order + * to let other threads (which might still use the FDs) chance + * to run. + */ + mutex_lock(&ucontext->uobjects_lock); + list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, + list) { + if (obj->type->destroy_order == cur_order) { + int ret; + + /* + * if we hit this WARN_ON, that means we are + * racing with a lookup_get. + */ + WARN_ON(uverbs_try_lock_object(obj, true)); + ret = obj->type->type_class->remove_commit(obj, + reason); + list_del(&obj->list); + if (ret) + pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", + obj->id, cur_order); + /* put the ref we took when we created the object */ + uverbs_uobject_put(obj); + } else { + next_order = min(next_order, + obj->type->destroy_order); + } + } + mutex_unlock(&ucontext->uobjects_lock); + cur_order = next_order; + } + up_write(&ucontext->cleanup_rwsem); +} + +void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) +{ + ucontext->cleanup_reason = 0; + mutex_init(&ucontext->uobjects_lock); + INIT_LIST_HEAD(&ucontext->uobjects); + init_rwsem(&ucontext->cleanup_rwsem); +} + +const struct uverbs_obj_type_class uverbs_fd_class = { + .alloc_begin = alloc_begin_fd_uobject, + .lookup_get = lookup_get_fd_uobject, + .alloc_commit = alloc_commit_fd_uobject, + .alloc_abort = alloc_abort_fd_uobject, + .lookup_put = lookup_put_fd_uobject, + .remove_commit = remove_commit_fd_uobject, + .needs_kfree_rcu = false, +}; + diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h new file mode 100644 index 000000000000..1b82e7ff7fe8 --- /dev/null +++ b/drivers/infiniband/core/rdma_core.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_CORE_H +#define RDMA_CORE_H + +#include <linux/idr.h> +#include <rdma/uverbs_types.h> +#include <rdma/ib_verbs.h> +#include <linux/mutex.h> + +/* + * These functions initialize the context and cleanups its uobjects. + * The context has a list of objects which is protected by a mutex + * on the context. initialize_ucontext should be called when we create + * a context. + * cleanup_ucontext removes all uobjects from the context and puts them. + */ +void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); +void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); + +/* + * uverbs_uobject_get is called in order to increase the reference count on + * an uobject. This is useful when a handler wants to keep the uobject's memory + * alive, regardless if this uobject is still alive in the context's objects + * repository. Objects are put via uverbs_uobject_put. + */ +void uverbs_uobject_get(struct ib_uobject *uobject); + +/* + * In order to indicate we no longer needs this uobject, uverbs_uobject_put + * is called. When the reference count is decreased, the uobject is freed. + * For example, this is used when attaching a completion channel to a CQ. + */ +void uverbs_uobject_put(struct ib_uobject *uobject); + +/* Indicate this fd is no longer used by this consumer, but its memory isn't + * necessarily released yet. When the last reference is put, we release the + * memory. After this call is executed, calling uverbs_uobject_get isn't + * allowed. + * This must be called from the release file_operations of the file! + */ +void uverbs_close_fd(struct file *f); + +#endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index ceae153997d0..e335b09c022e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -56,6 +56,8 @@ #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 +#define IB_SA_CPI_MAX_RETRY_CNT 3 +#define IB_SA_CPI_RETRY_WAIT 1000 /*msecs */ static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT; struct ib_sa_sm_ah { @@ -65,9 +67,23 @@ struct ib_sa_sm_ah { u8 src_path_mask; }; +enum rdma_class_port_info_type { + RDMA_CLASS_PORT_INFO_IB, + RDMA_CLASS_PORT_INFO_OPA +}; + +struct rdma_class_port_info { + enum rdma_class_port_info_type type; + union { + struct ib_class_port_info ib; + struct opa_class_port_info opa; + }; +}; + struct ib_sa_classport_cache { bool valid; - struct ib_class_port_info data; + int retry_cnt; + struct rdma_class_port_info data; }; struct ib_sa_port { @@ -75,6 +91,7 @@ struct ib_sa_port { struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; struct ib_sa_classport_cache classport_info; + struct delayed_work ib_cpi_work; spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u8 port_num; @@ -103,6 +120,7 @@ struct ib_sa_query { #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 #define IB_SA_CANCEL 0x00000002 +#define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_service_query { void (*callback)(int, struct ib_sa_service_rec *, void *); @@ -111,9 +129,10 @@ struct ib_sa_service_query { }; struct ib_sa_path_query { - void (*callback)(int, struct ib_sa_path_rec *, void *); + void (*callback)(int, struct sa_path_rec *, void *); void *context; struct ib_sa_query sa_query; + struct sa_path_rec *conv_pr; }; struct ib_sa_guidinfo_query { @@ -123,7 +142,7 @@ struct ib_sa_guidinfo_query { }; struct ib_sa_classport_info_query { - void (*callback)(int, struct ib_class_port_info *, void *); + void (*callback)(void *); void *context; struct ib_sa_query sa_query; }; @@ -170,12 +189,12 @@ static DEFINE_SPINLOCK(tid_lock); static u32 tid; #define PATH_REC_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ + .struct_offset_bytes = offsetof(struct sa_path_rec, field), \ + .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { - { PATH_REC_FIELD(service_id), + { PATH_REC_FIELD(ib.service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, @@ -187,15 +206,15 @@ static const struct ib_field path_rec_table[] = { .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, - { PATH_REC_FIELD(dlid), + { PATH_REC_FIELD(ib.dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, - { PATH_REC_FIELD(slid), + { PATH_REC_FIELD(ib.slid), .offset_words = 10, .offset_bits = 16, .size_bits = 16 }, - { PATH_REC_FIELD(raw_traffic), + { PATH_REC_FIELD(ib.raw_traffic), .offset_words = 11, .offset_bits = 0, .size_bits = 1 }, @@ -269,6 +288,136 @@ static const struct ib_field path_rec_table[] = { .size_bits = 48 }, }; +#define OPA_PATH_REC_FIELD(field) \ + .struct_offset_bytes = \ + offsetof(struct sa_path_rec, field), \ + .struct_size_bytes = \ + sizeof((struct sa_path_rec *)0)->field, \ + .field_name = "sa_path_rec:" #field + +static const struct ib_field opa_path_rec_table[] = { + { OPA_PATH_REC_FIELD(opa.service_id), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 64 }, + { OPA_PATH_REC_FIELD(dgid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { OPA_PATH_REC_FIELD(sgid), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 128 }, + { OPA_PATH_REC_FIELD(opa.dlid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_PATH_REC_FIELD(opa.slid), + .offset_words = 11, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_PATH_REC_FIELD(opa.raw_traffic), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 1 }, + { RESERVED, + .offset_words = 12, + .offset_bits = 1, + .size_bits = 3 }, + { OPA_PATH_REC_FIELD(flow_label), + .offset_words = 12, + .offset_bits = 4, + .size_bits = 20 }, + { OPA_PATH_REC_FIELD(hop_limit), + .offset_words = 12, + .offset_bits = 24, + .size_bits = 8 }, + { OPA_PATH_REC_FIELD(traffic_class), + .offset_words = 13, + .offset_bits = 0, + .size_bits = 8 }, + { OPA_PATH_REC_FIELD(reversible), + .offset_words = 13, + .offset_bits = 8, + .size_bits = 1 }, + { OPA_PATH_REC_FIELD(numb_path), + .offset_words = 13, + .offset_bits = 9, + .size_bits = 7 }, + { OPA_PATH_REC_FIELD(pkey), + .offset_words = 13, + .offset_bits = 16, + .size_bits = 16 }, + { OPA_PATH_REC_FIELD(opa.l2_8B), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 1 }, + { OPA_PATH_REC_FIELD(opa.l2_10B), + .offset_words = 14, + .offset_bits = 1, + .size_bits = 1 }, + { OPA_PATH_REC_FIELD(opa.l2_9B), + .offset_words = 14, + .offset_bits = 2, + .size_bits = 1 }, + { OPA_PATH_REC_FIELD(opa.l2_16B), + .offset_words = 14, + .offset_bits = 3, + .size_bits = 1 }, + { RESERVED, + .offset_words = 14, + .offset_bits = 4, + .size_bits = 2 }, + { OPA_PATH_REC_FIELD(opa.qos_type), + .offset_words = 14, + .offset_bits = 6, + .size_bits = 2 }, + { OPA_PATH_REC_FIELD(opa.qos_priority), + .offset_words = 14, + .offset_bits = 8, + .size_bits = 8 }, + { RESERVED, + .offset_words = 14, + .offset_bits = 16, + .size_bits = 3 }, + { OPA_PATH_REC_FIELD(sl), + .offset_words = 14, + .offset_bits = 19, + .size_bits = 5 }, + { RESERVED, + .offset_words = 14, + .offset_bits = 24, + .size_bits = 8 }, + { OPA_PATH_REC_FIELD(mtu_selector), + .offset_words = 15, + .offset_bits = 0, + .size_bits = 2 }, + { OPA_PATH_REC_FIELD(mtu), + .offset_words = 15, + .offset_bits = 2, + .size_bits = 6 }, + { OPA_PATH_REC_FIELD(rate_selector), + .offset_words = 15, + .offset_bits = 8, + .size_bits = 2 }, + { OPA_PATH_REC_FIELD(rate), + .offset_words = 15, + .offset_bits = 10, + .size_bits = 6 }, + { OPA_PATH_REC_FIELD(packet_life_time_selector), + .offset_words = 15, + .offset_bits = 16, + .size_bits = 2 }, + { OPA_PATH_REC_FIELD(packet_life_time), + .offset_words = 15, + .offset_bits = 18, + .size_bits = 6 }, + { OPA_PATH_REC_FIELD(preference), + .offset_words = 15, + .offset_bits = 24, + .size_bits = 8 }, +}; + #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ @@ -406,7 +555,7 @@ static const struct ib_field service_rec_table[] = { .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ .field_name = "ib_class_port_info:" #field -static const struct ib_field classport_info_rec_table[] = { +static const struct ib_field ib_classport_info_rec_table[] = { { CLASSPORTINFO_REC_FIELD(base_version), .offset_words = 0, .offset_bits = 0, @@ -477,6 +626,88 @@ static const struct ib_field classport_info_rec_table[] = { .size_bits = 32 }, }; +#define OPA_CLASSPORTINFO_REC_FIELD(field) \ + .struct_offset_bytes =\ + offsetof(struct opa_class_port_info, field), \ + .struct_size_bytes = \ + sizeof((struct opa_class_port_info *)0)->field, \ + .field_name = "opa_class_port_info:" #field + +static const struct ib_field opa_classport_info_rec_table[] = { + { OPA_CLASSPORTINFO_REC_FIELD(base_version), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 8 }, + { OPA_CLASSPORTINFO_REC_FIELD(class_version), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 8 }, + { OPA_CLASSPORTINFO_REC_FIELD(cap_mask), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey), + .offset_words = 9, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_gid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 128 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_lid), + .offset_words = 15, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp), + .offset_words = 16, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey), + .offset_words = 17, + .offset_bits = 0, + .size_bits = 32 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey), + .offset_words = 18, + .offset_bits = 0, + .size_bits = 16 }, + { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey), + .offset_words = 18, + .offset_bits = 16, + .size_bits = 16 }, + { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd), + .offset_words = 19, + .offset_bits = 0, + .size_bits = 8 }, + { RESERVED, + .offset_words = 19, + .offset_bits = 8, + .size_bits = 24 }, +}; + #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ @@ -518,7 +749,7 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query) static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, struct ib_sa_query *query) { - struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1]; + struct sa_path_rec *sa_rec = query->mad_buf->context[1]; struct ib_sa_mad *mad = query->mad_buf->mad; ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask; u16 val16; @@ -543,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Now build the attributes */ if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) { - val64 = be64_to_cpu(sa_rec->service_id); + val64 = be64_to_cpu(sa_path_get_service_id(sa_rec)); nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID, sizeof(val64), &val64); } @@ -927,96 +1158,10 @@ static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); - ib_destroy_ah(sm_ah->ah); + rdma_destroy_ah(sm_ah->ah); kfree(sm_ah); } -static void update_sm_ah(struct work_struct *work) -{ - struct ib_sa_port *port = - container_of(work, struct ib_sa_port, update_task); - struct ib_sa_sm_ah *new_ah; - struct ib_port_attr port_attr; - struct ib_ah_attr ah_attr; - - if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { - pr_warn("Couldn't query port\n"); - return; - } - - new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); - if (!new_ah) { - return; - } - - kref_init(&new_ah->ref); - new_ah->src_path_mask = (1 << port_attr.lmc) - 1; - - new_ah->pkey_index = 0; - if (ib_find_pkey(port->agent->device, port->port_num, - IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) - pr_err("Couldn't find index for default PKey\n"); - - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = port_attr.sm_lid; - ah_attr.sl = port_attr.sm_sl; - ah_attr.port_num = port->port_num; - if (port_attr.grh_required) { - ah_attr.ah_flags = IB_AH_GRH; - ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix); - ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID); - } - - new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); - if (IS_ERR(new_ah->ah)) { - pr_warn("Couldn't create new SM AH\n"); - kfree(new_ah); - return; - } - - spin_lock_irq(&port->ah_lock); - if (port->sm_ah) - kref_put(&port->sm_ah->ref, free_sm_ah); - port->sm_ah = new_ah; - spin_unlock_irq(&port->ah_lock); - -} - -static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) -{ - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_SM_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER) { - unsigned long flags; - struct ib_sa_device *sa_dev = - container_of(handler, typeof(*sa_dev), event_handler); - struct ib_sa_port *port = - &sa_dev->port[event->element.port_num - sa_dev->start_port]; - - if (!rdma_cap_ib_sa(handler->device, port->port_num)) - return; - - spin_lock_irqsave(&port->ah_lock, flags); - if (port->sm_ah) - kref_put(&port->sm_ah->ref, free_sm_ah); - port->sm_ah = NULL; - spin_unlock_irqrestore(&port->ah_lock, flags); - - if (event->event == IB_EVENT_SM_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_LID_CHANGE) { - spin_lock_irqsave(&port->classport_lock, flags); - port->classport_info.valid = false; - spin_unlock_irqrestore(&port->classport_lock, flags); - } - queue_work(ib_wq, &sa_dev->port[event->element.port_num - - sa_dev->start_port].update_task); - } -} - void ib_sa_register_client(struct ib_sa_client *client) { atomic_set(&client->users, 1); @@ -1085,7 +1230,8 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) } int ib_init_ah_from_path(struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) + struct sa_path_rec *rec, + struct rdma_ah_attr *ah_attr) { int ret; u16 gid_index; @@ -1093,21 +1239,26 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct net_device *ndev = NULL; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(rec->dlid); - ah_attr->sl = rec->sl; - ah_attr->src_path_bits = be16_to_cpu(rec->slid) & - get_src_path_mask(device, port_num); - ah_attr->port_num = port_num; - ah_attr->static_rate = rec->rate; - + ah_attr->type = rdma_ah_find_type(device, port_num); + + rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec))); + rdma_ah_set_sl(ah_attr, rec->sl); + rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) & + get_src_path_mask(device, port_num)); + rdma_ah_set_port_num(ah_attr, port_num); + rdma_ah_set_static_rate(ah_attr, rec->rate); use_roce = rdma_cap_eth_ah(device, port_num); if (use_roce) { struct net_device *idev; struct net_device *resolved_dev; - struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex, - .net = rec->net ? rec->net : - &init_net}; + struct rdma_dev_addr dev_addr = { + .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? + sa_path_get_ifindex(rec) : 0), + .net = sa_path_get_ndev(rec) ? + sa_path_get_ndev(rec) : + &init_net + }; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; @@ -1128,7 +1279,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, if ((dev_addr.network == RDMA_NETWORK_IPV4 || dev_addr.network == RDMA_NETWORK_IPV6) && - rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) return -EINVAL; idev = device->get_netdev(device, port_num); @@ -1159,28 +1310,31 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, } if (rec->hop_limit > 0 || use_roce) { - ah_attr->ah_flags = IB_AH_GRH; - ah_attr->grh.dgid = rec->dgid; + enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); - ret = ib_find_cached_gid_by_port(device, &rec->sgid, - rec->gid_type, port_num, ndev, - &gid_index); + ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, + port_num, ndev, &gid_index); if (ret) { if (ndev) dev_put(ndev); return ret; } - ah_attr->grh.sgid_index = gid_index; - ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); - ah_attr->grh.hop_limit = rec->hop_limit; - ah_attr->grh.traffic_class = rec->traffic_class; + rdma_ah_set_grh(ah_attr, &rec->dgid, + be32_to_cpu(rec->flow_label), + gid_index, rec->hop_limit, + rec->traffic_class); if (ndev) dev_put(ndev); } - if (use_roce) - memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); + if (use_roce) { + u8 *dmac = sa_path_get_dmac(rec); + + if (!dmac) + return -EINVAL; + memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN); + } return 0; } @@ -1203,7 +1357,9 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, gfp_mask, - IB_MGMT_BASE_VERSION); + ((query->flags & IB_SA_QUERY_OPA) ? + OPA_MGMT_BASE_VERSION : + IB_MGMT_BASE_VERSION)); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; @@ -1220,16 +1376,21 @@ static void free_mad(struct ib_sa_query *query) kref_put(&query->sm_ah->ref, free_sm_ah); } -static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) +static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent) { + struct ib_sa_mad *mad = query->mad_buf->mad; unsigned long flags; memset(mad, 0, sizeof *mad); - mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + if (query->flags & IB_SA_QUERY_OPA) { + mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION; + mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION; + } else { + mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; + } mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; - spin_lock_irqsave(&tid_lock, flags); mad->mad_hdr.tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); @@ -1258,7 +1419,8 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) query->mad_buf->context[0] = query; query->id = id; - if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) { + if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) && + (!(query->flags & IB_SA_QUERY_OPA))) { if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) { if (!ib_nl_make_request(query, gfp_mask)) return id; @@ -1281,18 +1443,75 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) return ret ? ret : id; } -void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) +void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec) { ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); } EXPORT_SYMBOL(ib_sa_unpack_path); -void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute) +void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute) { ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); } EXPORT_SYMBOL(ib_sa_pack_path); +static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client, + struct ib_device *device, + u8 port_num) +{ + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + unsigned long flags; + bool ret = false; + + if (!sa_dev) + return ret; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + spin_lock_irqsave(&port->classport_lock, flags); + if (!port->classport_info.valid) + goto ret; + + if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA) + ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) & + OPA_CLASS_PORT_INFO_PR_SUPPORT; +ret: + spin_unlock_irqrestore(&port->classport_lock, flags); + return ret; +} + +enum opa_pr_supported { + PR_NOT_SUPPORTED, + PR_OPA_SUPPORTED, + PR_IB_SUPPORTED +}; + +/** + * Check if current PR query can be an OPA query. + * Retuns PR_NOT_SUPPORTED if a path record query is not + * possible, PR_OPA_SUPPORTED if an OPA path record query + * is possible and PR_IB_SUPPORTED if an IB path record + * query is possible. + */ +static int opa_pr_query_possible(struct ib_sa_client *client, + struct ib_device *device, + u8 port_num, + struct sa_path_rec *rec) +{ + struct ib_port_attr port_attr; + + if (ib_query_port(device, port_num, &port_attr)) + return PR_NOT_SUPPORTED; + + if (ib_sa_opa_pathrecord_support(client, device, port_num)) + return PR_OPA_SUPPORTED; + + if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) + return PR_NOT_SUPPORTED; + else + return PR_IB_SUPPORTED; +} + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) @@ -1301,22 +1520,44 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, container_of(sa_query, struct ib_sa_path_query, sa_query); if (mad) { - struct ib_sa_path_rec rec; - - ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), - mad->data, &rec); - rec.net = NULL; - rec.ifindex = 0; - rec.gid_type = IB_GID_TYPE_IB; - eth_zero_addr(rec.dmac); - query->callback(status, &rec, query->context); + struct sa_path_rec rec; + + if (sa_query->flags & IB_SA_QUERY_OPA) { + ib_unpack(opa_path_rec_table, + ARRAY_SIZE(opa_path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_OPA; + query->callback(status, &rec, query->context); + } else { + ib_unpack(path_rec_table, + ARRAY_SIZE(path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_ndev(&rec, NULL); + sa_path_set_ifindex(&rec, 0); + sa_path_set_dmac_zero(&rec); + + if (query->conv_pr) { + struct sa_path_rec opa; + + memset(&opa, 0, sizeof(struct sa_path_rec)); + sa_convert_path_ib_to_opa(&opa, &rec); + query->callback(status, &opa, query->context); + } else { + query->callback(status, &rec, query->context); + } + } } else query->callback(status, NULL, query->context); } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { - kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); + struct ib_sa_path_query *query = + container_of(sa_query, struct ib_sa_path_query, sa_query); + + kfree(query->conv_pr); + kfree(query); } /** @@ -1346,11 +1587,11 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) */ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, + struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, - struct ib_sa_path_rec *resp, + struct sa_path_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) @@ -1360,11 +1601,16 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; + enum opa_pr_supported status; int ret; if (!sa_dev) return -ENODEV; + if ((rec->rec_type != SA_PATH_REC_TYPE_IB) && + (rec->rec_type != SA_PATH_REC_TYPE_OPA)) + return -EINVAL; + port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; @@ -1373,9 +1619,26 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, return -ENOMEM; query->sa_query.port = port; + if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { + status = opa_pr_query_possible(client, device, port_num, rec); + if (status == PR_NOT_SUPPORTED) { + ret = -EINVAL; + goto err1; + } else if (status == PR_OPA_SUPPORTED) { + query->sa_query.flags |= IB_SA_QUERY_OPA; + } else { + query->conv_pr = + kmalloc(sizeof(*query->conv_pr), gfp_mask); + if (!query->conv_pr) { + ret = -ENOMEM; + goto err1; + } + } + } + ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) - goto err1; + goto err2; ib_sa_client_get(client); query->sa_query.client = client; @@ -1383,7 +1646,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, query->context = context; mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; @@ -1391,24 +1654,36 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); + if (query->sa_query.flags & IB_SA_QUERY_OPA) { + ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), + rec, mad->data); + } else if (query->conv_pr) { + sa_convert_path_opa_to_ib(query->conv_pr, rec); + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), + query->conv_pr, mad->data); + } else { + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), + rec, mad->data); + } *sa_query = &query->sa_query; query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE; - query->sa_query.mad_buf->context[1] = rec; + query->sa_query.mad_buf->context[1] = (query->conv_pr) ? + query->conv_pr : rec; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) - goto err2; + goto err3; return ret; -err2: +err3: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); - +err2: + kfree(query->conv_pr); err1: kfree(query); return ret; @@ -1508,7 +1783,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, query->context = context; mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; query->sa_query.release = ib_sa_service_rec_release; @@ -1600,7 +1875,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, query->context = context; mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; @@ -1697,7 +1972,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, query->context = context; mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; query->sa_query.release = ib_sa_guidinfo_rec_release; @@ -1728,7 +2003,42 @@ err1: } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); -/* Support get SA ClassPortInfo */ +bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, + struct ib_device *device, + u8 port_num) +{ + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + bool ret = false; + unsigned long flags; + + if (!sa_dev) + return ret; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + + spin_lock_irqsave(&port->classport_lock, flags); + if ((port->classport_info.valid) && + (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB)) + ret = ib_get_cpi_capmask2(&port->classport_info.data.ib) + & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT; + spin_unlock_irqrestore(&port->classport_lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support); + +struct ib_classport_info_context { + struct completion done; + struct ib_sa_query *sa_query; +}; + +static void ib_classportinfo_cb(void *context) +{ + struct ib_classport_info_context *cb_ctx = context; + + complete(&cb_ctx->done); +} + static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) @@ -1736,91 +2046,91 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); + struct ib_sa_classport_cache *info = &sa_query->port->classport_info; if (mad) { - struct ib_class_port_info rec; + if (sa_query->flags & IB_SA_QUERY_OPA) { + struct opa_class_port_info rec; - ib_unpack(classport_info_rec_table, - ARRAY_SIZE(classport_info_rec_table), - mad->data, &rec); + ib_unpack(opa_classport_info_rec_table, + ARRAY_SIZE(opa_classport_info_rec_table), + mad->data, &rec); - spin_lock_irqsave(&sa_query->port->classport_lock, flags); - if (!status && !sa_query->port->classport_info.valid) { - memcpy(&sa_query->port->classport_info.data, &rec, - sizeof(sa_query->port->classport_info.data)); + spin_lock_irqsave(&sa_query->port->classport_lock, + flags); + if (!status && !info->valid) { + memcpy(&info->data.opa, &rec, + sizeof(info->data.opa)); - sa_query->port->classport_info.valid = true; - } - spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); + info->valid = true; + info->data.type = RDMA_CLASS_PORT_INFO_OPA; + } + spin_unlock_irqrestore(&sa_query->port->classport_lock, + flags); - query->callback(status, &rec, query->context); - } else { - query->callback(status, NULL, query->context); + } else { + struct ib_class_port_info rec; + + ib_unpack(ib_classport_info_rec_table, + ARRAY_SIZE(ib_classport_info_rec_table), + mad->data, &rec); + + spin_lock_irqsave(&sa_query->port->classport_lock, + flags); + if (!status && !info->valid) { + memcpy(&info->data.ib, &rec, + sizeof(info->data.ib)); + + info->valid = true; + info->data.type = RDMA_CLASS_PORT_INFO_IB; + } + spin_unlock_irqrestore(&sa_query->port->classport_lock, + flags); + } } + query->callback(query->context); } -static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query) +static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_classport_info_query, sa_query)); } -int ib_sa_classport_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_class_port_info *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) +static int ib_sa_classport_info_rec_query(struct ib_sa_port *port, + int timeout_ms, + void (*callback)(void *context), + void *context, + struct ib_sa_query **sa_query) { - struct ib_sa_classport_info_query *query; - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; struct ib_mad_agent *agent; + struct ib_sa_classport_info_query *query; struct ib_sa_mad *mad; - struct ib_class_port_info cached_class_port_info; + gfp_t gfp_mask = GFP_KERNEL; int ret; - unsigned long flags; - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; - /* Use cached ClassPortInfo attribute if valid instead of sending mad */ - spin_lock_irqsave(&port->classport_lock, flags); - if (port->classport_info.valid && callback) { - memcpy(&cached_class_port_info, &port->classport_info.data, - sizeof(cached_class_port_info)); - spin_unlock_irqrestore(&port->classport_lock, flags); - callback(0, &cached_class_port_info, context); - return 0; - } - spin_unlock_irqrestore(&port->classport_lock, flags); - query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; + query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device, + port->port_num) ? + IB_SA_QUERY_OPA : 0; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) - goto err1; + goto err_free; - ib_sa_client_get(client); - query->sa_query.client = client; - query->callback = callback; - query->context = context; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); + init_mad(&query->sa_query, agent); - query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL; - - query->sa_query.release = ib_sa_portclass_info_rec_release; - /* support GET only */ + query->sa_query.callback = ib_sa_classport_info_rec_callback; + query->sa_query.release = ib_sa_classport_info_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); mad->sa_hdr.comp_mask = 0; @@ -1828,20 +2138,71 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) - goto err2; + goto err_free_mad; return ret; -err2: +err_free_mad: *sa_query = NULL; - ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); -err1: +err_free: kfree(query); return ret; } -EXPORT_SYMBOL(ib_sa_classport_info_rec_query); + +static void update_ib_cpi(struct work_struct *work) +{ + struct ib_sa_port *port = + container_of(work, struct ib_sa_port, ib_cpi_work.work); + struct ib_classport_info_context *cb_context; + unsigned long flags; + int ret; + + /* If the classport info is valid, nothing + * to do here. + */ + spin_lock_irqsave(&port->classport_lock, flags); + if (port->classport_info.valid) { + spin_unlock_irqrestore(&port->classport_lock, flags); + return; + } + spin_unlock_irqrestore(&port->classport_lock, flags); + + cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL); + if (!cb_context) + goto err_nomem; + + init_completion(&cb_context->done); + + ret = ib_sa_classport_info_rec_query(port, 3000, + ib_classportinfo_cb, cb_context, + &cb_context->sa_query); + if (ret < 0) + goto free_cb_err; + wait_for_completion(&cb_context->done); +free_cb_err: + kfree(cb_context); + spin_lock_irqsave(&port->classport_lock, flags); + + /* If the classport info is still not valid, the query should have + * failed for some reason. Retry issuing the query + */ + if (!port->classport_info.valid) { + port->classport_info.retry_cnt++; + if (port->classport_info.retry_cnt <= + IB_SA_CPI_MAX_RETRY_CNT) { + unsigned long delay = + msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT); + + queue_delayed_work(ib_wq, &port->ib_cpi_work, delay); + } + } + spin_unlock_irqrestore(&port->classport_lock, flags); + +err_nomem: + return; +} static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) @@ -1870,7 +2231,8 @@ static void send_handler(struct ib_mad_agent *agent, spin_unlock_irqrestore(&idr_lock, flags); free_mad(query); - ib_sa_client_put(query->client); + if (query->client) + ib_sa_client_put(query->client); query->release(query); } @@ -1897,6 +2259,102 @@ static void recv_handler(struct ib_mad_agent *mad_agent, ib_free_recv_mad(mad_recv_wc); } +static void update_sm_ah(struct work_struct *work) +{ + struct ib_sa_port *port = + container_of(work, struct ib_sa_port, update_task); + struct ib_sa_sm_ah *new_ah; + struct ib_port_attr port_attr; + struct rdma_ah_attr ah_attr; + + if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { + pr_warn("Couldn't query port\n"); + return; + } + + new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL); + if (!new_ah) + return; + + kref_init(&new_ah->ref); + new_ah->src_path_mask = (1 << port_attr.lmc) - 1; + + new_ah->pkey_index = 0; + if (ib_find_pkey(port->agent->device, port->port_num, + IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) + pr_err("Couldn't find index for default PKey\n"); + + memset(&ah_attr, 0, sizeof(ah_attr)); + ah_attr.type = rdma_ah_find_type(port->agent->device, + port->port_num); + rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); + rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); + rdma_ah_set_port_num(&ah_attr, port->port_num); + if (port_attr.grh_required) { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); + } + + new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); + if (IS_ERR(new_ah->ah)) { + pr_warn("Couldn't create new SM AH\n"); + kfree(new_ah); + return; + } + + spin_lock_irq(&port->ah_lock); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); + port->sm_ah = new_ah; + spin_unlock_irq(&port->ah_lock); +} + +static void ib_sa_event(struct ib_event_handler *handler, + struct ib_event *event) +{ + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER) { + unsigned long flags; + struct ib_sa_device *sa_dev = + container_of(handler, typeof(*sa_dev), event_handler); + u8 port_num = event->element.port_num - sa_dev->start_port; + struct ib_sa_port *port = &sa_dev->port[port_num]; + + if (!rdma_cap_ib_sa(handler->device, port->port_num)) + return; + + spin_lock_irqsave(&port->ah_lock, flags); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); + port->sm_ah = NULL; + spin_unlock_irqrestore(&port->ah_lock, flags); + + if (event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PORT_ACTIVE) { + unsigned long delay = + msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT); + + spin_lock_irqsave(&port->classport_lock, flags); + port->classport_info.valid = false; + port->classport_info.retry_cnt = 0; + spin_unlock_irqrestore(&port->classport_lock, flags); + queue_delayed_work(ib_wq, + &port->ib_cpi_work, delay); + } + queue_work(ib_wq, &sa_dev->port[port_num].update_task); + } +} + static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; @@ -1934,6 +2392,8 @@ static void ib_sa_add_one(struct ib_device *device) goto err; INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); + INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work, + update_ib_cpi); count++; } @@ -1980,11 +2440,11 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data) return; ib_unregister_event_handler(&sa_dev->event_handler); - flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { if (rdma_cap_ib_sa(device, i + 1)) { + cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work); ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index daadf3130c9f..7ebe1ef23652 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -253,6 +253,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, speed = " EDR"; rate = 250; break; + case IB_SPEED_HDR: + speed = " HDR"; + rate = 500; + break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ rate = 25; @@ -1301,7 +1305,7 @@ err_put: free_port_list_attributes(device); err_unregister: - device_unregister(class_dev); + device_del(class_dev); err: return ret; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index cc0d51fb06e3..112099c86a19 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -702,10 +702,10 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) return 0; } -static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) +static int ib_ucm_path_get(struct sa_path_rec **path, u64 src) { struct ib_user_path_rec upath; - struct ib_sa_path_rec *sa_path; + struct sa_path_rec *sa_path; *path = NULL; @@ -962,7 +962,7 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, int in_len, int out_len) { struct ib_ucm_context *ctx; - struct ib_sa_path_rec *path = NULL; + struct sa_path_rec *path = NULL; struct ib_ucm_lap cmd; const void *data = NULL; int result; @@ -1205,12 +1205,15 @@ static void ib_ucm_release_dev(struct device *dev) struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); - cdev_del(&ucm_dev->cdev); + kfree(ucm_dev); +} + +static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev) +{ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); - kfree(ucm_dev); } static const struct file_operations ucm_fops = { @@ -1266,7 +1269,9 @@ static void ib_ucm_add_one(struct ib_device *device) if (!ucm_dev) return; + device_initialize(&ucm_dev->dev); ucm_dev->ib_dev = device; + ucm_dev->dev.release = ib_ucm_release_dev; devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); if (devnum >= IB_UCM_MAX_DEVICES) { @@ -1286,16 +1291,14 @@ static void ib_ucm_add_one(struct ib_device *device) cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - if (cdev_add(&ucm_dev->cdev, base, 1)) - goto err; ucm_dev->dev.class = &cm_class; ucm_dev->dev.parent = device->dev.parent; - ucm_dev->dev.devt = ucm_dev->cdev.dev; - ucm_dev->dev.release = ib_ucm_release_dev; + ucm_dev->dev.devt = base; + dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); - if (device_register(&ucm_dev->dev)) - goto err_cdev; + if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev)) + goto err_devnum; if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) goto err_dev; @@ -1304,15 +1307,11 @@ static void ib_ucm_add_one(struct ib_device *device) return; err_dev: - device_unregister(&ucm_dev->dev); -err_cdev: - cdev_del(&ucm_dev->cdev); - if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); + cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev); +err_devnum: + ib_ucm_free_dev(ucm_dev); err: - kfree(ucm_dev); + put_device(&ucm_dev->dev); return; } @@ -1323,7 +1322,9 @@ static void ib_ucm_remove_one(struct ib_device *device, void *client_data) if (!ucm_dev) return; - device_unregister(&ucm_dev->dev); + cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev); + ib_ucm_free_dev(ucm_dev); + put_device(&ucm_dev->dev); } static CLASS_ATTR_STRING(abi_version, S_IRUGO, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index e12f8faf8c23..276f0ef835bd 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -898,11 +898,18 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { + struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i]; resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL; - ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], - &resp->path_data[i].path_rec); + if (rec->rec_type == SA_PATH_REC_TYPE_IB) { + ib_sa_pack_path(rec, &resp->path_data[i].path_rec); + } else { + struct sa_path_rec ib; + + sa_convert_path_opa_to_ib(&ib, rec); + ib_sa_pack_path(&ib, &resp->path_data[i].path_rec); + } } if (copy_to_user(response, resp, @@ -1197,7 +1204,7 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, static int ucma_set_ib_path(struct ucma_context *ctx, struct ib_path_rec_data *path_data, size_t optlen) { - struct ib_sa_path_rec sa_path; + struct sa_path_rec sa_path; struct rdma_cm_event event; int ret; @@ -1215,8 +1222,17 @@ static int ucma_set_ib_path(struct ucma_context *ctx, memset(&sa_path, 0, sizeof(sa_path)); + sa_path.rec_type = SA_PATH_REC_TYPE_IB; ib_sa_unpack_path(path_data->path_rec, &sa_path); - ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + + if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) { + struct sa_path_rec opa; + + sa_convert_path_ib_to_opa(&opa, &sa_path); + ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1); + } else { + ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + } if (ret) return ret; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 27f155d2df8d..3dbf811d3c51 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -115,11 +115,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (!umem) return ERR_PTR(-ENOMEM); - umem->context = context; - umem->length = size; - umem->address = addr; - umem->page_size = PAGE_SIZE; - umem->pid = get_task_pid(current, PIDTYPE_PID); + umem->context = context; + umem->length = size; + umem->address = addr; + umem->page_shift = PAGE_SHIFT; + umem->pid = get_task_pid(current, PIDTYPE_PID); /* * We ask for writable memory if any of the following * access flags are set. "Local write" and "remote write" @@ -133,7 +133,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (access & IB_ACCESS_ON_DEMAND) { put_pid(umem->pid); - ret = ib_umem_odp_get(context, umem); + ret = ib_umem_odp_get(context, umem, access); if (ret) { kfree(umem); return ERR_PTR(ret); @@ -315,7 +315,6 @@ EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - int shift; int i; int n; struct scatterlist *sg; @@ -323,11 +322,9 @@ int ib_umem_page_count(struct ib_umem *umem) if (umem->odp_data) return ib_umem_num_pages(umem); - shift = ilog2(umem->page_size); - n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) - n += sg_dma_len(sg) >> shift; + n += sg_dma_len(sg) >> umem->page_shift; return n; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index cb2742b548bb..0780b1afefa9 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/export.h> #include <linux/vmalloc.h> +#include <linux/hugetlb.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> @@ -254,11 +255,11 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, if (!umem) return ERR_PTR(-ENOMEM); - umem->context = context; - umem->length = size; - umem->address = addr; - umem->page_size = PAGE_SIZE; - umem->writable = 1; + umem->context = context; + umem->length = size; + umem->address = addr; + umem->page_shift = PAGE_SHIFT; + umem->writable = 1; odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); if (!odp_data) { @@ -306,7 +307,8 @@ out_umem: } EXPORT_SYMBOL(ib_alloc_odp_umem); -int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) +int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, + int access) { int ret_val; struct pid *our_pid; @@ -315,6 +317,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) if (!mm) return -EINVAL; + if (access & IB_ACCESS_HUGETLB) { + struct vm_area_struct *vma; + struct hstate *h; + + vma = find_vma(mm, ib_umem_start(umem)); + if (!vma || !is_vm_hugetlb_page(vma)) + return -EINVAL; + h = hstate_vma(vma); + umem->page_shift = huge_page_shift(h); + umem->hugetlb = 1; + } else { + umem->hugetlb = 0; + } + /* Prevent creating ODP MRs in child processes */ rcu_read_lock(); our_pid = get_task_pid(current->group_leader, PIDTYPE_PID); @@ -325,7 +341,6 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) goto out_mm; } - umem->hugetlb = 0; umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL); if (!umem->odp_data) { ret_val = -ENOMEM; @@ -504,7 +519,6 @@ out: static int ib_umem_odp_map_dma_single_page( struct ib_umem *umem, int page_index, - u64 base_virt_addr, struct page *page, u64 access_mask, unsigned long current_seq) @@ -527,7 +541,7 @@ static int ib_umem_odp_map_dma_single_page( if (!(umem->odp_data->dma_list[page_index])) { dma_addr = ib_dma_map_page(dev, page, - 0, PAGE_SIZE, + 0, BIT(umem->page_shift), DMA_BIDIRECTIONAL); if (ib_dma_mapping_error(dev, dma_addr)) { ret = -EFAULT; @@ -555,8 +569,9 @@ out: if (remove_existing_mapping && umem->context->invalidate_range) { invalidate_page_trampoline( umem, - base_virt_addr + (page_index * PAGE_SIZE), - base_virt_addr + ((page_index+1)*PAGE_SIZE), + ib_umem_start(umem) + (page_index >> umem->page_shift), + ib_umem_start(umem) + ((page_index + 1) >> + umem->page_shift), NULL); ret = -EAGAIN; } @@ -595,10 +610,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = NULL; struct page **local_page_list = NULL; - u64 off; - int j, k, ret = 0, start_idx, npages = 0; - u64 base_virt_addr; + u64 page_mask, off; + int j, k, ret = 0, start_idx, npages = 0, page_shift; unsigned int flags = 0; + phys_addr_t p = 0; if (access_mask == 0) return -EINVAL; @@ -611,9 +626,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, if (!local_page_list) return -ENOMEM; - off = user_virt & (~PAGE_MASK); - user_virt = user_virt & PAGE_MASK; - base_virt_addr = user_virt; + page_shift = umem->page_shift; + page_mask = ~(BIT(page_shift) - 1); + off = user_virt & (~page_mask); + user_virt = user_virt & page_mask; bcnt += off; /* Charge for the first page offset as well. */ owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID); @@ -631,13 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, if (access_mask & ODP_WRITE_ALLOWED_BIT) flags |= FOLL_WRITE; - start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; + start_idx = (user_virt - ib_umem_start(umem)) >> page_shift; k = start_idx; while (bcnt > 0) { - const size_t gup_num_pages = - min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, - PAGE_SIZE / sizeof(struct page *)); + const size_t gup_num_pages = min_t(size_t, + (bcnt + BIT(page_shift) - 1) >> page_shift, + PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); /* @@ -656,14 +672,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, break; bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); - user_virt += npages << PAGE_SHIFT; mutex_lock(&umem->odp_data->umem_mutex); - for (j = 0; j < npages; ++j) { + for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) { + if (user_virt & ~page_mask) { + p += PAGE_SIZE; + if (page_to_phys(local_page_list[j]) != p) { + ret = -EFAULT; + break; + } + put_page(local_page_list[j]); + continue; + } + ret = ib_umem_odp_map_dma_single_page( - umem, k, base_virt_addr, local_page_list[j], - access_mask, current_seq); + umem, k, local_page_list[j], + access_mask, current_seq); if (ret < 0) break; + + p = page_to_phys(local_page_list[j]); k++; } mutex_unlock(&umem->odp_data->umem_mutex); @@ -707,8 +734,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * invalidations, so we must make sure we free each page only * once. */ mutex_lock(&umem->odp_data->umem_mutex); - for (addr = virt; addr < bound; addr += (u64)umem->page_size) { - idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; + for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { + idx = (addr - ib_umem_start(umem)) >> umem->page_shift; if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; dma_addr_t dma = umem->odp_data->dma_list[idx]; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index aca7ff7abedc..36a6f5c8914c 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -197,7 +197,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); - ib_destroy_ah(packet->msg->ah); + rdma_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { @@ -235,17 +235,19 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { - struct ib_ah_attr ah_attr; + struct rdma_ah_attr ah_attr; + const struct ib_global_route *grh; ib_init_ah_from_wc(agent->device, agent->port_num, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, &ah_attr); - packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; - packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; - packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; - memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); - packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); + grh = rdma_ah_read_grh(&ah_attr); + packet->mad.hdr.gid_index = grh->sgid_index; + packet->mad.hdr.hop_limit = grh->hop_limit; + packet->mad.hdr.traffic_class = grh->traffic_class; + memcpy(packet->mad.hdr.gid, &grh->dgid, 16); + packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label); } if (queue_packet(file, agent, packet)) @@ -449,7 +451,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; struct ib_mad_agent *agent; - struct ib_ah_attr ah_attr; + struct rdma_ah_attr ah_attr; struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; __be64 *tid; @@ -489,20 +491,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); - ah_attr.sl = packet->mad.hdr.sl; - ah_attr.src_path_bits = packet->mad.hdr.path_bits; - ah_attr.port_num = file->port->port_num; + ah_attr.type = rdma_ah_find_type(file->port->ib_dev, + file->port->port_num); + rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid)); + rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl); + rdma_ah_set_path_bits(&ah_attr, packet->mad.hdr.path_bits); + rdma_ah_set_port_num(&ah_attr, file->port->port_num); if (packet->mad.hdr.grh_present) { - ah_attr.ah_flags = IB_AH_GRH; - memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); - ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; - ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); - ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; - ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; + rdma_ah_set_grh(&ah_attr, NULL, + be32_to_cpu(packet->mad.hdr.flow_label), + packet->mad.hdr.gid_index, + packet->mad.hdr.hop_limit, + packet->mad.hdr.traffic_class); + rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid); } - ah = ib_create_ah(agent->qp->pd, &ah_attr); + ah = rdma_create_ah(agent->qp->pd, &ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; @@ -596,7 +600,7 @@ err_send: err_msg: ib_free_send_mad(packet->msg); err_ah: - ib_destroy_ah(ah); + rdma_destroy_ah(ah); err_up: mutex_unlock(&file->mutex); err: @@ -1183,7 +1187,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; - port->cdev.kobj.parent = &umad_dev->kobj; + cdev_set_parent(&port->cdev, &umad_dev->kobj); kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); if (cdev_add(&port->cdev, base, 1)) goto err_cdev; @@ -1202,7 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, base += IB_UMAD_MAX_PORTS; cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; - port->sm_cdev.kobj.parent = &umad_dev->kobj; + cdev_set_parent(&port->sm_cdev, &umad_dev->kobj); kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index e1bedf0bac04..64d494a64daf 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -76,12 +76,13 @@ * an asynchronous event queue file is created and released when the * event file is closed. * - * struct ib_uverbs_event_file: One reference is held by the VFS and - * released when the file is closed. For asynchronous event files, - * another reference is held by the corresponding main context file - * and released when that file is closed. For completion event files, - * a reference is taken when a CQ is created that uses the file, and - * released when the CQ is destroyed. + * struct ib_uverbs_event_queue: Base structure for + * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file. + * One reference is held by the VFS and released when the file is closed. + * For asynchronous event files, another reference is held by the corresponding + * main context file and released when that file is closed. For completion + * event files, a reference is taken when a CQ is created that uses the file, + * and released when the CQ is destroyed. */ struct ib_uverbs_device { @@ -101,18 +102,26 @@ struct ib_uverbs_device { struct list_head uverbs_events_file_list; }; -struct ib_uverbs_event_file { - struct kref ref; - int is_async; - struct ib_uverbs_file *uverbs_file; +struct ib_uverbs_event_queue { spinlock_t lock; int is_closed; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; struct list_head event_list; +}; + +struct ib_uverbs_async_event_file { + struct ib_uverbs_event_queue ev_queue; + struct ib_uverbs_file *uverbs_file; + struct kref ref; struct list_head list; }; +struct ib_uverbs_completion_event_file { + struct ib_uobject_file uobj_file; + struct ib_uverbs_event_queue ev_queue; +}; + struct ib_uverbs_file { struct kref ref; struct mutex mutex; @@ -120,9 +129,13 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file *async_file; + struct ib_uverbs_async_event_file *async_file; struct list_head list; int is_closed; + + struct idr idr; + /* spinlock protects write access to idr */ + spinlock_t idr_lock; }; struct ib_uverbs_event { @@ -159,6 +172,8 @@ struct ib_usrq_object { struct ib_uqp_object { struct ib_uevent_object uevent; + /* lock for mcast list */ + struct mutex mcast_lock; struct list_head mcast_list; struct ib_uxrcd_object *uxrcd; }; @@ -176,32 +191,18 @@ struct ib_ucq_object { u32 async_events_reported; }; -extern spinlock_t ib_uverbs_idr_lock; -extern struct idr ib_uverbs_pd_idr; -extern struct idr ib_uverbs_mr_idr; -extern struct idr ib_uverbs_mw_idr; -extern struct idr ib_uverbs_ah_idr; -extern struct idr ib_uverbs_cq_idr; -extern struct idr ib_uverbs_qp_idr; -extern struct idr ib_uverbs_srq_idr; -extern struct idr ib_uverbs_xrcd_idr; -extern struct idr ib_uverbs_rule_idr; -extern struct idr ib_uverbs_wq_idr; -extern struct idr ib_uverbs_rwq_ind_tbl_idr; - -void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); - -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async); +extern const struct file_operations uverbs_event_fops; +void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); +struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev); void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, + struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj); void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj); +void ib_uverbs_release_file(struct kref *ref); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); @@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); +int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, + enum rdma_remove_reason why); int uverbs_dealloc_mw(struct ib_mw *mw); +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj); struct ib_uverbs_flow_spec { union { @@ -229,6 +233,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; struct ib_uverbs_flow_spec_action_tag flow_tag; + struct ib_uverbs_flow_spec_action_drop drop; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7b7a76e1279a..70b7fb156414 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -40,270 +40,29 @@ #include <linux/uaccess.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_std_types.h> +#include "rdma_core.h" + #include "uverbs.h" #include "core_priv.h" -struct uverbs_lock_class { - struct lock_class_key key; - char name[16]; -}; - -static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; -static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; -static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; -static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; -static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; -static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; -static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; -static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; -static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; -static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; -static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; - -/* - * The ib_uobject locking scheme is as follows: - * - * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it - * needs to be held during all idr write operations. When an object is - * looked up, a reference must be taken on the object's kref before - * dropping this lock. For read operations, the rcu_read_lock() - * and rcu_write_lock() but similarly the kref reference is grabbed - * before the rcu_read_unlock(). - * - * - Each object also has an rwsem. This rwsem must be held for - * reading while an operation that uses the object is performed. - * For example, while registering an MR, the associated PD's - * uobject.mutex must be held for reading. The rwsem must be held - * for writing while initializing or destroying an object. - * - * - In addition, each object has a "live" flag. If this flag is not - * set, then lookups of the object will fail even if it is found in - * the idr. This handles a reader that blocks and does not acquire - * the rwsem until after the object is destroyed. The destroy - * operation will set the live flag to 0 and then drop the rwsem; - * this will allow the reader to acquire the rwsem, see that the - * live flag is 0, and then drop the rwsem and its reference to - * object. The underlying storage will not be freed until the last - * reference to the object is dropped. - */ - -static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct uverbs_lock_class *c) -{ - uobj->user_handle = user_handle; - uobj->context = context; - kref_init(&uobj->ref); - init_rwsem(&uobj->mutex); - lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); - uobj->live = 0; -} - -static void release_uobj(struct kref *kref) -{ - kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); -} - -static void put_uobj(struct ib_uobject *uobj) -{ - kref_put(&uobj->ref, release_uobj); -} - -static void put_uobj_read(struct ib_uobject *uobj) -{ - up_read(&uobj->mutex); - put_uobj(uobj); -} - -static void put_uobj_write(struct ib_uobject *uobj) -{ - up_write(&uobj->mutex); - put_uobj(uobj); -} - -static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock(&ib_uverbs_idr_lock); - - ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&ib_uverbs_idr_lock); - idr_preload_end(); - - return ret < 0 ? ret : 0; -} - -void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) +static struct ib_uverbs_completion_event_file * +ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) { - spin_lock(&ib_uverbs_idr_lock); - idr_remove(idr, uobj->id); - spin_unlock(&ib_uverbs_idr_lock); -} + struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel), + fd, context); + struct ib_uobject_file *uobj_file; -static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj; + if (IS_ERR(uobj)) + return (void *)uobj; - rcu_read_lock(); - uobj = idr_find(idr, id); - if (uobj) { - if (uobj->context == context) - kref_get(&uobj->ref); - else - uobj = NULL; - } - rcu_read_unlock(); + uverbs_uobject_get(uobj); + uobj_put_read(uobj); - return uobj; -} - -static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context, int nested) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; - - if (nested) - down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); - else - down_read(&uobj->mutex); - if (!uobj->live) { - put_uobj_read(uobj); - return NULL; - } - - return uobj; -} - -static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; - - down_write(&uobj->mutex); - if (!uobj->live) { - put_uobj_write(uobj); - return NULL; - } - - return uobj; -} - -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, - int nested) -{ - struct ib_uobject *uobj; - - uobj = idr_read_uobj(idr, id, context, nested); - return uobj ? uobj->object : NULL; -} - -static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); -} - -static void put_pd_read(struct ib_pd *pd) -{ - put_uobj_read(pd->uobject); -} - -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) -{ - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); -} - -static void put_cq_read(struct ib_cq *cq) -{ - put_uobj_read(cq->uobject); -} - -static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); -} - -static void put_ah_read(struct ib_ah *ah) -{ - put_uobj_read(ah->uobject); -} - -static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); -} - -static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); -} - -static void put_wq_read(struct ib_wq *wq) -{ - put_uobj_read(wq->uobject); -} - -static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, - struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); -} - -static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) -{ - put_uobj_read(ind_table->uobject); -} - -static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); - return uobj ? uobj->object : NULL; -} - -static void put_qp_read(struct ib_qp *qp) -{ - put_uobj_read(qp->uobject); -} - -static void put_qp_write(struct ib_qp *qp) -{ - put_uobj_write(qp->uobject); -} - -static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); -} - -static void put_srq_read(struct ib_srq *srq) -{ - put_uobj_read(srq->uobject); -} - -static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, - struct ib_uobject **uobj) -{ - *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); - return *uobj ? (*uobj)->object : NULL; -} - -static void put_xrcd_read(struct ib_uobject *uobj) -{ - put_uobj_read(uobj); + uobj_file = container_of(uobj, struct ib_uobject_file, uobj); + return container_of(uobj_file, struct ib_uverbs_completion_event_file, + uobj_file); } ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, @@ -348,17 +107,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->device = ib_dev; ucontext->cg_obj = cg_obj; - INIT_LIST_HEAD(&ucontext->pd_list); - INIT_LIST_HEAD(&ucontext->mr_list); - INIT_LIST_HEAD(&ucontext->mw_list); - INIT_LIST_HEAD(&ucontext->cq_list); - INIT_LIST_HEAD(&ucontext->qp_list); - INIT_LIST_HEAD(&ucontext->srq_list); - INIT_LIST_HEAD(&ucontext->ah_list); - INIT_LIST_HEAD(&ucontext->wq_list); - INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); - INIT_LIST_HEAD(&ucontext->xrcd_list); - INIT_LIST_HEAD(&ucontext->rule_list); + /* ufile is required when some objects are released */ + ucontext->ufile = file; + uverbs_initialize_ucontext(ucontext); + rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); @@ -382,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_free; resp.async_fd = ret; - filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); + filp = ib_uverbs_alloc_async_event_file(file, ib_dev); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_fd; @@ -565,19 +317,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &pd_lock_class); - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) { - kfree(uobj); - return ret; - } - - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(pd), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); if (IS_ERR(pd)) { @@ -591,10 +333,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, atomic_set(&pd->usecnt, 0); uobj->object = pd; - ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -604,25 +342,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, goto err_copy; } - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - -err_idr: ib_dealloc_pd(pd); err: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -633,45 +361,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; - struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); - if (!uobj) - return -EINVAL; - pd = uobj->object; - - if (atomic_read(&pd->usecnt)) { - ret = -EBUSY; - goto err_put; - } - - ret = pd->device->dealloc_pd(uobj->object); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); - if (ret) - goto err_put; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - uobj->live = 0; - put_uobj_write(uobj); + uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + ret = uobj_remove_commit(uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; - -err_put: - put_uobj_write(uobj); - return ret; + return ret ?: in_len; } struct xrcd_table_entry { @@ -808,16 +510,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) { - ret = -ENOMEM; + obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd), + file->ucontext); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); goto err_tree_mutex_unlock; } - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); - - down_write(&obj->uobject.mutex); - if (!xrcd) { xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); if (IS_ERR(xrcd)) { @@ -835,10 +534,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.xrcd_handle = obj->uobject.id; @@ -847,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, /* create new inode/xrcd table entry */ ret = xrcd_table_insert(file->device, inode, xrcd); if (ret) - goto err_insert_xrcd; + goto err_dealloc_xrcd; } atomic_inc(&xrcd->usecnt); } @@ -861,12 +556,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (f.file) fdput(f); - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - up_write(&obj->uobject.mutex); + uobj_alloc_commit(&obj->uobject); mutex_unlock(&file->device->xrcd_tree_mutex); return in_len; @@ -878,14 +568,11 @@ err_copy: atomic_dec(&xrcd->usecnt); } -err_insert_xrcd: - idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - -err_idr: +err_dealloc_xrcd: ib_dealloc_xrcd(xrcd); err: - put_uobj_write(&obj->uobject); + uobj_alloc_abort(&obj->uobject); err_tree_mutex_unlock: if (f.file) @@ -903,75 +590,41 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, { struct ib_uverbs_close_xrcd cmd; struct ib_uobject *uobj; - struct ib_xrcd *xrcd = NULL; - struct inode *inode = NULL; - struct ib_uxrcd_object *obj; - int live; int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->device->xrcd_tree_mutex); - uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); - if (!uobj) { - ret = -EINVAL; - goto out; - } - - xrcd = uobj->object; - inode = xrcd->inode; - obj = container_of(uobj, struct ib_uxrcd_object, uobject); - if (atomic_read(&obj->refcnt)) { - put_uobj_write(uobj); - ret = -EBUSY; - goto out; - } - - if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { - ret = ib_dealloc_xrcd(uobj->object); - if (!ret) - uobj->live = 0; + uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle, + file->ucontext); + if (IS_ERR(uobj)) { + mutex_unlock(&file->device->xrcd_tree_mutex); + return PTR_ERR(uobj); } - live = uobj->live; - if (inode && ret) - atomic_inc(&xrcd->usecnt); - - put_uobj_write(uobj); - - if (ret) - goto out; - - if (inode && !live) - xrcd_table_delete(file->device, inode); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - ret = in_len; - -out: - mutex_unlock(&file->device->xrcd_tree_mutex); - return ret; + ret = uobj_remove_commit(uobj); + return ret ?: in_len; } -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, - struct ib_xrcd *xrcd) +int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd, + enum rdma_remove_reason why) { struct inode *inode; + int ret; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) - return; + return 0; - ib_dealloc_xrcd(xrcd); + ret = ib_dealloc_xrcd(xrcd); - if (inode) + if (why == RDMA_REMOVE_DESTROY && ret) + atomic_inc(&xrcd->usecnt); + else if (inode) xrcd_table_delete(dev, inode); + + return ret; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, @@ -1004,14 +657,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mr_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(mr), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; @@ -1025,10 +675,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, goto err_put; } } - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &udata); @@ -1043,9 +689,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); - if (ret) - goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; @@ -1058,32 +701,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mr_list); - mutex_unlock(&file->mutex); + uobj_put_obj_read(pd); - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - -err_unreg: ib_dereg_mr(mr); err_put: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: - put_pd_read(pd); + uobj_put_obj_read(pd); err_free: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -1119,11 +750,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, - file->ucontext); - - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); mr = uobj->object; @@ -1134,7 +764,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -1167,11 +797,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - put_pd_read(pd); + uobj_put_obj_read(pd); put_uobjs: - - put_uobj_write(mr->uobject); + uobj_put_write(uobj); return ret; } @@ -1182,39 +811,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dereg_mr cmd; - struct ib_mr *mr; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mr = uobj->object; - - ret = ib_dereg_mr(mr); - if (!ret) - uobj->live = 0; + uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - put_uobj_write(uobj); - - if (ret) - return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); + ret = uobj_remove_commit(uobj); - put_uobj(uobj); - - return in_len; + return ret ?: in_len; } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, @@ -1236,14 +846,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mw_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(mw), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; @@ -1254,11 +861,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; - mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); @@ -1271,9 +873,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mw; - ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); - if (ret) - goto err_unalloc; memset(&resp, 0, sizeof(resp)); resp.rkey = mw->rkey; @@ -1285,32 +884,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mw_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_put_obj_read(pd); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - -err_unalloc: uverbs_dealloc_mw(mw); - err_put: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: - put_pd_read(pd); - + uobj_put_obj_read(pd); err_free: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -1320,39 +904,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dealloc_mw cmd; - struct ib_mw *mw; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mw = uobj->object; - - ret = uverbs_dealloc_mw(mw); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; + uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + ret = uobj_remove_commit(uobj); + return ret ?: in_len; } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, @@ -1362,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; - struct file *filp; - int ret; + struct ib_uobject *uobj; + struct ib_uverbs_completion_event_file *ev_file; if (out_len < sizeof resp) return -ENOSPC; @@ -1371,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - return ret; - resp.fd = ret; + uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); - if (IS_ERR(filp)) { - put_unused_fd(resp.fd); - return PTR_ERR(filp); - } + resp.fd = uobj->id; + + ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, + uobj_file.uobj); + ib_uverbs_init_event_queue(&ev_file->ev_queue); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { - put_unused_fd(resp.fd); - fput(filp); + uobj_alloc_abort(uobj); return -EFAULT; } - fd_install(resp.fd, filp); + uobj_alloc_commit(uobj); return in_len; } @@ -1407,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, void *context) { struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file = NULL; + struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_cq *cq; int ret; struct ib_uverbs_ex_create_cq_resp resp; @@ -1416,21 +978,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); - down_write(&obj->uobject.mutex); + obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq), + file->ucontext); + if (IS_ERR(obj)) + return obj; if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); - if (!ev_file) { - ret = -EINVAL; + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, + file->ucontext); + if (IS_ERR(ev_file)) { + ret = PTR_ERR(ev_file); goto err; } } + obj->uobject.user_handle = cmd->user_handle; obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; @@ -1443,13 +1005,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; - ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; - - cq = ib_dev->create_cq(ib_dev, &attr, - file->ucontext, uhw); + cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1459,14 +1015,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = ev_file; + cq->cq_context = &ev_file->ev_queue; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; - ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); - if (ret) - goto err_free; - memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; resp.base.cqe = cq->cqe; @@ -1478,32 +1030,19 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (ret) goto err_cb; - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - - up_write(&obj->uobject.mutex); + uobj_alloc_commit(&obj->uobject); return obj; err_cb: - idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - -err_free: ib_destroy_cq(cq); err_file: - ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: if (ev_file) ib_uverbs_release_ucq(file, ev_file, obj); err: - put_uobj_write(&obj->uobject); + uobj_alloc_abort(&obj->uobject); return ERR_PTR(ret); } @@ -1626,7 +1165,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1641,7 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - put_cq_read(cq); + uobj_put_obj_read(cq); return ret ? ret : in_len; } @@ -1688,7 +1227,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1720,7 +1259,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, ret = in_len; out_put: - put_cq_read(cq); + uobj_put_obj_read(cq); return ret; } @@ -1735,14 +1274,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - put_cq_read(cq); + uobj_put_obj_read(cq); return in_len; } @@ -1757,44 +1296,38 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_cq *cq; struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file; + struct ib_uverbs_event_queue *ev_queue; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. + */ + uverbs_uobject_get(uobj); cq = uobj->object; - ev_file = cq->cq_context; + ev_queue = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - ret = ib_destroy_cq(cq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + memset(&resp, 0, sizeof(resp)); - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; + } - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_ucq(file, ev_file, obj); - - memset(&resp, 0, sizeof resp); resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - put_uobj(uobj); - + uverbs_uobject_put(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; @@ -1816,7 +1349,7 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_device *device; struct ib_pd *pd = NULL; struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT); struct ib_cq *scq = NULL, *rcq = NULL; struct ib_srq *srq = NULL; struct ib_qp *qp; @@ -1830,18 +1363,20 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = kzalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); + obj->uxrcd = NULL; + obj->uevent.uobject.user_handle = cmd->user_handle; + mutex_init(&obj->mcast_lock); - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, - &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, - file->ucontext); + ind_tbl = uobj_get_obj_read(rwq_ind_table, + cmd->rwq_ind_tbl_handle, + file->ucontext); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1865,8 +1400,15 @@ static int create_qp(struct ib_uverbs_file *file, has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, - &xrcd_uobj); + xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle, + file->ucontext); + + if (IS_ERR(xrcd_uobj)) { + ret = -EINVAL; + goto err_put; + } + + xrcd = (struct ib_xrcd *)xrcd_uobj->object; if (!xrcd) { ret = -EINVAL; goto err_put; @@ -1878,8 +1420,8 @@ static int create_qp(struct ib_uverbs_file *file, cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = idr_read_srq(cmd->srq_handle, - file->ucontext); + srq = uobj_get_obj_read(srq, cmd->srq_handle, + file->ucontext); if (!srq || srq->srq_type != IB_SRQT_BASIC) { ret = -EINVAL; goto err_put; @@ -1888,8 +1430,8 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); + rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle, + file->ucontext); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1899,10 +1441,11 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + scq = uobj_get_obj_read(cq, cmd->send_cq_handle, + file->ucontext); if (!ind_tbl) rcq = rcq ?: scq; - pd = idr_read_pd(cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1954,11 +1497,6 @@ static int create_qp(struct ib_uverbs_file *file, goto err_put; } - ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_put; - if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else @@ -1966,7 +1504,7 @@ static int create_qp(struct ib_uverbs_file *file, if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_create; + goto err_put; } if (cmd->qp_type != IB_QPT_XRC_TGT) { @@ -1994,9 +1532,6 @@ static int create_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; memset(&resp, 0, sizeof resp); resp.base.qpn = qp->qp_num; @@ -2018,54 +1553,41 @@ static int create_qp(struct ib_uverbs_file *file, obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); + uobj_put_read(xrcd_uobj); } if (pd) - put_pd_read(pd); + uobj_put_obj_read(pd); if (scq) - put_cq_read(scq); + uobj_put_obj_read(scq); if (rcq && rcq != scq) - put_cq_read(rcq); + uobj_put_obj_read(rcq); if (srq) - put_srq_read(srq); + uobj_put_obj_read(srq); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); + uobj_put_obj_read(ind_tbl); - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); + uobj_alloc_commit(&obj->uevent.uobject); return 0; err_cb: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - -err_destroy: ib_destroy_qp(qp); -err_create: - ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device, - RDMACG_RESOURCE_HCA_OBJECT); - err_put: - if (xrcd) - put_xrcd_read(xrcd_uobj); + if (!IS_ERR(xrcd_uobj)) + uobj_put_read(xrcd_uobj); if (pd) - put_pd_read(pd); + uobj_put_obj_read(pd); if (scq) - put_cq_read(scq); + uobj_put_obj_read(scq); if (rcq && rcq != scq) - put_cq_read(rcq); + uobj_put_obj_read(rcq); if (srq) - put_srq_read(srq); + uobj_put_obj_read(srq); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); + uobj_put_obj_read(ind_tbl); - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return ret; } @@ -2201,17 +1723,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); + xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle, + file->ucontext); + if (IS_ERR(xrcd_uobj)) { + ret = -EINVAL; + goto err_put; + } - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + xrcd = (struct ib_xrcd *)xrcd_uobj->object; if (!xrcd) { ret = -EINVAL; - goto err_put; + goto err_xrcd; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -2226,15 +1753,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp = ib_open_qp(xrcd, &attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_put; + goto err_xrcd; } - qp->uobject = &obj->uevent.uobject; - obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; + obj->uevent.uobject.user_handle = cmd.user_handle; memset(&resp, 0, sizeof resp); resp.qpn = qp->qp_num; @@ -2243,32 +1766,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_remove; + goto err_destroy; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); + qp->uobject = &obj->uevent.uobject; + uobj_put_read(xrcd_uobj); - obj->uevent.uobject.live = 1; - up_write(&obj->uevent.uobject.mutex); + uobj_alloc_commit(&obj->uevent.uobject); return in_len; -err_remove: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - err_destroy: ib_destroy_qp(qp); - +err_xrcd: + uobj_put_read(xrcd_uobj); err_put: - put_xrcd_read(xrcd_uobj); - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return ret; } @@ -2282,6 +1798,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_qp_attr *attr; struct ib_qp_init_attr *init_attr; + const struct ib_global_route *grh; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -2294,7 +1811,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; @@ -2302,7 +1819,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - put_qp_read(qp); + uobj_put_obj_read(qp); if (ret) goto out; @@ -2331,29 +1848,39 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.alt_port_num = attr->alt_port_num; resp.alt_timeout = attr->alt_timeout; - memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); - resp.dest.flow_label = attr->ah_attr.grh.flow_label; - resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; - resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; - resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; - resp.dest.dlid = attr->ah_attr.dlid; - resp.dest.sl = attr->ah_attr.sl; - resp.dest.src_path_bits = attr->ah_attr.src_path_bits; - resp.dest.static_rate = attr->ah_attr.static_rate; - resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); - resp.dest.port_num = attr->ah_attr.port_num; - - memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); - resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; - resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; - resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; - resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; - resp.alt_dest.dlid = attr->alt_ah_attr.dlid; - resp.alt_dest.sl = attr->alt_ah_attr.sl; - resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; - resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; - resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); - resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr); + resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr); + resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr); + resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr); + resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) & + IB_AH_GRH); + if (resp.dest.is_global) { + grh = rdma_ah_read_grh(&attr->ah_attr); + memcpy(resp.dest.dgid, grh->dgid.raw, 16); + resp.dest.flow_label = grh->flow_label; + resp.dest.sgid_index = grh->sgid_index; + resp.dest.hop_limit = grh->hop_limit; + resp.dest.traffic_class = grh->traffic_class; + } + resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr); + + resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr); + resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr); + resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr); + resp.alt_dest.static_rate + = rdma_ah_get_static_rate(&attr->alt_ah_attr); + resp.alt_dest.is_global + = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) & + IB_AH_GRH); + if (resp.alt_dest.is_global) { + grh = rdma_ah_read_grh(&attr->alt_ah_attr); + memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16); + resp.alt_dest.flow_label = grh->flow_label; + resp.alt_dest.sgid_index = grh->sgid_index; + resp.alt_dest.hop_limit = grh->hop_limit; + resp.alt_dest.traffic_class = grh->traffic_class; + } + resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr); resp.max_send_wr = init_attr->cap.max_send_wr; resp.max_recv_wr = init_attr->cap.max_recv_wr; @@ -2398,7 +1925,7 @@ static int modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - qp = idr_read_qp(cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; @@ -2427,31 +1954,47 @@ static int modify_qp(struct ib_uverbs_file *file, attr->alt_timeout = cmd->base.alt_timeout; attr->rate_limit = cmd->rate_limit; - memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index; - attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class; - attr->ah_attr.dlid = cmd->base.dest.dlid; - attr->ah_attr.sl = cmd->base.dest.sl; - attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits; - attr->ah_attr.static_rate = cmd->base.dest.static_rate; - attr->ah_attr.ah_flags = cmd->base.dest.is_global ? - IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd->base.dest.port_num; - - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd->base.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ? - IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num; + attr->ah_attr.type = rdma_ah_find_type(qp->device, + cmd->base.dest.port_num); + if (cmd->base.dest.is_global) { + rdma_ah_set_grh(&attr->ah_attr, NULL, + cmd->base.dest.flow_label, + cmd->base.dest.sgid_index, + cmd->base.dest.hop_limit, + cmd->base.dest.traffic_class); + rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid); + } else { + rdma_ah_set_ah_flags(&attr->ah_attr, 0); + } + rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid); + rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl); + rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits); + rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate); + rdma_ah_set_port_num(&attr->ah_attr, + cmd->base.dest.port_num); + + attr->alt_ah_attr.type = rdma_ah_find_type(qp->device, + cmd->base.dest.port_num); + if (cmd->base.alt_dest.is_global) { + rdma_ah_set_grh(&attr->alt_ah_attr, NULL, + cmd->base.alt_dest.flow_label, + cmd->base.alt_dest.sgid_index, + cmd->base.alt_dest.hop_limit, + cmd->base.alt_dest.traffic_class); + rdma_ah_set_dgid_raw(&attr->alt_ah_attr, + cmd->base.alt_dest.dgid); + } else { + rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0); + } + + rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid); + rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl); + rdma_ah_set_path_bits(&attr->alt_ah_attr, + cmd->base.alt_dest.src_path_bits); + rdma_ah_set_static_rate(&attr->alt_ah_attr, + cmd->base.alt_dest.static_rate); + rdma_ah_set_port_num(&attr->alt_ah_attr, + cmd->base.alt_dest.port_num); if (qp->real_qp == qp) { if (cmd->base.attr_mask & IB_QP_AV) { @@ -2470,7 +2013,7 @@ static int modify_qp(struct ib_uverbs_file *file, } release_qp: - put_qp_read(qp); + uobj_put_obj_read(qp); out: kfree(attr); @@ -2557,42 +2100,27 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + qp = uobj->object; obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. + */ + uverbs_uobject_get(uobj); - if (!list_empty(&obj->mcast_list)) { - put_uobj_write(uobj); - return -EBUSY; - } - - ret = ib_destroy_qp(qp); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - if (obj->uxrcd) - atomic_dec(&obj->uxrcd->refcnt); - - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, &obj->uevent); + } resp.events_reported = obj->uevent.events_reported; - - put_uobj(uobj); + uverbs_uobject_put(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -2603,9 +2131,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, static void *alloc_wr(size_t wr_size, __u32 num_sge) { + if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) / + sizeof (struct ib_sge)) + return NULL; + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + num_sge * sizeof (struct ib_sge), GFP_KERNEL); -}; +} ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_device *ib_dev, @@ -2636,7 +2168,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) goto out; @@ -2672,7 +2204,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah, + file->ucontext); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2779,11 +2312,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ret = -EFAULT; out_put: - put_qp_read(qp); + uobj_put_obj_read(qp); while (wr) { if (is_ud && ud_wr(wr)->ah) - put_ah_read(ud_wr(wr)->ah); + uobj_put_obj_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; @@ -2832,6 +2365,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, goto err; } + if (user_wr->num_sge >= + (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) / + sizeof (struct ib_sge)) { + ret = -EINVAL; + goto err; + } + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + user_wr->num_sge * sizeof (struct ib_sge), GFP_KERNEL); @@ -2900,21 +2440,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) goto out; resp.bad_wr = 0; ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); - put_qp_read(qp); - - if (ret) + uobj_put_obj_read(qp); + if (ret) { for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -2950,14 +2490,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); if (!srq) goto out; resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - put_srq_read(srq); + uobj_put_obj_read(srq); if (ret) for (next = wr; next; next = next->next) { @@ -2990,9 +2530,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; - struct ib_ah_attr attr; + struct rdma_ah_attr attr; int ret; struct ib_udata udata; + u8 *dmac; if (out_len < sizeof resp) return -ENOSPC; @@ -3004,54 +2545,50 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, (unsigned long)cmd.response + sizeof(resp), in_len - sizeof(cmd), out_len - sizeof(resp)); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(ah), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } - attr.dlid = cmd.attr.dlid; - attr.sl = cmd.attr.sl; - attr.src_path_bits = cmd.attr.src_path_bits; - attr.static_rate = cmd.attr.static_rate; - attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0; - attr.port_num = cmd.attr.port_num; - attr.grh.flow_label = cmd.attr.grh.flow_label; - attr.grh.sgid_index = cmd.attr.grh.sgid_index; - attr.grh.hop_limit = cmd.attr.grh.hop_limit; - attr.grh.traffic_class = cmd.attr.grh.traffic_class; - memset(&attr.dmac, 0, sizeof(attr.dmac)); - memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; + attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num); + rdma_ah_set_dlid(&attr, cmd.attr.dlid); + rdma_ah_set_sl(&attr, cmd.attr.sl); + rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits); + rdma_ah_set_static_rate(&attr, cmd.attr.static_rate); + rdma_ah_set_port_num(&attr, cmd.attr.port_num); + + if (cmd.attr.is_global) { + rdma_ah_set_grh(&attr, NULL, cmd.attr.grh.flow_label, + cmd.attr.grh.sgid_index, + cmd.attr.grh.hop_limit, + cmd.attr.grh.traffic_class); + rdma_ah_set_dgid_raw(&attr, cmd.attr.grh.dgid); + } else { + rdma_ah_set_ah_flags(&attr, 0); + } + dmac = rdma_ah_retrieve_dmac(&attr); + if (dmac) + memset(dmac, 0, ETH_ALEN); ah = pd->device->create_ah(pd, &attr, &udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); - goto err_create; + goto err_put; } ah->device = pd->device; ah->pd = pd; atomic_inc(&pd->usecnt); ah->uobject = uobj; + uobj->user_handle = cmd.user_handle; uobj->object = ah; - ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); - if (ret) - goto err_destroy; - resp.ah_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -3060,32 +2597,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->ah_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_put_obj_read(pd); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - -err_destroy: - ib_destroy_ah(ah); - -err_create: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); + rdma_destroy_ah(ah); -err_charge: - put_pd_read(pd); +err_put: + uobj_put_obj_read(pd); err: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -3094,38 +2618,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; - struct ib_ah *ah; struct ib_uobject *uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); - if (!uobj) - return -EINVAL; - ah = uobj->object; - - ret = ib_destroy_ah(ah); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + ret = uobj_remove_commit(uobj); + return ret ?: in_len; } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -3142,12 +2647,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_write_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { @@ -3171,7 +2677,8 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, kfree(mcast); out_put: - put_qp_write(qp); + mutex_unlock(&obj->mcast_lock); + uobj_put_obj_read(qp); return ret ? ret : in_len; } @@ -3186,31 +2693,37 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; + bool found = false; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_write_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; - ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); - if (ret) - goto out_put; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { list_del(&mcast->list); kfree(mcast); + found = true; break; } -out_put: - put_qp_write(qp); + if (!found) { + ret = -EINVAL; + goto out_put; + } + + ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid); +out_put: + mutex_unlock(&obj->mcast_lock); + uobj_put_obj_read(qp); return ret ? ret : in_len; } @@ -3227,6 +2740,13 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec, ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag); ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id; break; + case IB_FLOW_SPEC_ACTION_DROP: + if (kern_spec->drop.size != + sizeof(struct ib_uverbs_flow_spec_action_drop)) + return -EINVAL; + + ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop); + break; default: return -EINVAL; } @@ -3402,20 +2922,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = kmalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, - &wq_lock_class); - down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3450,9 +2968,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, atomic_inc(&cq->usecnt); wq->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = wq; - err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); - if (err) - goto destroy_wq; memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; @@ -3465,27 +2980,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (err) goto err_copy; - put_pd_read(pd); - put_cq_read(cq); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - up_write(&obj->uevent.uobject.mutex); + uobj_put_obj_read(pd); + uobj_put_obj_read(cq); + uobj_alloc_commit(&obj->uevent.uobject); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); -destroy_wq: ib_destroy_wq(wq); err_put_cq: - put_cq_read(cq); + uobj_put_obj_read(cq); err_put_pd: - put_pd_read(pd); + uobj_put_obj_read(pd); err_uobj: - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return err; } @@ -3526,36 +3033,26 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, - file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); wq = uobj->object; obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - ret = ib_destroy_wq(wq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. + */ + uverbs_uobject_get(uobj); - ib_uverbs_release_uevent(file, &obj->uevent); + ret = uobj_remove_commit(uobj); resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); - - ret = ib_copy_to_udata(ucore, &resp, resp.response_length); + uverbs_uobject_put(uobj); if (ret) return ret; - return 0; + return ib_copy_to_udata(ucore, &resp, resp.response_length); } int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, @@ -3588,7 +3085,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = idr_read_wq(cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext); if (!wq) return -EINVAL; @@ -3599,7 +3096,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.flags_mask = cmd.flags_mask; } ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); - put_wq_read(wq); + uobj_put_obj_read(wq); return ret; } @@ -3677,7 +3174,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs], + file->ucontext); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3686,14 +3184,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; + uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext); + if (IS_ERR(uobj)) { + err = PTR_ERR(uobj); goto put_wqs; } - init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); - down_write(&uobj->mutex); init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); @@ -3713,10 +3209,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (i = 0; i < num_wq_handles; i++) atomic_inc(&wqs[i]->usecnt); - err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - if (err) - goto destroy_ind_tbl; - resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; resp.response_length = required_resp_len; @@ -3729,26 +3221,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); + uobj_put_obj_read(wqs[j]); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_alloc_commit(uobj); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); -destroy_ind_tbl: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); put_wqs: for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); + uobj_put_obj_read(wqs[j]); err_free: kfree(wqs_handles); kfree(wqs); @@ -3761,10 +3245,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_uobject *uobj; int ret; - struct ib_wq **ind_tbl; size_t required_cmd_sz; required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); @@ -3784,31 +3266,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - rwq_ind_tbl = uobj->object; - ind_tbl = rwq_ind_tbl->ind_tbl; - - ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - kfree(ind_tbl); - return ret; + return uobj_remove_commit(uobj); } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, @@ -3882,15 +3345,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; + uobj = uobj_alloc(uobj_get_type(flow), file->ucontext); + if (IS_ERR(uobj)) { + err = PTR_ERR(uobj); goto err_free_attr; } - init_uobj(uobj, 0, file->ucontext, &rule_lock_class); - down_write(&uobj->mutex); - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3931,24 +3392,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = -EINVAL; goto err_free; } - - err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (err) - goto err_free; - flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); - goto err_create; + goto err_free; } flow_id->uobject = uobj; uobj->object = flow_id; - err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); - if (err) - goto destroy_flow; - memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; @@ -3957,30 +3408,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (err) goto err_copy; - put_qp_read(qp); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rule_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_put_obj_read(qp); + uobj_alloc_commit(uobj); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); -destroy_flow: ib_destroy_flow(flow_id); -err_create: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); err_free: kfree(flow_attr); err_put: - put_qp_read(qp); + uobj_put_obj_read(qp); err_uobj: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); @@ -3993,7 +3434,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; - struct ib_flow *flow_id; struct ib_uobject *uobj; int ret; @@ -4007,29 +3447,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - flow_id = uobj->object; - - ret = ib_destroy_flow(flow_id); - if (!ret) { - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - uobj->live = 0; - } - - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); + uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + ret = uobj_remove_commit(uobj); return ret; } @@ -4046,31 +3469,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_srq_init_attr attr; int ret; - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); - down_write(&obj->uevent.uobject.mutex); + obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); - if (!attr.ext.xrc.xrcd) { + xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle, + file->ucontext); + if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; } + attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object; + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err_put_xrcd; + } + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle, + file->ucontext); if (!attr.ext.xrc.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = idr_read_pd(cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -4086,11 +3515,6 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_put_cq; - srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); @@ -4115,9 +3539,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, atomic_set(&srq->usecnt, 0); obj->uevent.uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; + obj->uevent.uobject.user_handle = cmd->user_handle; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -4133,44 +3555,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (cmd->srq_type == IB_SRQT_XRC) { - put_uobj_read(xrcd_uobj); - put_cq_read(attr.ext.xrc.cq); + uobj_put_read(xrcd_uobj); + uobj_put_obj_read(attr.ext.xrc.cq); } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); + uobj_put_obj_read(pd); + uobj_alloc_commit(&obj->uevent.uobject); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - -err_destroy: ib_destroy_srq(srq); err_put: - ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - put_pd_read(pd); + uobj_put_obj_read(pd); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) - put_cq_read(attr.ext.xrc.cq); + uobj_put_obj_read(attr.ext.xrc.cq); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); + uobj_put_read(xrcd_uobj); } err: - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return ret; } @@ -4255,7 +3665,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -4264,7 +3674,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); - put_srq_read(srq); + uobj_put_obj_read(srq); return ret ? ret : in_len; } @@ -4286,13 +3696,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; ret = ib_query_srq(srq, &attr); - put_srq_read(srq); + uobj_put_obj_read(srq); if (ret) return ret; @@ -4321,53 +3731,39 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; - struct ib_usrq_object *us; enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); srq_type = srq->srq_type; + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. + */ + uverbs_uobject_get(uobj); - ret = ib_destroy_srq(srq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + memset(&resp, 0, sizeof(resp)); - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - if (srq_type == IB_SRQT_XRC) { - us = container_of(obj, struct ib_usrq_object, uevent); - atomic_dec(&us->uxrcd->refcnt); } - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, obj); - - memset(&resp, 0, sizeof resp); resp.events_reported = obj->events_reported; + uverbs_uobject_put(uobj); + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + return -EFAULT; - put_uobj(uobj); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - - return ret ? ret : in_len; + return in_len; } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 35c788a32e26..3d2609608f58 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -52,6 +52,7 @@ #include "uverbs.h" #include "core_priv.h" +#include "rdma_core.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); @@ -67,19 +68,6 @@ enum { static struct class *uverbs_class; -DEFINE_SPINLOCK(ib_uverbs_idr_lock); -DEFINE_IDR(ib_uverbs_pd_idr); -DEFINE_IDR(ib_uverbs_mr_idr); -DEFINE_IDR(ib_uverbs_mw_idr); -DEFINE_IDR(ib_uverbs_ah_idr); -DEFINE_IDR(ib_uverbs_cq_idr); -DEFINE_IDR(ib_uverbs_qp_idr); -DEFINE_IDR(ib_uverbs_srq_idr); -DEFINE_IDR(ib_uverbs_xrcd_idr); -DEFINE_IDR(ib_uverbs_rule_idr); -DEFINE_IDR(ib_uverbs_wq_idr); -DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); - static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -168,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = { .release = ib_uverbs_release_dev, }; -static void ib_uverbs_release_event_file(struct kref *ref) +static void ib_uverbs_release_async_event_file(struct kref *ref) { - struct ib_uverbs_event_file *file = - container_of(ref, struct ib_uverbs_event_file, ref); + struct ib_uverbs_async_event_file *file = + container_of(ref, struct ib_uverbs_async_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, + struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { - spin_lock_irq(&ev_file->lock); + spin_lock_irq(&ev_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&ev_file->lock); + spin_unlock_irq(&ev_file->ev_queue.lock); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); + uverbs_uobject_put(&ev_file->uobj_file.uobj); } - spin_lock_irq(&file->async_file->lock); + spin_lock_irq(&file->async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->lock); + spin_unlock_irq(&file->async_file->ev_queue.lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, @@ -206,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, { struct ib_uverbs_event *evt, *tmp; - spin_lock_irq(&file->async_file->lock); + spin_lock_irq(&file->async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->lock); + spin_unlock_irq(&file->async_file->ev_queue.lock); } -static void ib_uverbs_detach_umcast(struct ib_qp *qp, - struct ib_uqp_object *uobj) +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; @@ -227,138 +215,11 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp, } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context) + struct ib_ucontext *context, + bool device_removed) { - struct ib_uobject *uobj, *tmp; - context->closing = 1; - - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = uobj->object; - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - ib_destroy_ah(ah); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - /* Remove MWs before QPs, in order to support type 2A MWs. */ - list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { - struct ib_mw *mw = uobj->object; - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - uverbs_dealloc_mw(mw); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { - struct ib_flow *flow_id = uobj->object; - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - ib_destroy_flow(flow_id); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = uobj->object; - struct ib_uqp_object *uqp = - container_of(uobj, struct ib_uqp_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp == qp->real_qp) - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - ib_uverbs_release_uevent(file, &uqp->uevent); - kfree(uqp); - } - - list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { - struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; - struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - ib_destroy_rwq_ind_table(rwq_ind_tbl); - kfree(ind_tbl); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { - struct ib_wq *wq = uobj->object; - struct ib_uwq_object *uwq = - container_of(uobj, struct ib_uwq_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - ib_destroy_wq(wq); - ib_uverbs_release_uevent(file, &uwq->uevent); - kfree(uwq); - } - - list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = uobj->object; - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - ib_destroy_srq(srq); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); - } - - list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = uobj->object; - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, struct ib_ucq_object, uobject); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - ib_destroy_cq(cq); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); - } - - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = uobj->object; - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - ib_dereg_mr(mr); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - mutex_lock(&file->device->xrcd_tree_mutex); - list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { - struct ib_xrcd *xrcd = uobj->object; - struct ib_uxrcd_object *uxrcd = - container_of(uobj, struct ib_uxrcd_object, uobject); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - ib_uverbs_dealloc_xrcd(file->device, xrcd); - kfree(uxrcd); - } - mutex_unlock(&file->device->xrcd_tree_mutex); - - list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = uobj->object; - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - ib_dealloc_pd(pd); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - + uverbs_cleanup_ucontext(context, device_removed); put_pid(context->tgid); ib_rdmacg_uncharge(&context->cg_obj, context->device, @@ -372,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) complete(&dev->comp); } -static void ib_uverbs_release_file(struct kref *ref) +void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); @@ -392,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref) kfree(file); } -static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) +static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, + struct ib_uverbs_file *uverbs_file, + struct file *filp, char __user *buf, + size_t count, loff_t *pos, + size_t eventsz) { - struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; - int eventsz; int ret = 0; - spin_lock_irq(&file->lock); + spin_lock_irq(&ev_queue->lock); - while (list_empty(&file->event_list)) { - spin_unlock_irq(&file->lock); + while (list_empty(&ev_queue->event_list)) { + spin_unlock_irq(&ev_queue->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; - if (wait_event_interruptible(file->poll_wait, - (!list_empty(&file->event_list) || + if (wait_event_interruptible(ev_queue->poll_wait, + (!list_empty(&ev_queue->event_list) || /* The barriers built into wait_event_interruptible() * and wake_up() guarentee this will see the null set * without using RCU */ - !file->uverbs_file->device->ib_dev))) + !uverbs_file->device->ib_dev))) return -ERESTARTSYS; /* If device was disassociated and no event exists set an error */ - if (list_empty(&file->event_list) && - !file->uverbs_file->device->ib_dev) + if (list_empty(&ev_queue->event_list) && + !uverbs_file->device->ib_dev) return -EIO; - spin_lock_irq(&file->lock); + spin_lock_irq(&ev_queue->lock); } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); - - if (file->is_async) - eventsz = sizeof (struct ib_uverbs_async_event_desc); - else - eventsz = sizeof (struct ib_uverbs_comp_event_desc); + event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { - list_del(file->event_list.next); + list_del(ev_queue->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } - spin_unlock_irq(&file->lock); + spin_unlock_irq(&ev_queue->lock); if (event) { if (copy_to_user(buf, event, eventsz)) @@ -457,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, return ret; } -static unsigned int ib_uverbs_event_poll(struct file *filp, +static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, + buf, count, pos, + sizeof(struct ib_uverbs_async_event_desc)); +} + +static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return ib_uverbs_event_read(&comp_ev_file->ev_queue, + comp_ev_file->uobj_file.ufile, filp, + buf, count, pos, + sizeof(struct ib_uverbs_comp_event_desc)); +} + +static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, + struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; - struct ib_uverbs_event_file *file = filp->private_data; - poll_wait(filp, &file->poll_wait, wait); + poll_wait(filp, &ev_queue->poll_wait, wait); - spin_lock_irq(&file->lock); - if (!list_empty(&file->event_list)) + spin_lock_irq(&ev_queue->lock); + if (!list_empty(&ev_queue->event_list)) pollflags = POLLIN | POLLRDNORM; - spin_unlock_irq(&file->lock); + spin_unlock_irq(&ev_queue->lock); return pollflags; } -static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) +static unsigned int ib_uverbs_async_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + return ib_uverbs_event_poll(filp->private_data, filp, wait); +} + +static unsigned int ib_uverbs_comp_event_poll(struct file *filp, + struct poll_table_struct *wait) { - struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; - return fasync_helper(fd, filp, on, &file->async_queue); + return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait); } -static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { - struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event_queue *ev_queue = filp->private_data; + + return fasync_helper(fd, filp, on, &ev_queue->async_queue); +} + +static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) +{ + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); +} + +static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_async_event_file *file = filp->private_data; + struct ib_uverbs_file *uverbs_file = file->uverbs_file; struct ib_uverbs_event *entry, *tmp; int closed_already = 0; - mutex_lock(&file->uverbs_file->device->lists_mutex); - spin_lock_irq(&file->lock); - closed_already = file->is_closed; - file->is_closed = 1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + mutex_lock(&uverbs_file->device->lists_mutex); + spin_lock_irq(&file->ev_queue.lock); + closed_already = file->ev_queue.is_closed; + file->ev_queue.is_closed = 1; + list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } - spin_unlock_irq(&file->lock); + spin_unlock_irq(&file->ev_queue.lock); if (!closed_already) { list_del(&file->list); - if (file->is_async) - ib_unregister_event_handler(&file->uverbs_file-> - event_handler); + ib_unregister_event_handler(&uverbs_file->event_handler); + } + mutex_unlock(&uverbs_file->device->lists_mutex); + + kref_put(&uverbs_file->ref, ib_uverbs_release_file); + kref_put(&file->ref, ib_uverbs_release_async_event_file); + + return 0; +} + +static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_completion_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); } - mutex_unlock(&file->uverbs_file->device->lists_mutex); + spin_unlock_irq(&file->ev_queue.lock); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_event_file); + uverbs_close_fd(filp); return 0; } -static const struct file_operations uverbs_event_fops = { +const struct file_operations uverbs_event_fops = { + .owner = THIS_MODULE, + .read = ib_uverbs_comp_event_read, + .poll = ib_uverbs_comp_event_poll, + .release = ib_uverbs_comp_event_close, + .fasync = ib_uverbs_comp_event_fasync, + .llseek = no_llseek, +}; + +static const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, - .read = ib_uverbs_event_read, - .poll = ib_uverbs_event_poll, - .release = ib_uverbs_event_close, - .fasync = ib_uverbs_event_fasync, + .read = ib_uverbs_async_event_read, + .poll = ib_uverbs_async_event_poll, + .release = ib_uverbs_async_event_close, + .fasync = ib_uverbs_async_event_fasync, .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_event_file *file = cq_context; + struct ib_uverbs_event_queue *ev_queue = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; - if (!file) + if (!ev_queue) return; - spin_lock_irqsave(&file->lock, flags); - if (file->is_closed) { - spin_unlock_irqrestore(&file->lock, flags); + spin_lock_irqsave(&ev_queue->lock, flags); + if (ev_queue->is_closed) { + spin_unlock_irqrestore(&ev_queue->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->lock, flags); + spin_unlock_irqrestore(&ev_queue->lock, flags); return; } @@ -546,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - list_add_tail(&entry->list, &file->event_list); + list_add_tail(&entry->list, &ev_queue->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->lock, flags); + spin_unlock_irqrestore(&ev_queue->lock, flags); - wake_up_interruptible(&file->poll_wait); - kill_fasync(&file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&ev_queue->poll_wait); + kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -562,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; - spin_lock_irqsave(&file->async_file->lock, flags); - if (file->async_file->is_closed) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_lock_irqsave(&file->async_file->ev_queue.lock, flags); + if (file->async_file->ev_queue.is_closed) { + spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); return; } @@ -579,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.reserved = 0; entry->counter = counter; - list_add_tail(&entry->list, &file->async_file->event_list); + list_add_tail(&entry->list, &file->async_file->ev_queue.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); - wake_up_interruptible(&file->async_file->poll_wait); - kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->ev_queue.poll_wait); + kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) @@ -603,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) struct ib_uevent_object *uobj; /* for XRC target qp's, check that qp is live */ - if (!event->element.qp->uobject || !event->element.qp->uobject->live) + if (!event->element.qp->uobject) return; uobj = container_of(event->element.qp->uobject, @@ -648,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) { - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); file->async_file = NULL; } -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async) +void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) { - struct ib_uverbs_event_file *ev_file; + spin_lock_init(&ev_queue->lock); + INIT_LIST_HEAD(&ev_queue->event_list); + init_waitqueue_head(&ev_queue->poll_wait); + ev_queue->is_closed = 0; + ev_queue->async_queue = NULL; +} + +struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev) +{ + struct ib_uverbs_async_event_file *ev_file; struct file *filp; int ret; @@ -664,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, if (!ev_file) return ERR_PTR(-ENOMEM); - kref_init(&ev_file->ref); - spin_lock_init(&ev_file->lock); - INIT_LIST_HEAD(&ev_file->event_list); - init_waitqueue_head(&ev_file->poll_wait); + ib_uverbs_init_event_queue(&ev_file->ev_queue); ev_file->uverbs_file = uverbs_file; kref_get(&ev_file->uverbs_file->ref); - ev_file->async_queue = NULL; - ev_file->is_closed = 0; - - filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops, + kref_init(&ev_file->ref); + filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, ev_file, O_RDONLY); if (IS_ERR(filp)) goto err_put_refs; @@ -683,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, &uverbs_file->device->uverbs_events_file_list); mutex_unlock(&uverbs_file->device->lists_mutex); - if (is_async) { - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&uverbs_file->event_handler); - if (ret) - goto err_put_file; - - /* At that point async file stuff was fully set */ - ev_file->is_async = 1; - } + WARN_ON(uverbs_file->async_file); + uverbs_file->async_file = ev_file; + kref_get(&uverbs_file->async_file->ref); + INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, + ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&uverbs_file->event_handler); + if (ret) + goto err_put_file; + + /* At that point async file stuff was fully set */ return filp; err_put_file: fput(filp); - kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&uverbs_file->async_file->ref, + ib_uverbs_release_async_event_file); uverbs_file->async_file = NULL; return ERR_PTR(ret); err_put_refs: kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); + kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); return filp; } -/* - * Look up a completion event file by FD. If lookup is successful, - * takes a ref to the event file struct that it returns; if - * unsuccessful, returns NULL. - */ -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) -{ - struct ib_uverbs_event_file *ev_file = NULL; - struct fd f = fdget(fd); - - if (!f.file) - return NULL; - - if (f.file->f_op != &uverbs_event_fops) - goto out; - - ev_file = f.file->private_data; - if (ev_file->is_async) { - ev_file = NULL; - goto out; - } - - kref_get(&ev_file->ref); - -out: - fdput(f); - return ev_file; -} - static int verify_command_mask(struct ib_device *ib_dev, __u32 command) { u64 mask; @@ -986,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) } file->device = dev; + spin_lock_init(&file->idr_lock); + idr_init(&file->idr); file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); @@ -1019,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_lock(&file->cleanup_mutex); if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext); + ib_uverbs_cleanup_ucontext(file, file->ucontext, false); file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); + idr_destroy(&file->idr); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { @@ -1032,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_unlock(&file->device->lists_mutex); if (file->async_file) - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); kref_put(&file->ref, ib_uverbs_release_file); kobject_put(&dev->kobj); @@ -1189,7 +1093,7 @@ static void ib_uverbs_add_one(struct ib_device *device) cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; - uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; + cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj); kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; @@ -1231,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; - struct ib_uverbs_event_file *event_file; + struct ib_uverbs_async_event_file *event_file; struct ib_event event; /* Pending running commands to terminate */ @@ -1268,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, * (e.g mmput). */ ib_dev->disassociate_ucontext(ucontext); - ib_uverbs_cleanup_ucontext(file, ucontext); + mutex_lock(&file->cleanup_mutex); + ib_uverbs_cleanup_ucontext(file, ucontext, true); + mutex_unlock(&file->cleanup_mutex); } mutex_lock(&uverbs_dev->lists_mutex); @@ -1278,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { event_file = list_first_entry(&uverbs_dev-> uverbs_events_file_list, - struct ib_uverbs_event_file, + struct ib_uverbs_async_event_file, list); - spin_lock_irq(&event_file->lock); - event_file->is_closed = 1; - spin_unlock_irq(&event_file->lock); + spin_lock_irq(&event_file->ev_queue.lock); + event_file->ev_queue.is_closed = 1; + spin_unlock_irq(&event_file->ev_queue.lock); list_del(&event_file->list); - if (event_file->is_async) { - ib_unregister_event_handler(&event_file->uverbs_file-> - event_handler); - event_file->uverbs_file->event_handler.device = NULL; - } + ib_unregister_event_handler( + &event_file->uverbs_file->event_handler); + event_file->uverbs_file->event_handler.device = + NULL; - wake_up_interruptible(&event_file->poll_wait); - kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&event_file->ev_queue.poll_wait); + kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); } mutex_unlock(&uverbs_dev->lists_mutex); } @@ -1396,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void) unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); - idr_destroy(&ib_uverbs_pd_idr); - idr_destroy(&ib_uverbs_mr_idr); - idr_destroy(&ib_uverbs_mw_idr); - idr_destroy(&ib_uverbs_ah_idr); - idr_destroy(&ib_uverbs_cq_idr); - idr_destroy(&ib_uverbs_qp_idr); - idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index af020f80d50f..8b9587fe2303 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -34,20 +34,25 @@ #include <rdma/ib_marshall.h> void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, - struct ib_ah_attr *src) + struct rdma_ah_attr *src) { - memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); - dst->grh.flow_label = src->grh.flow_label; - dst->grh.sgid_index = src->grh.sgid_index; - dst->grh.hop_limit = src->grh.hop_limit; - dst->grh.traffic_class = src->grh.traffic_class; memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); - dst->dlid = src->dlid; - dst->sl = src->sl; - dst->src_path_bits = src->src_path_bits; - dst->static_rate = src->static_rate; - dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0; - dst->port_num = src->port_num; + dst->dlid = rdma_ah_get_dlid(src); + dst->sl = rdma_ah_get_sl(src); + dst->src_path_bits = rdma_ah_get_path_bits(src); + dst->static_rate = rdma_ah_get_static_rate(src); + dst->is_global = rdma_ah_get_ah_flags(src) & + IB_AH_GRH ? 1 : 0; + if (dst->is_global) { + const struct ib_global_route *grh = rdma_ah_read_grh(src); + + memcpy(dst->grh.dgid, grh->dgid.raw, sizeof(grh->dgid)); + dst->grh.flow_label = grh->flow_label; + dst->grh.sgid_index = grh->sgid_index; + dst->grh.hop_limit = grh->hop_limit; + dst->grh.traffic_class = grh->traffic_class; + } + dst->port_num = rdma_ah_get_port_num(src); dst->reserved = 0; } EXPORT_SYMBOL(ib_copy_ah_attr_to_user); @@ -91,15 +96,15 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, } EXPORT_SYMBOL(ib_copy_qp_attr_to_user); -void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, - struct ib_sa_path_rec *src) +void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct sa_path_rec *src) { memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); - dst->dlid = src->dlid; - dst->slid = src->slid; - dst->raw_traffic = src->raw_traffic; + dst->dlid = htons(ntohl(sa_path_get_dlid(src))); + dst->slid = htons(ntohl(sa_path_get_slid(src))); + dst->raw_traffic = sa_path_get_raw_traffic(src); dst->flow_label = src->flow_label; dst->hop_limit = src->hop_limit; dst->traffic_class = src->traffic_class; @@ -115,17 +120,43 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; } + +void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct sa_path_rec *src) +{ + struct sa_path_rec rec; + + if (src->rec_type == SA_PATH_REC_TYPE_OPA) { + sa_convert_path_opa_to_ib(&rec, src); + __ib_copy_path_rec_to_user(dst, &rec); + return; + } + __ib_copy_path_rec_to_user(dst, src); +} EXPORT_SYMBOL(ib_copy_path_rec_to_user); -void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, +void ib_copy_path_rec_from_user(struct sa_path_rec *dst, struct ib_user_path_rec *src) { + __be32 slid, dlid; + + memset(dst, 0, sizeof(*dst)); + if ((ib_is_opa_gid((union ib_gid *)src->sgid)) || + (ib_is_opa_gid((union ib_gid *)src->dgid))) { + dst->rec_type = SA_PATH_REC_TYPE_OPA; + slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid)); + dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid)); + } else { + dst->rec_type = SA_PATH_REC_TYPE_IB; + slid = htonl(ntohs(src->slid)); + dlid = htonl(ntohs(src->dlid)); + } memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); - dst->dlid = src->dlid; - dst->slid = src->slid; - dst->raw_traffic = src->raw_traffic; + sa_path_set_dlid(dst, dlid); + sa_path_set_slid(dst, slid); + sa_path_set_raw_traffic(dst, src->raw_traffic); dst->flow_label = src->flow_label; dst->hop_limit = src->hop_limit; dst->traffic_class = src->traffic_class; @@ -141,9 +172,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; - memset(dst->dmac, 0, sizeof(dst->dmac)); - dst->net = NULL; - dst->ifindex = 0; - dst->gid_type = IB_GID_TYPE_IB; + /* TODO: No need to set this */ + sa_path_set_dmac_zero(dst); + sa_path_set_ndev(dst, NULL); + sa_path_set_ifindex(dst, 0); } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c new file mode 100644 index 000000000000..ef293379f37a --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/uverbs_std_types.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <linux/bug.h> +#include <linux/file.h> +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_free_ah(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return rdma_destroy_ah((struct ib_ah *)uobject->object); +} + +static int uverbs_free_flow(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return ib_destroy_flow((struct ib_flow *)uobject->object); +} + +static int uverbs_free_mw(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return uverbs_dealloc_mw((struct ib_mw *)uobject->object); +} + +static int uverbs_free_qp(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_qp *qp = uobject->object; + struct ib_uqp_object *uqp = + container_of(uobject, struct ib_uqp_object, uevent.uobject); + int ret; + + if (why == RDMA_REMOVE_DESTROY) { + if (!list_empty(&uqp->mcast_list)) + return -EBUSY; + } else if (qp == qp->real_qp) { + ib_uverbs_detach_umcast(qp, uqp); + } + + ret = ib_destroy_qp(qp); + if (ret && why == RDMA_REMOVE_DESTROY) + return ret; + + if (uqp->uxrcd) + atomic_dec(&uqp->uxrcd->refcnt); + + ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent); + return ret; +} + +static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + int ret; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret || why != RDMA_REMOVE_DESTROY) + kfree(ind_tbl); + return ret; +} + +static int uverbs_free_wq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_wq *wq = uobject->object; + struct ib_uwq_object *uwq = + container_of(uobject, struct ib_uwq_object, uevent.uobject); + int ret; + + ret = ib_destroy_wq(wq); + if (!ret || why != RDMA_REMOVE_DESTROY) + ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + return ret; +} + +static int uverbs_free_srq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_srq *srq = uobject->object; + struct ib_uevent_object *uevent = + container_of(uobject, struct ib_uevent_object, uobject); + enum ib_srq_type srq_type = srq->srq_type; + int ret; + + ret = ib_destroy_srq(srq); + + if (ret && why == RDMA_REMOVE_DESTROY) + return ret; + + if (srq_type == IB_SRQT_XRC) { + struct ib_usrq_object *us = + container_of(uevent, struct ib_usrq_object, uevent); + + atomic_dec(&us->uxrcd->refcnt); + } + + ib_uverbs_release_uevent(uobject->context->ufile, uevent); + return ret; +} + +static int uverbs_free_cq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_cq *cq = uobject->object; + struct ib_uverbs_event_queue *ev_queue = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobject, struct ib_ucq_object, uobject); + int ret; + + ret = ib_destroy_cq(cq); + if (!ret || why != RDMA_REMOVE_DESTROY) + ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? + container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : NULL, + ucq); + return ret; +} + +static int uverbs_free_mr(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return ib_dereg_mr((struct ib_mr *)uobject->object); +} + +static int uverbs_free_xrcd(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_xrcd *xrcd = uobject->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobject, struct ib_uxrcd_object, uobject); + int ret; + + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); + if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) + ret = -EBUSY; + else + ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, + xrcd, why); + mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); + + return ret; +} + +static int uverbs_free_pd(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_pd *pd = uobject->object; + + if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) + return -EBUSY; + + ib_dealloc_pd((struct ib_pd *)uobject->object); + return 0; +} + +static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file, + enum rdma_remove_reason why) +{ + struct ib_uverbs_completion_event_file *comp_event_file = + container_of(uobj_file, struct ib_uverbs_completion_event_file, + uobj_file); + struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; + + spin_lock_irq(&event_queue->lock); + event_queue->is_closed = 1; + spin_unlock_irq(&event_queue->lock); + + if (why == RDMA_REMOVE_DRIVER_REMOVE) { + wake_up_interruptible(&event_queue->poll_wait); + kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); + } + return 0; +}; + +const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = { + .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0), + .context_closed = uverbs_hot_unplug_completion_event_file, + .fops = &uverbs_event_fops, + .name = "[infinibandevent]", + .flags = O_RDONLY, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_cq = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0), + .destroy_object = uverbs_free_cq, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_qp = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0), + .destroy_object = uverbs_free_qp, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_mw = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_mw, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_mr = { + /* 1 is used in order to free the MR after all the MWs */ + .type = UVERBS_TYPE_ALLOC_IDR(1), + .destroy_object = uverbs_free_mr, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_srq = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0), + .destroy_object = uverbs_free_srq, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_ah = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_ah, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_flow = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_flow, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_wq = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0), + .destroy_object = uverbs_free_wq, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_rwq_ind_tbl, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0), + .destroy_object = uverbs_free_xrcd, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_pd = { + /* 2 is used in order to free the PD after MRs */ + .type = UVERBS_TYPE_ALLOC_IDR(2), + .destroy_object = uverbs_free_pd, +}; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 85ed5051fdfd..4792f5209ac2 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -311,7 +311,7 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ -struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) { struct ib_ah *ah; @@ -321,12 +321,13 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) ah->device = pd->device; ah->pd = pd; ah->uobject = NULL; + ah->type = ah_attr->type; atomic_inc(&pd->usecnt); } return ah; } -EXPORT_SYMBOL(ib_create_ah); +EXPORT_SYMBOL(rdma_create_ah); int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { @@ -452,7 +453,7 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, - struct ib_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr) { u32 flow_class; u16 gid_index; @@ -464,6 +465,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, union ib_gid sgid; memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->type = rdma_ah_find_type(device, port_num); if (rdma_cap_eth_ah(device, port_num)) { if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE) net_type = wc->network_hdr_type; @@ -494,7 +496,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, return -ENODEV; ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid, - ah_attr->dmac, + ah_attr->roce.dmac, wc->wc_flags & IB_WC_WITH_VLAN ? NULL : &vlan_id, &if_index, &hoplimit); @@ -525,15 +527,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, return ret; } - ah_attr->dlid = wc->slid; - ah_attr->sl = wc->sl; - ah_attr->src_path_bits = wc->dlid_path_bits; - ah_attr->port_num = port_num; + rdma_ah_set_dlid(ah_attr, wc->slid); + rdma_ah_set_sl(ah_attr, wc->sl); + rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); + rdma_ah_set_port_num(ah_attr, port_num); if (wc->wc_flags & IB_WC_GRH) { - ah_attr->ah_flags = IB_AH_GRH; - ah_attr->grh.dgid = sgid; - if (!rdma_cap_eth_ah(device, port_num)) { if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { ret = ib_find_cached_gid_by_port(device, &dgid, @@ -547,11 +546,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, } } - ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); - ah_attr->grh.flow_label = flow_class & 0xFFFFF; - ah_attr->grh.hop_limit = hoplimit; - ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; + rdma_ah_set_grh(ah_attr, &sgid, + flow_class & 0xFFFFF, + (u8)gid_index, hoplimit, + (flow_class >> 20) & 0xFF); + } return 0; } @@ -560,34 +560,37 @@ EXPORT_SYMBOL(ib_init_ah_from_wc); struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { - struct ib_ah_attr ah_attr; + struct rdma_ah_attr ah_attr; int ret; ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); - return ib_create_ah(pd, &ah_attr); + return rdma_create_ah(pd, &ah_attr); } EXPORT_SYMBOL(ib_create_ah_from_wc); -int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + if (ah->type != ah_attr->type) + return -EINVAL; + return ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_modify_ah); +EXPORT_SYMBOL(rdma_modify_ah); -int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { return ah->device->query_ah ? ah->device->query_ah(ah, ah_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_query_ah); +EXPORT_SYMBOL(rdma_query_ah); -int ib_destroy_ah(struct ib_ah *ah) +int rdma_destroy_ah(struct ib_ah *ah) { struct ib_pd *pd; int ret; @@ -599,7 +602,7 @@ int ib_destroy_ah(struct ib_ah *ah) return ret; } -EXPORT_SYMBOL(ib_destroy_ah); +EXPORT_SYMBOL(rdma_destroy_ah); /* Shared receive queues */ @@ -1201,19 +1204,22 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, EXPORT_SYMBOL(ib_modify_qp_is_ok); int ib_resolve_eth_dmac(struct ib_device *device, - struct ib_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr) { int ret = 0; + struct ib_global_route *grh; - if (!rdma_is_port_valid(device, ah_attr->port_num)) + if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr))) return -EINVAL; - if (!rdma_cap_eth_ah(device, ah_attr->port_num)) + if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) return 0; - if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw, - ah_attr->dmac); + grh = rdma_ah_retrieve_grh(ah_attr); + + if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) { + rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, + ah_attr->roce.dmac); } else { union ib_gid sgid; struct ib_gid_attr sgid_attr; @@ -1221,8 +1227,8 @@ int ib_resolve_eth_dmac(struct ib_device *device, int hop_limit; ret = ib_query_gid(device, - ah_attr->port_num, - ah_attr->grh.sgid_index, + rdma_ah_get_port_num(ah_attr), + grh->sgid_index, &sgid, &sgid_attr); if (ret || !sgid_attr.ndev) { @@ -1233,14 +1239,14 @@ int ib_resolve_eth_dmac(struct ib_device *device, ifindex = sgid_attr.ndev->ifindex; - ret = rdma_addr_find_l2_eth_by_grh(&sgid, - &ah_attr->grh.dgid, - ah_attr->dmac, - NULL, &ifindex, &hop_limit); + ret = + rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, + ah_attr->roce.dmac, + NULL, &ifindex, &hop_limit); dev_put(sgid_attr.ndev); - ah_attr->grh.hop_limit = hop_limit; + grh->hop_limit = hop_limit; } out: return ret; @@ -1519,7 +1525,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->attach_mcast) return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || + lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || + lid == be16_to_cpu(IB_LID_PERMISSIVE)) return -EINVAL; ret = qp->device->attach_mcast(qp, gid, lid); @@ -1535,7 +1543,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->detach_mcast) return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || + lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || + lid == be16_to_cpu(IB_LID_PERMISSIVE)) return -EINVAL; ret = qp->device->detach_mcast(qp, gid, lid); |