diff options
Diffstat (limited to 'drivers/infiniband/core')
34 files changed, 1919 insertions, 2170 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 9a8871e21545..d1b14887960e 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - nldev.o restrack.o counters.o ib_core_uverbs.o + nldev.o restrack.o counters.o ib_core_uverbs.o \ + trace.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o @@ -20,7 +21,8 @@ ib_cm-y := cm.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o -rdma_cm-y := cma.o +CFLAGS_cma_trace.o += -I$(src) +rdma_cm-y := cma.o cma_trace.o rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o @@ -33,6 +35,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ - uverbs_uapi.o uverbs_std_types_device.o + uverbs_uapi.o uverbs_std_types_device.o \ + uverbs_std_types_async_fd.o ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 606fa6d86685..1753a9801b70 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d535995711c3..17bfedd24cc3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -51,9 +51,8 @@ struct ib_pkey_cache { struct ib_update_work { struct work_struct work; - struct ib_device *device; - u8 port_num; - bool enforce_security; + struct ib_event event; + bool enforce_security; }; union ib_gid zgid; @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + ib_dispatch_event_clients(&event); } static const char * const gid_type_str[] = { @@ -1034,7 +1033,7 @@ int ib_get_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1043,7 +1042,7 @@ int ib_get_cached_pkey(struct ib_device *device, else *pkey = cache->table[index]; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1058,9 +1057,9 @@ int ib_get_cached_subnet_prefix(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *sn_pfx = device->port_data[port_num].cache.subnet_prefix; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return 0; } @@ -1080,7 +1079,7 @@ int ib_find_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1101,7 +1100,7 @@ int ib_find_cached_pkey(struct ib_device *device, ret = 0; } - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1120,7 +1119,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1133,7 +1132,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, break; } - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1149,9 +1148,9 @@ int ib_get_cached_lmc(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *lmc = device->port_data[port_num].cache.lmc; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1167,9 +1166,9 @@ int ib_get_cached_port_state(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *port_state = device->port_data[port_num].cache.port_state; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1381,9 +1380,8 @@ err: return ret; } -static void ib_cache_update(struct ib_device *device, - u8 port, - bool enforce_security) +static int +ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; @@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device, int ret; if (!rdma_is_port_valid(device, port)) - return; + return -EINVAL; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) - return; + return -ENOMEM; ret = ib_query_port(device, port, tprops); if (ret) { @@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device, pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); - if (!pkey_cache) + if (!pkey_cache) { + ret = -ENOMEM; goto err; + } pkey_cache->table_len = tprops->pkey_tbl_len; @@ -1428,7 +1428,7 @@ static void ib_cache_update(struct ib_device *device, } } - write_lock_irq(&device->cache.lock); + write_lock_irq(&device->cache_lock); old_pkey_cache = device->port_data[port].cache.pkey; @@ -1437,7 +1437,7 @@ static void ib_cache_update(struct ib_device *device, device->port_data[port].cache.port_state = tprops->state; device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; - write_unlock_irq(&device->cache.lock); + write_unlock_irq(&device->cache_lock); if (enforce_security) ib_security_cache_change(device, @@ -1446,57 +1446,91 @@ static void ib_cache_update(struct ib_device *device, kfree(old_pkey_cache); kfree(tprops); - return; + return 0; err: kfree(pkey_cache); kfree(tprops); + return ret; +} + +static void ib_cache_event_task(struct work_struct *_work) +{ + struct ib_update_work *work = + container_of(_work, struct ib_update_work, work); + int ret; + + /* Before distributing the cache update event, first sync + * the cache. + */ + ret = ib_cache_update(work->event.device, work->event.element.port_num, + work->enforce_security); + + /* GID event is notified already for individual GID entries by + * dispatch_gid_change_event(). Hence, notifiy for rest of the + * events. + */ + if (!ret && work->event.event != IB_EVENT_GID_CHANGE) + ib_dispatch_event_clients(&work->event); + + kfree(work); } -static void ib_cache_task(struct work_struct *_work) +static void ib_generic_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); - ib_cache_update(work->device, - work->port_num, - work->enforce_security); + ib_dispatch_event_clients(&work->event); kfree(work); } -static void ib_cache_event(struct ib_event_handler *handler, - struct ib_event *event) +static bool is_cache_update_event(const struct ib_event *event) +{ + return (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_GID_CHANGE); +} + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(const struct ib_event *event) { struct ib_update_work *work; - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_GID_CHANGE) { - work = kmalloc(sizeof *work, GFP_ATOMIC); - if (work) { - INIT_WORK(&work->work, ib_cache_task); - work->device = event->device; - work->port_num = event->element.port_num; - if (event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_GID_CHANGE) - work->enforce_security = true; - else - work->enforce_security = false; - - queue_work(ib_wq, &work->work); - } - } + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + if (is_cache_update_event(event)) + INIT_WORK(&work->work, ib_cache_event_task); + else + INIT_WORK(&work->work, ib_generic_event_task); + + work->event = *event; + if (event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_GID_CHANGE) + work->enforce_security = true; + + queue_work(ib_wq, &work->work); } +EXPORT_SYMBOL(ib_dispatch_event); int ib_cache_setup_one(struct ib_device *device) { unsigned int p; int err; - rwlock_init(&device->cache.lock); + rwlock_init(&device->cache_lock); err = gid_table_setup_one(device); if (err) @@ -1505,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device) rdma_for_each_port (device, p) ib_cache_update(device, p, true); - INIT_IB_EVENT_HANDLER(&device->cache.event_handler, - device, ib_cache_event); - ib_register_event_handler(&device->cache.event_handler); return 0; } @@ -1529,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { - /* The cleanup function unregisters the event handler, - * waits for all in-progress workqueue elements and cleans - * up the GID cache. This function should be called after - * the device was removed from the devices list and all - * clients were removed, so the cache exists but is + /* The cleanup function waits for all in-progress workqueue + * elements and cleans up the GID cache. This function should be + * called after the device was removed from the devices list and + * all clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. */ - ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 455b3659d84b..15e99a888427 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -241,6 +241,7 @@ struct cm_id_private { /* Number of clients sharing this ib_cm_id. Only valid for listeners. * Protected by the cm.lock spinlock. */ int listen_sharecount; + struct rcu_head rcu; struct ib_mad_send_buf *msg; struct cm_timewait_info *timewait_info; @@ -593,28 +594,16 @@ static void cm_free_id(__be32 local_id) xa_erase_irq(&cm.local_id_table, cm_local_id(local_id)); } -static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) +static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; + rcu_read_lock(); cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id)); - if (cm_id_priv) { - if (cm_id_priv->id.remote_id == remote_id) - refcount_inc(&cm_id_priv->refcount); - else - cm_id_priv = NULL; - } - - return cm_id_priv; -} - -static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) -{ - struct cm_id_private *cm_id_priv; - - spin_lock_irq(&cm.lock); - cm_id_priv = cm_get_id(local_id, remote_id); - spin_unlock_irq(&cm.lock); + if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id || + !refcount_inc_not_zero(&cm_id_priv->refcount)) + cm_id_priv = NULL; + rcu_read_unlock(); return cm_id_priv; } @@ -1089,7 +1078,7 @@ retest: rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); kfree(cm_id_priv->private_data); - kfree(cm_id_priv); + kfree_rcu(cm_id_priv, rcu); } void ib_destroy_cm_id(struct ib_cm_id *cm_id) @@ -1202,6 +1191,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } refcount_inc(&cm_id_priv->refcount); @@ -1262,54 +1252,72 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv)); - req_msg->local_comm_id = cm_id_priv->id.local_id; - req_msg->service_id = param->service_id; - req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; - cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_init_depth(req_msg, param->initiator_depth); - cm_req_set_remote_resp_timeout(req_msg, - param->remote_cm_response_timeout); + IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id)); + IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg, + be64_to_cpu(cm_id_priv->id.device->node_guid)); + IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num); + IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth); + IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg, + param->remote_cm_response_timeout); cm_req_set_qp_type(req_msg, param->qp_type); - cm_req_set_flow_ctrl(req_msg, param->flow_control); - cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); - cm_req_set_local_resp_timeout(req_msg, - param->local_cm_response_timeout); - req_msg->pkey = param->primary_path->pkey; - cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); + IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control); + IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn); + IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg, + param->local_cm_response_timeout); + IBA_SET(CM_REQ_PARTITION_KEY, req_msg, + be16_to_cpu(param->primary_path->pkey)); + IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg, + param->primary_path->mtu); + IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries); if (param->qp_type != IB_QPT_XRC_INI) { - cm_req_set_resp_res(req_msg, param->responder_resources); - cm_req_set_retry_count(req_msg, param->retry_count); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); - cm_req_set_srq(req_msg, param->srq); + IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg, + param->responder_resources); + IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count); + IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg, + param->rnr_retry_count); + IBA_SET(CM_REQ_SRQ, req_msg, param->srq); } - req_msg->primary_local_gid = pri_path->sgid; - req_msg->primary_remote_gid = pri_path->dgid; + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) = + pri_path->sgid; + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) = + pri_path->dgid; if (pri_ext) { - req_msg->primary_local_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid)); - req_msg->primary_remote_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid)); + IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid)); + IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid)); } if (pri_path->hop_limit <= 1) { - req_msg->primary_local_lid = pri_ext ? 0 : - htons(ntohl(sa_path_get_slid(pri_path))); - req_msg->primary_remote_lid = pri_ext ? 0 : - htons(ntohl(sa_path_get_dlid(pri_path))); + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(pri_ext ? 0 : + htons(ntohl(sa_path_get_slid( + pri_path))))); + IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, + be16_to_cpu(pri_ext ? 0 : + htons(ntohl(sa_path_get_dlid( + pri_path))))); } else { /* Work-around until there's a way to obtain remote LID info */ - req_msg->primary_local_lid = IB_LID_PERMISSIVE; - req_msg->primary_remote_lid = IB_LID_PERMISSIVE; + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); } - cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); - cm_req_set_primary_packet_rate(req_msg, pri_path->rate); - req_msg->primary_traffic_class = pri_path->traffic_class; - req_msg->primary_hop_limit = pri_path->hop_limit; - cm_req_set_primary_sl(req_msg, pri_path->sl); - cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); - cm_req_set_primary_local_ack_timeout(req_msg, + IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg, + be32_to_cpu(pri_path->flow_label)); + IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate); + IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class); + IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit); + IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl); + IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg, + (pri_path->hop_limit <= 1)); + IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, pri_path->packet_life_time)); @@ -1320,38 +1328,55 @@ static void cm_format_req(struct cm_req_msg *req_msg, alt_ext = opa_is_extended_lid(alt_path->opa.dlid, alt_path->opa.slid); - req_msg->alt_local_gid = alt_path->sgid; - req_msg->alt_remote_gid = alt_path->dgid; + *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) = + alt_path->sgid; + *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) = + alt_path->dgid; if (alt_ext) { - req_msg->alt_local_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid)); - req_msg->alt_remote_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid)); + IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, + req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid)); + IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, + req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid)); } if (alt_path->hop_limit <= 1) { - req_msg->alt_local_lid = alt_ext ? 0 : - htons(ntohl(sa_path_get_slid(alt_path))); - req_msg->alt_remote_lid = alt_ext ? 0 : - htons(ntohl(sa_path_get_dlid(alt_path))); + IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg, + be16_to_cpu( + alt_ext ? 0 : + htons(ntohl(sa_path_get_slid( + alt_path))))); + IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg, + be16_to_cpu( + alt_ext ? 0 : + htons(ntohl(sa_path_get_dlid( + alt_path))))); } else { - req_msg->alt_local_lid = IB_LID_PERMISSIVE; - req_msg->alt_remote_lid = IB_LID_PERMISSIVE; + IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); } - cm_req_set_alt_flow_label(req_msg, - alt_path->flow_label); - cm_req_set_alt_packet_rate(req_msg, alt_path->rate); - req_msg->alt_traffic_class = alt_path->traffic_class; - req_msg->alt_hop_limit = alt_path->hop_limit; - cm_req_set_alt_sl(req_msg, alt_path->sl); - cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); - cm_req_set_alt_local_ack_timeout(req_msg, + IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg, + be32_to_cpu(alt_path->flow_label)); + IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate); + IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg, + alt_path->traffic_class); + IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg, + alt_path->hop_limit); + IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl); + IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg, + (alt_path->hop_limit <= 1)); + IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } if (param->private_data && param->private_data_len) - memcpy(req_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data, + param->private_data_len); } static int cm_validate_req_param(struct ib_cm_req_param *param) @@ -1443,8 +1468,8 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; - cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); - cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); + cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); @@ -1482,14 +1507,16 @@ static int cm_issue_rej(struct cm_port *port, rej_msg = (struct cm_rej_msg *) msg->mad; cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); - rej_msg->remote_comm_id = rcv_msg->local_comm_id; - rej_msg->local_comm_id = rcv_msg->remote_comm_id; - cm_rej_set_msg_rejected(rej_msg, msg_rejected); - rej_msg->reason = cpu_to_be16(reason); + IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg, + IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg)); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected); + IBA_SET(CM_REJ_REASON, rej_msg, reason); if (ari && ari_length) { - cm_rej_set_reject_info_len(rej_msg, ari_length); - memcpy(rej_msg->ari, ari, ari_length); + IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length); + IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length); } ret = ib_post_send_mad(msg, NULL); @@ -1501,8 +1528,10 @@ static int cm_issue_rej(struct cm_port *port, static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) { - return ((req_msg->alt_local_lid) || - (ib_is_opa_gid(&req_msg->alt_local_gid))); + return ((cpu_to_be16( + IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) || + (ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, + req_msg)))); } static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num, @@ -1522,14 +1551,18 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { sa_path_set_dlid(primary_path, - ntohs(req_msg->primary_local_lid)); + IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, + req_msg)); sa_path_set_slid(primary_path, - ntohs(req_msg->primary_remote_lid)); + IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, + req_msg)); } else { - lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)); sa_path_set_dlid(primary_path, lid); - lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)); sa_path_set_slid(primary_path, lid); } @@ -1537,13 +1570,19 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, return; if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid)); - sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid)); + sa_path_set_dlid(alt_path, + IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, + req_msg)); + sa_path_set_slid(alt_path, + IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, + req_msg)); } else { - lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg)); sa_path_set_dlid(alt_path, lid); - lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg)); sa_path_set_slid(alt_path, lid); } } @@ -1552,44 +1591,58 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, struct sa_path_rec *alt_path) { - primary_path->dgid = req_msg->primary_local_gid; - primary_path->sgid = req_msg->primary_remote_gid; - primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); - primary_path->hop_limit = req_msg->primary_hop_limit; - primary_path->traffic_class = req_msg->primary_traffic_class; + primary_path->dgid = + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg); + primary_path->sgid = + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg); + primary_path->flow_label = + cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg)); + primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg); + primary_path->traffic_class = + IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg); primary_path->reversible = 1; - primary_path->pkey = req_msg->pkey; - primary_path->sl = cm_req_get_primary_sl(req_msg); + primary_path->pkey = + cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg); primary_path->mtu_selector = IB_SA_EQ; - primary_path->mtu = cm_req_get_path_mtu(req_msg); + primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); primary_path->rate_selector = IB_SA_EQ; - primary_path->rate = cm_req_get_primary_packet_rate(req_msg); + primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg); primary_path->packet_life_time_selector = IB_SA_EQ; primary_path->packet_life_time = - cm_req_get_primary_local_ack_timeout(req_msg); + IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); - primary_path->service_id = req_msg->service_id; + primary_path->service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); if (sa_path_is_roce(primary_path)) primary_path->roce.route_resolved = false; if (cm_req_has_alt_path(req_msg)) { - alt_path->dgid = req_msg->alt_local_gid; - alt_path->sgid = req_msg->alt_remote_gid; - alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); - alt_path->hop_limit = req_msg->alt_hop_limit; - alt_path->traffic_class = req_msg->alt_traffic_class; + alt_path->dgid = *IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg); + alt_path->sgid = *IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg); + alt_path->flow_label = cpu_to_be32( + IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg)); + alt_path->hop_limit = + IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg); + alt_path->traffic_class = + IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg); alt_path->reversible = 1; - alt_path->pkey = req_msg->pkey; - alt_path->sl = cm_req_get_alt_sl(req_msg); + alt_path->pkey = + cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg); alt_path->mtu_selector = IB_SA_EQ; - alt_path->mtu = cm_req_get_path_mtu(req_msg); + alt_path->mtu = + IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); alt_path->rate_selector = IB_SA_EQ; - alt_path->rate = cm_req_get_alt_packet_rate(req_msg); + alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg); alt_path->packet_life_time_selector = IB_SA_EQ; alt_path->packet_life_time = - cm_req_get_alt_local_ack_timeout(req_msg); + IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); - alt_path->service_id = req_msg->service_id; + alt_path->service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); if (sa_path_is_roce(alt_path)) alt_path->roce.route_resolved = false; @@ -1664,23 +1717,25 @@ static void cm_format_req_event(struct cm_work *work, } else { param->alternate_path = NULL; } - param->remote_ca_guid = req_msg->local_ca_guid; - param->remote_qkey = be32_to_cpu(req_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); + param->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg)); + param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg); + param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg); param->qp_type = cm_req_get_qp_type(req_msg); - param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); - param->responder_resources = cm_req_get_init_depth(req_msg); - param->initiator_depth = cm_req_get_resp_res(req_msg); + param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg); + param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg); + param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg); param->local_cm_response_timeout = - cm_req_get_remote_resp_timeout(req_msg); - param->flow_control = cm_req_get_flow_ctrl(req_msg); + IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg); + param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg); param->remote_cm_response_timeout = - cm_req_get_local_resp_timeout(req_msg); - param->retry_count = cm_req_get_retry_count(req_msg); - param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); - param->srq = cm_req_get_srq(req_msg); + IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg); + param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg); + param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); + param->srq = IBA_GET(CM_REQ_SRQ, req_msg); param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; - work->cm_event.private_data = &req_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg); } static void cm_process_work(struct cm_id_private *cm_id_priv, @@ -1714,13 +1769,16 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); - cm_mra_set_msg_mraed(mra_msg, msg_mraed); - mra_msg->local_comm_id = cm_id_priv->id.local_id; - mra_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_mra_set_service_timeout(mra_msg, service_timeout); + IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed); + IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout); if (private_data && private_data_len) - memcpy(mra_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data, + private_data_len); } static void cm_format_rej(struct cm_rej_msg *rej_msg, @@ -1732,36 +1790,42 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg, u8 private_data_len) { cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); - rej_msg->remote_comm_id = cm_id_priv->id.remote_id; + IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: - rej_msg->local_comm_id = 0; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_MRA_REQ_SENT: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP); break; default: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, + CM_MSG_RESPONSE_OTHER); break; } - rej_msg->reason = cpu_to_be16(reason); + IBA_SET(CM_REJ_REASON, rej_msg, reason); if (ari && ari_length) { - cm_rej_set_reject_info_len(rej_msg, ari_length); - memcpy(rej_msg->ari, ari, ari_length); + IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length); + IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length); } if (private_data && private_data_len) - memcpy(rej_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data, + private_data_len); } static void cm_dup_req_handler(struct cm_work *work, @@ -1821,7 +1885,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { @@ -1835,7 +1899,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); @@ -1851,8 +1915,9 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); + listen_cm_id_priv = cm_find_listen( + cm_id_priv->id.device, + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg))); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); @@ -1877,24 +1942,32 @@ out: */ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { - if (!cm_req_get_primary_subnet_local(req_msg)) { - if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = ib_lid_be16(wc->slid); - cm_req_set_primary_sl(req_msg, wc->sl); + if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) { + if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) { + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(ib_lid_be16(wc->slid))); + IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl); } - if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) - req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); + if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) + IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, + wc->dlid_path_bits); } - if (!cm_req_get_alt_subnet_local(req_msg)) { - if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = ib_lid_be16(wc->slid); - cm_req_set_alt_sl(req_msg, wc->sl); + if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) { + if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) { + IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg, + be16_to_cpu(ib_lid_be16(wc->slid))); + IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl); } - if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) - req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); + if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) + IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg, + wc->dlid_path_bits); } } @@ -1914,7 +1987,8 @@ static int cm_req_handler(struct cm_work *work) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); - cm_id_priv->id.remote_id = req_msg->local_comm_id; + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); @@ -1926,9 +2000,12 @@ static int cm_req_handler(struct cm_work *work) ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; } - cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; - cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->timewait_info->work.remote_id = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); + cm_id_priv->timewait_info->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg)); + cm_id_priv->timewait_info->remote_qpn = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { @@ -1940,7 +2017,8 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; - cm_id_priv->id.service_id = req_msg->service_id; + cm_id_priv->id.service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); @@ -1957,10 +2035,11 @@ static int cm_req_handler(struct cm_work *work) work->path[0].rec_type = sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { - cm_path_set_rec_type(work->port->cm_dev->ib_device, - work->port->port_num, - &work->path[0], - &req_msg->primary_local_gid); + cm_path_set_rec_type( + work->port->cm_dev->ib_device, work->port->port_num, + &work->path[0], + IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, + req_msg)); } if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; @@ -2000,16 +2079,19 @@ static int cm_req_handler(struct cm_work *work) } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( - cm_req_get_local_resp_timeout(req_msg)); - cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); - cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); - cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); - cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); - cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); - cm_id_priv->pkey = req_msg->pkey; - cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); - cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); + cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg); + cm_id_priv->remote_qpn = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); + cm_id_priv->initiator_depth = + IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg); + cm_id_priv->responder_resources = + IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg); + cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); + cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); + cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg); + cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); @@ -2032,29 +2114,35 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, struct ib_cm_rep_param *param) { cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); - rep_msg->local_comm_id = cm_id_priv->id.local_id; - rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); - rep_msg->resp_resources = param->responder_resources; - cm_rep_set_target_ack_delay(rep_msg, - cm_id_priv->av.port->cm_dev->ack_delay); - cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn); + IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg, + param->responder_resources); + IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg, + cm_id_priv->av.port->cm_dev->ack_delay); + IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted); + IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count); + IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg, + be64_to_cpu(cm_id_priv->id.device->node_guid)); if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { - rep_msg->initiator_depth = param->initiator_depth; - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); - cm_rep_set_srq(rep_msg, param->srq); - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg, + param->initiator_depth); + IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg, + param->flow_control); + IBA_SET(CM_REP_SRQ, rep_msg, param->srq); + IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num); } else { - cm_rep_set_srq(rep_msg, 1); - cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + IBA_SET(CM_REP_SRQ, rep_msg, 1); + IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num); } if (param->private_data && param->private_data_len) - memcpy(rep_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data, + param->private_data_len); } int ib_send_cm_rep(struct ib_cm_id *cm_id, @@ -2100,7 +2188,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; - cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -2114,11 +2202,14 @@ static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, u8 private_data_len) { cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); - rtu_msg->local_comm_id = cm_id_priv->id.local_id; - rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; + IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); if (private_data && private_data_len) - memcpy(rtu_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data, + private_data_len); } int ib_send_cm_rtu(struct ib_cm_id *cm_id, @@ -2181,18 +2272,20 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rep_rcvd; - param->remote_ca_guid = rep_msg->local_ca_guid; - param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); + param->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg)); + param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg); param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); - param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); - param->responder_resources = rep_msg->initiator_depth; - param->initiator_depth = rep_msg->resp_resources; - param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); - param->failover_accepted = cm_rep_get_failover(rep_msg); - param->flow_control = cm_rep_get_flow_ctrl(rep_msg); - param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); - param->srq = cm_rep_get_srq(rep_msg); - work->cm_event.private_data = &rep_msg->private_data; + param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg); + param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg); + param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg); + param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg); + param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg); + param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg); + param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg); + param->srq = IBA_GET(CM_REP_SRQ, rep_msg); + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg); } static void cm_dup_rep_handler(struct cm_work *work) @@ -2203,8 +2296,9 @@ static void cm_dup_rep_handler(struct cm_work *work) int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, - rep_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), + cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg))); if (!cm_id_priv) return; @@ -2248,11 +2342,12 @@ static int cm_rep_handler(struct cm_work *work) struct cm_timewait_info *timewait_info; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0); if (!cm_id_priv) { cm_dup_rep_handler(work); pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__, - be32_to_cpu(rep_msg->remote_comm_id)); + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); return -EINVAL; } @@ -2266,15 +2361,18 @@ static int cm_rep_handler(struct cm_work *work) default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; - pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n", - __func__, cm_id_priv->id.state, - be32_to_cpu(rep_msg->local_comm_id), - be32_to_cpu(rep_msg->remote_comm_id)); + pr_debug( + "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n", + __func__, cm_id_priv->id.state, + IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); goto error; } - cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; - cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; + cm_id_priv->timewait_info->work.remote_id = + cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)); + cm_id_priv->timewait_info->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg)); cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); @@ -2284,7 +2382,7 @@ static int cm_rep_handler(struct cm_work *work) spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; pr_debug("%s: Failed to insert remote id %d\n", __func__, - be32_to_cpu(rep_msg->remote_comm_id)); + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); goto error; } /* Check for a stale connection. */ @@ -2293,7 +2391,7 @@ static int cm_rep_handler(struct cm_work *work) rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock(&cm.lock); @@ -2302,9 +2400,10 @@ static int cm_rep_handler(struct cm_work *work) IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; - pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n", - __func__, be32_to_cpu(rep_msg->local_comm_id), - be32_to_cpu(rep_msg->remote_comm_id)); + pr_debug( + "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n", + __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); if (cur_cm_id_priv) { cm_id = &cur_cm_id_priv->id; @@ -2317,13 +2416,17 @@ static int cm_rep_handler(struct cm_work *work) spin_unlock(&cm.lock); cm_id_priv->id.state = IB_CM_REP_RCVD; - cm_id_priv->id.remote_id = rep_msg->local_comm_id; + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)); cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); - cm_id_priv->initiator_depth = rep_msg->resp_resources; - cm_id_priv->responder_resources = rep_msg->initiator_depth; - cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); - cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + cm_id_priv->initiator_depth = + IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg); + cm_id_priv->responder_resources = + IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg); + cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); + cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg); + cm_id_priv->target_ack_delay = + IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg); cm_id_priv->av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->av.timeout - 1); @@ -2389,12 +2492,14 @@ static int cm_rtu_handler(struct cm_work *work) int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, - rtu_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)), + cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg))); if (!cm_id_priv) return -EINVAL; - work->cm_event.private_data = &rtu_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && @@ -2429,12 +2534,16 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, cm_form_tid(cm_id_priv)); - dreq_msg->local_comm_id = cm_id_priv->id.local_id; - dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); + IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg, + be32_to_cpu(cm_id_priv->remote_qpn)); if (private_data && private_data_len) - memcpy(dreq_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data, + private_data_len); } int ib_send_cm_dreq(struct ib_cm_id *cm_id, @@ -2494,11 +2603,14 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg, u8 private_data_len) { cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); - drep_msg->local_comm_id = cm_id_priv->id.local_id; - drep_msg->remote_comm_id = cm_id_priv->id.remote_id; + IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); if (private_data && private_data_len) - memcpy(drep_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data, + private_data_len); } int ib_send_cm_drep(struct ib_cm_id *cm_id, @@ -2566,8 +2678,10 @@ static int cm_issue_drep(struct cm_port *port, drep_msg = (struct cm_drep_msg *) msg->mad; cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); - drep_msg->remote_comm_id = dreq_msg->local_comm_id; - drep_msg->local_comm_id = dreq_msg->remote_comm_id; + IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg, + IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)); + IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg, + IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) @@ -2584,22 +2698,26 @@ static int cm_dreq_handler(struct cm_work *work) int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, - dreq_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)), + cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg))); if (!cm_id_priv) { atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); - pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n", - __func__, be32_to_cpu(dreq_msg->local_comm_id), - be32_to_cpu(dreq_msg->remote_comm_id)); + pr_debug( + "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n", + __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), + IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); return -EINVAL; } - work->cm_event.private_data = &dreq_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg); spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) + if (cm_id_priv->local_qpn != + cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg))) goto unlock; switch (cm_id_priv->id.state) { @@ -2665,12 +2783,14 @@ static int cm_drep_handler(struct cm_work *work) int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, - drep_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)), + cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg))); if (!cm_id_priv) return -EINVAL; - work->cm_event.private_data = &drep_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && @@ -2766,10 +2886,11 @@ static void cm_format_rej_event(struct cm_work *work) rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rej_rcvd; - param->ari = rej_msg->ari; - param->ari_length = cm_rej_get_reject_info_len(rej_msg); - param->reason = __be16_to_cpu(rej_msg->reason); - work->cm_event.private_data = &rej_msg->private_data; + param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg); + param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg); + param->reason = IBA_GET(CM_REJ_REASON, rej_msg); + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg); } static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) @@ -2778,29 +2899,29 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) struct cm_id_private *cm_id_priv; __be32 remote_id; - remote_id = rej_msg->local_comm_id; + remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg)); - if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { + if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) { spin_lock_irq(&cm.lock); - timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), - remote_id); + timewait_info = cm_find_remote_id( + *((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)), + remote_id); if (!timewait_info) { spin_unlock_irq(&cm.lock); return NULL; } - cm_id_priv = xa_load(&cm.local_id_table, - cm_local_id(timewait_info->work.local_id)); - if (cm_id_priv) { - if (cm_id_priv->id.remote_id == remote_id) - refcount_inc(&cm_id_priv->refcount); - else - cm_id_priv = NULL; - } + cm_id_priv = + cm_acquire_id(timewait_info->work.local_id, remote_id); spin_unlock_irq(&cm.lock); - } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) - cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); + } else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) == + CM_MSG_RESPONSE_REQ) + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)), + 0); else - cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)), + remote_id); return cm_id_priv; } @@ -2828,7 +2949,7 @@ static int cm_rej_handler(struct cm_work *work) /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: - if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) + if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN) cm_enter_timewait(cm_id_priv); else cm_reset_to_idle(cm_id_priv); @@ -2958,13 +3079,16 @@ EXPORT_SYMBOL(ib_send_cm_mra); static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) { - switch (cm_mra_get_msg_mraed(mra_msg)) { + switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) { case CM_MSG_RESPONSE_REQ: - return cm_acquire_id(mra_msg->remote_comm_id, 0); + return cm_acquire_id( + cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)), + 0); case CM_MSG_RESPONSE_REP: case CM_MSG_RESPONSE_OTHER: - return cm_acquire_id(mra_msg->remote_comm_id, - mra_msg->local_comm_id); + return cm_acquire_id( + cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)), + cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg))); default: return NULL; } @@ -2981,30 +3105,34 @@ static int cm_mra_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; - work->cm_event.private_data = &mra_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg); work->cm_event.param.mra_rcvd.service_timeout = - cm_mra_get_service_timeout(mra_msg); - timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + + IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg); + timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) + cm_convert_to_ms(cm_id_priv->av.timeout); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || + if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != + CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || + if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != + CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; case IB_CM_ESTABLISHED: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || + if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != + CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) { @@ -3046,117 +3174,23 @@ out: return -EINVAL; } -static void cm_format_lap(struct cm_lap_msg *lap_msg, - struct cm_id_private *cm_id_priv, - struct sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len) -{ - bool alt_ext = false; - - if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA) - alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, - alternate_path->opa.slid); - cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv)); - lap_msg->local_comm_id = cm_id_priv->id.local_id; - lap_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); - /* todo: need remote CM response timeout */ - cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); - lap_msg->alt_local_lid = - htons(ntohl(sa_path_get_slid(alternate_path))); - lap_msg->alt_remote_lid = - htons(ntohl(sa_path_get_dlid(alternate_path))); - lap_msg->alt_local_gid = alternate_path->sgid; - lap_msg->alt_remote_gid = alternate_path->dgid; - if (alt_ext) { - lap_msg->alt_local_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid)); - lap_msg->alt_remote_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid)); - } - cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); - cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); - lap_msg->alt_hop_limit = alternate_path->hop_limit; - cm_lap_set_packet_rate(lap_msg, alternate_path->rate); - cm_lap_set_sl(lap_msg, alternate_path->sl); - cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ - cm_lap_set_local_ack_timeout(lap_msg, - cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - alternate_path->packet_life_time)); - - if (private_data && private_data_len) - memcpy(lap_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_UNINIT && - cm_id->lap_state != IB_CM_LAP_IDLE)) { - ret = -EINVAL; - goto out; - } - - ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av, - cm_id_priv); - if (ret) - goto out; - cm_id_priv->alt_av.timeout = - cm_ack_timeout(cm_id_priv->target_ack_delay, - cm_id_priv->alt_av.timeout - 1); - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, - alternate_path, private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_SENT; - cm_id_priv->msg = msg; - -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_lap); - static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, struct sa_path_rec *path) { u32 lid; if (path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid)); - sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid)); + sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID, + lap_msg)); + sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID, + lap_msg)); } else { - lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg)); sa_path_set_dlid(path, lid); - lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg)); sa_path_set_slid(path, lid); } } @@ -3165,20 +3199,23 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct sa_path_rec *path, struct cm_lap_msg *lap_msg) { - path->dgid = lap_msg->alt_local_gid; - path->sgid = lap_msg->alt_remote_gid; - path->flow_label = cm_lap_get_flow_label(lap_msg); - path->hop_limit = lap_msg->alt_hop_limit; - path->traffic_class = cm_lap_get_traffic_class(lap_msg); + path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg); + path->sgid = + *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg); + path->flow_label = + cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg)); + path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg); + path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg); path->reversible = 1; path->pkey = cm_id_priv->pkey; - path->sl = cm_lap_get_sl(lap_msg); + path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg); path->mtu_selector = IB_SA_EQ; path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; - path->rate = cm_lap_get_packet_rate(lap_msg); + path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg); path->packet_life_time_selector = IB_SA_EQ; - path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); + path->packet_life_time = + IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg); path->packet_life_time -= (path->packet_life_time > 0); cm_format_path_lid_from_lap(lap_msg, path); } @@ -3200,20 +3237,22 @@ static int cm_lap_handler(struct cm_work *work) /* todo: verify LAP request and send reject APR if invalid. */ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, - lap_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)), + cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg))); if (!cm_id_priv) return -EINVAL; param = &work->cm_event.param.lap_rcvd; memset(&work->path[0], 0, sizeof(work->path[1])); cm_path_set_rec_type(work->port->cm_dev->ib_device, - work->port->port_num, - &work->path[0], - &lap_msg->alt_local_gid); + work->port->port_num, &work->path[0], + IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, + lap_msg)); param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); - work->cm_event.private_data = &lap_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) @@ -3278,72 +3317,6 @@ deref: cm_deref_id(cm_id_priv); return -EINVAL; } -static void cm_format_apr(struct cm_apr_msg *apr_msg, - struct cm_id_private *cm_id_priv, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); - apr_msg->local_comm_id = cm_id_priv->id.local_id; - apr_msg->remote_comm_id = cm_id_priv->id.remote_id; - apr_msg->ap_status = (u8) status; - - if (info && info_length) { - apr_msg->info_length = info_length; - memcpy(apr_msg->info, info, info_length); - } - - if (private_data && private_data_len) - memcpy(apr_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_apr(struct ib_cm_id *cm_id, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || - (info && info_length > IB_CM_APR_INFO_LENGTH)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_RCVD && - cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { - ret = -EINVAL; - goto out; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, - info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_IDLE; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_apr); - static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; @@ -3358,15 +3331,20 @@ static int cm_apr_handler(struct cm_work *work) return -EINVAL; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, - apr_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)), + cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg))); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ - work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; - work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; - work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; - work->cm_event.private_data = &apr_msg->private_data; + work->cm_event.param.apr_rcvd.ap_status = + IBA_GET(CM_APR_AR_STATUS, apr_msg); + work->cm_event.param.apr_rcvd.apr_info = + IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg); + work->cm_event.param.apr_rcvd.info_len = + IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg); + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || @@ -3438,13 +3416,16 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv)); - sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = param->path->pkey; - sidr_req_msg->service_id = param->service_id; + IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg, + be16_to_cpu(param->path->pkey)); + IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg, + be64_to_cpu(param->service_id)); if (param->private_data && param->private_data_len) - memcpy(sidr_req_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg, + param->private_data, param->private_data_len); } int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, @@ -3508,13 +3489,15 @@ static void cm_format_sidr_req_event(struct cm_work *work, sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_req_rcvd; - param->pkey = __be16_to_cpu(sidr_req_msg->pkey); + param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg); param->listen_id = listen_id; - param->service_id = sidr_req_msg->service_id; + param->service_id = + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr; - work->cm_event.private_data = &sidr_req_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg); } static int cm_sidr_req_handler(struct cm_work *work) @@ -3542,7 +3525,8 @@ static int cm_sidr_req_handler(struct cm_work *work) if (ret) goto out; - cm_id_priv->id.remote_id = sidr_req_msg->request_id; + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); @@ -3555,8 +3539,9 @@ static int cm_sidr_req_handler(struct cm_work *work) goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; - cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen( + cm_id->device, + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg))); if (!cur_cm_id_priv) { spin_unlock_irq(&cm.lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); @@ -3568,7 +3553,8 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; - cm_id_priv->id.service_id = sidr_req_msg->service_id; + cm_id_priv->id.service_id = + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); @@ -3586,18 +3572,21 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, { cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, cm_id_priv->tid); - sidr_rep_msg->request_id = cm_id_priv->id.remote_id; - sidr_rep_msg->status = param->status; - cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); - sidr_rep_msg->service_id = cm_id_priv->id.service_id; - sidr_rep_msg->qkey = cpu_to_be32(param->qkey); + IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status); + IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num); + IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg, + be64_to_cpu(cm_id_priv->id.service_id)); + IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey); if (param->info && param->info_length) - memcpy(sidr_rep_msg->info, param->info, param->info_length); + IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg, + param->info, param->info_length); if (param->private_data && param->private_data_len) - memcpy(sidr_rep_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg, + param->private_data, param->private_data_len); } int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, @@ -3657,13 +3646,16 @@ static void cm_format_sidr_rep_event(struct cm_work *work, sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_rep_rcvd; - param->status = sidr_rep_msg->status; - param->qkey = be32_to_cpu(sidr_rep_msg->qkey); - param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); - param->info = &sidr_rep_msg->info; - param->info_len = sidr_rep_msg->info_length; + param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg); + param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg); + param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg); + param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION, + sidr_rep_msg); + param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH, + sidr_rep_msg); param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; - work->cm_event.private_data = &sidr_rep_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg); } static int cm_sidr_rep_handler(struct cm_work *work) @@ -3673,7 +3665,8 @@ static int cm_sidr_rep_handler(struct cm_work *work) sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 92d7260ac913..0cc40656b5c5 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -8,6 +8,7 @@ #ifndef CM_MSGS_H #define CM_MSGS_H +#include <rdma/ibta_vol1_c12.h> #include <rdma/ib_mad.h> #include <rdma/ib_cm.h> @@ -18,120 +19,14 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -struct cm_req_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 rsvd4; - __be64 service_id; - __be64 local_ca_guid; - __be32 rsvd24; - __be32 local_qkey; - /* local QPN:24, responder resources:8 */ - __be32 offset32; - /* local EECN:24, initiator depth:8 */ - __be32 offset36; - /* - * remote EECN:24, remote CM response timeout:5, - * transport service type:2, end-to-end flow control:1 - */ - __be32 offset40; - /* starting PSN:24, local CM response timeout:5, retry count:3 */ - __be32 offset44; - __be16 pkey; - /* path MTU:4, RDC exists:1, RNR retry count:3. */ - u8 offset50; - /* max CM Retries:4, SRQ:1, extended transport type:3 */ - u8 offset51; - - __be16 primary_local_lid; - __be16 primary_remote_lid; - union ib_gid primary_local_gid; - union ib_gid primary_remote_gid; - /* flow label:20, rsvd:6, packet rate:6 */ - __be32 primary_offset88; - u8 primary_traffic_class; - u8 primary_hop_limit; - /* SL:4, subnet local:1, rsvd:3 */ - u8 primary_offset94; - /* local ACK timeout:5, rsvd:3 */ - u8 primary_offset95; - - __be16 alt_local_lid; - __be16 alt_remote_lid; - union ib_gid alt_local_gid; - union ib_gid alt_remote_gid; - /* flow label:20, rsvd:6, packet rate:6 */ - __be32 alt_offset132; - u8 alt_traffic_class; - u8 alt_hop_limit; - /* SL:4, subnet local:1, rsvd:3 */ - u8 alt_offset138; - /* local ACK timeout:5, rsvd:3 */ - u8 alt_offset139; - - u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; - -} __packed; - -static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8); -} - -static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn) -{ - req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(req_msg->offset32) & - 0x000000FF)); -} - -static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg) -{ - return (u8) be32_to_cpu(req_msg->offset32); -} - -static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res) -{ - req_msg->offset32 = cpu_to_be32(resp_res | - (be32_to_cpu(req_msg->offset32) & - 0xFFFFFF00)); -} - -static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg) -{ - return (u8) be32_to_cpu(req_msg->offset36); -} - -static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg, - u8 init_depth) -{ - req_msg->offset36 = cpu_to_be32(init_depth | - (be32_to_cpu(req_msg->offset36) & - 0xFFFFFF00)); -} - -static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg) -{ - return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3); -} - -static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg, - u8 resp_timeout) -{ - req_msg->offset40 = cpu_to_be32((resp_timeout << 3) | - (be32_to_cpu(req_msg->offset40) & - 0xFFFFFF07)); -} - static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) { - u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1; + u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg); switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; case 3: - switch (req_msg->offset51 & 0x7) { + switch (IBA_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg)) { case 1: return IB_QPT_XRC_TGT; default: return 0; } @@ -144,240 +39,17 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, { switch(qp_type) { case IB_QPT_UC: - req_msg->offset40 = cpu_to_be32((be32_to_cpu( - req_msg->offset40) & - 0xFFFFFFF9) | 0x2); + IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1); break; case IB_QPT_XRC_INI: - req_msg->offset40 = cpu_to_be32((be32_to_cpu( - req_msg->offset40) & - 0xFFFFFFF9) | 0x6); - req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; + IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 3); + IBA_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg, 1); break; default: - req_msg->offset40 = cpu_to_be32(be32_to_cpu( - req_msg->offset40) & - 0xFFFFFFF9); + IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 0); } } -static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg) -{ - return be32_to_cpu(req_msg->offset40) & 0x1; -} - -static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg, - u8 flow_ctrl) -{ - req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) | - (be32_to_cpu(req_msg->offset40) & - 0xFFFFFFFE)); -} - -static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8); -} - -static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg, - __be32 starting_psn) -{ - req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | - (be32_to_cpu(req_msg->offset44) & 0x000000FF)); -} - -static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg) -{ - return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3); -} - -static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg, - u8 resp_timeout) -{ - req_msg->offset44 = cpu_to_be32((resp_timeout << 3) | - (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07)); -} - -static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg) -{ - return (u8) (be32_to_cpu(req_msg->offset44) & 0x7); -} - -static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg, - u8 retry_count) -{ - req_msg->offset44 = cpu_to_be32((retry_count & 0x7) | - (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8)); -} - -static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg) -{ - return req_msg->offset50 >> 4; -} - -static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu) -{ - req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4)); -} - -static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg) -{ - return req_msg->offset50 & 0x7; -} - -static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg, - u8 rnr_retry_count) -{ - req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) | - (rnr_retry_count & 0x7)); -} - -static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg) -{ - return req_msg->offset51 >> 4; -} - -static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg, - u8 retries) -{ - req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4)); -} - -static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg) -{ - return (req_msg->offset51 & 0x8) >> 3; -} - -static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq) -{ - req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) | - ((srq & 0x1) << 3)); -} - -static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12); -} - -static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg, - __be32 flow_label) -{ - req_msg->primary_offset88 = cpu_to_be32( - (be32_to_cpu(req_msg->primary_offset88) & - 0x00000FFF) | - (be32_to_cpu(flow_label) << 12)); -} - -static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg) -{ - return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F); -} - -static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg, - u8 rate) -{ - req_msg->primary_offset88 = cpu_to_be32( - (be32_to_cpu(req_msg->primary_offset88) & - 0xFFFFFFC0) | (rate & 0x3F)); -} - -static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->primary_offset94 >> 4); -} - -static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl) -{ - req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) | - (sl << 4)); -} - -static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg) -{ - return (u8) ((req_msg->primary_offset94 & 0x08) >> 3); -} - -static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg, - u8 subnet_local) -{ - req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) | - ((subnet_local & 0x1) << 3)); -} - -static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->primary_offset95 >> 3); -} - -static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg, - u8 local_ack_timeout) -{ - req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) | - (local_ack_timeout << 3)); -} - -static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12); -} - -static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg, - __be32 flow_label) -{ - req_msg->alt_offset132 = cpu_to_be32( - (be32_to_cpu(req_msg->alt_offset132) & - 0x00000FFF) | - (be32_to_cpu(flow_label) << 12)); -} - -static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg) -{ - return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F); -} - -static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg, - u8 rate) -{ - req_msg->alt_offset132 = cpu_to_be32( - (be32_to_cpu(req_msg->alt_offset132) & - 0xFFFFFFC0) | (rate & 0x3F)); -} - -static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->alt_offset138 >> 4); -} - -static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl) -{ - req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) | - (sl << 4)); -} - -static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg) -{ - return (u8) ((req_msg->alt_offset138 & 0x08) >> 3); -} - -static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg, - u8 subnet_local) -{ - req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) | - ((subnet_local & 0x1) << 3)); -} - -static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->alt_offset139 >> 3); -} - -static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg, - u8 local_ack_timeout) -{ - req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) | - (local_ack_timeout << 3)); -} - /* Message REJected or MRAed */ enum cm_msg_response { CM_MSG_RESPONSE_REQ = 0x0, @@ -385,419 +57,12 @@ enum cm_msg_response { CM_MSG_RESPONSE_OTHER = 0x2 }; - struct cm_mra_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* message MRAed:2, rsvd:6 */ - u8 offset8; - /* service timeout:5, rsvd:3 */ - u8 offset9; - - u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) -{ - return (u8) (mra_msg->offset8 >> 6); -} - -static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg) -{ - mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6)); -} - -static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg) -{ - return (u8) (mra_msg->offset9 >> 3); -} - -static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg, - u8 service_timeout) -{ - mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) | - (service_timeout << 3)); -} - -struct cm_rej_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* message REJected:2, rsvd:6 */ - u8 offset8; - /* reject info length:7, rsvd:1. */ - u8 offset9; - __be16 reason; - u8 ari[IB_CM_REJ_ARI_LENGTH]; - - u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) -{ - return (u8) (rej_msg->offset8 >> 6); -} - -static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg) -{ - rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6)); -} - -static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg) -{ - return (u8) (rej_msg->offset9 >> 1); -} - -static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg, - u8 len) -{ - rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1)); -} - -struct cm_rep_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - __be32 local_qkey; - /* local QPN:24, rsvd:8 */ - __be32 offset12; - /* local EECN:24, rsvd:8 */ - __be32 offset16; - /* starting PSN:24 rsvd:8 */ - __be32 offset20; - u8 resp_resources; - u8 initiator_depth; - /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ - u8 offset26; - /* RNR retry count:3, SRQ:1, rsvd:5 */ - u8 offset27; - __be64 local_ca_guid; - - u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) -{ - return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8); -} - -static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) -{ - rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); -} - -static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) -{ - return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); -} - -static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) -{ - rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | - (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); -} - static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) { return (qp_type == IB_QPT_XRC_INI) ? - cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); -} - -static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) -{ - return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); -} - -static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg, - __be32 starting_psn) -{ - rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | - (be32_to_cpu(rep_msg->offset20) & 0x000000FF)); -} - -static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg) -{ - return (u8) (rep_msg->offset26 >> 3); -} - -static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg, - u8 target_ack_delay) -{ - rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) | - (target_ack_delay << 3)); -} - -static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg) -{ - return (u8) ((rep_msg->offset26 & 0x06) >> 1); -} - -static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover) -{ - rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) | - ((failover & 0x3) << 1)); -} - -static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg) -{ - return (u8) (rep_msg->offset26 & 0x01); -} - -static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg, - u8 flow_ctrl) -{ - rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) | - (flow_ctrl & 0x1)); -} - -static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg) -{ - return (u8) (rep_msg->offset27 >> 5); -} - -static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg, - u8 rnr_retry_count) -{ - rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) | - (rnr_retry_count << 5)); -} - -static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg) -{ - return (u8) ((rep_msg->offset27 >> 4) & 0x1); -} - -static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq) -{ - rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) | - ((srq & 0x1) << 4)); -} - -struct cm_rtu_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_dreq_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* remote QPN/EECN:24, rsvd:8 */ - __be32 offset8; - - u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) -{ - return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8); -} - -static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn) -{ - dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(dreq_msg->offset8) & 0x000000FF)); -} - -struct cm_drep_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_lap_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - __be32 rsvd8; - /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ - __be32 offset12; - __be32 rsvd16; - - __be16 alt_local_lid; - __be16 alt_remote_lid; - union ib_gid alt_local_gid; - union ib_gid alt_remote_gid; - /* flow label:20, rsvd:4, traffic class:8 */ - __be32 offset56; - u8 alt_hop_limit; - /* rsvd:2, packet rate:6 */ - u8 offset61; - /* SL:4, subnet local:1, rsvd:3 */ - u8 offset62; - /* local ACK timeout:5, rsvd:3 */ - u8 offset63; - - u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; -} __packed; - -static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) -{ - return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8); -} - -static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn) -{ - lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(lap_msg->offset12) & - 0x000000FF)); -} - -static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg) -{ - return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3); -} - -static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg, - u8 resp_timeout) -{ - lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) | - (be32_to_cpu(lap_msg->offset12) & - 0xFFFFFF07)); -} - -static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) -{ - return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12); -} - -static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg, - __be32 flow_label) -{ - lap_msg->offset56 = cpu_to_be32( - (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) | - (be32_to_cpu(flow_label) << 12)); -} - -static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg) -{ - return (u8) be32_to_cpu(lap_msg->offset56); -} - -static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg, - u8 traffic_class) -{ - lap_msg->offset56 = cpu_to_be32(traffic_class | - (be32_to_cpu(lap_msg->offset56) & - 0xFFFFFF00)); -} - -static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg) -{ - return lap_msg->offset61 & 0x3F; -} - -static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg, - u8 packet_rate) -{ - lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0); -} - -static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg) -{ - return lap_msg->offset62 >> 4; -} - -static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl) -{ - lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F); -} - -static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg) -{ - return (lap_msg->offset62 >> 3) & 0x1; -} - -static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg, - u8 subnet_local) -{ - lap_msg->offset62 = ((subnet_local & 0x1) << 3) | - (lap_msg->offset61 & 0xF7); -} -static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg) -{ - return lap_msg->offset63 >> 3; -} - -static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg, - u8 local_ack_timeout) -{ - lap_msg->offset63 = (local_ack_timeout << 3) | - (lap_msg->offset63 & 0x07); -} - -struct cm_apr_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 info_length; - u8 ap_status; - __be16 rsvd; - u8 info[IB_CM_APR_INFO_LENGTH]; - - u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; -} __packed; - -struct cm_sidr_req_msg { - struct ib_mad_hdr hdr; - - __be32 request_id; - __be16 pkey; - __be16 rsvd; - __be64 service_id; - - u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __packed; - -struct cm_sidr_rep_msg { - struct ib_mad_hdr hdr; - - __be32 request_id; - u8 status; - u8 info_length; - __be16 rsvd; - /* QPN:24, rsvd:8 */ - __be32 offset8; - __be64 service_id; - __be32 qkey; - u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; - - u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; -} __packed; - -static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) -{ - return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8); -} - -static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg, - __be32 qpn) -{ - sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(sidr_rep_msg->offset8) & - 0x000000FF)); + cpu_to_be32(IBA_GET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, + rep_msg)) : + cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg)); } #endif /* CM_MSGS_H */ diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 25f2b70fd8ef..2dec3a02ab9f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -36,6 +36,7 @@ #include "core_priv.h" #include "cma_priv.h" +#include "cma_trace.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); @@ -877,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + trace_cm_id_create(id_priv); return &id_priv->id; } EXPORT_SYMBOL(__rdma_create_id); @@ -928,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id->device != pd->device) - return -EINVAL; + if (id->device != pd->device) { + ret = -EINVAL; + goto out_err; + } qp_init_attr->port_num = id->port_num; qp = ib_create_qp(pd, qp_init_attr); - if (IS_ERR(qp)) - return PTR_ERR(qp); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out_err; + } if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) - goto err; + goto out_destroy; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); + trace_cm_qp_create(id_priv, pd, qp_init_attr, 0); return 0; -err: +out_destroy: ib_destroy_qp(qp); +out_err: + trace_cm_qp_create(id_priv, pd, qp_init_attr, ret); return ret; } EXPORT_SYMBOL(rdma_create_qp); @@ -958,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id) struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); + trace_cm_qp_destroy(id_priv); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; @@ -1811,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) enum rdma_cm_state state; id_priv = container_of(id, struct rdma_id_private, id); + trace_cm_id_destroy(id_priv); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); @@ -1863,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) if (ret) goto reject; + trace_cm_send_rtu(id_priv); ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; @@ -1871,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) reject: pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); cma_modify_qp_err(id_priv); + trace_cm_send_rej(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; @@ -1890,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = rep_data->remote_qpn; } +static int cma_cm_event_handler(struct rdma_id_private *id_priv, + struct rdma_cm_event *event) +{ + int ret; + + trace_cm_event_handler(id_priv, event); + ret = id_priv->id.event_handler(&id_priv->id, event); + trace_cm_event_done(id_priv, event, ret); + return ret; +} + static int cma_ib_handler(struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event) { @@ -1912,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, break; case IB_CM_REP_RECEIVED: if (cma_comp(id_priv, RDMA_CM_CONNECT) && - (id_priv->id.qp_type != IB_QPT_UD)) + (id_priv->id.qp_type != IB_QPT_UD)) { + trace_cm_send_mra(id_priv); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : @@ -1958,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, goto out; } - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; @@ -2119,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, if (IS_ERR(listen_id)) return PTR_ERR(listen_id); + trace_cm_req_handler(listen_id, ib_event->event); if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; @@ -2161,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, * until we're done accessing it. */ atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); + ret = cma_cm_event_handler(conn_id, &event); if (ret) goto err3; /* @@ -2170,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, */ mutex_lock(&lock); if (cma_comp(conn_id, RDMA_CM_CONNECT) && - (conn_id->id.qp_type != IB_QPT_UD)) + (conn_id->id.qp_type != IB_QPT_UD)) { + trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); @@ -2286,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; @@ -2363,7 +2392,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, * until we're done accessing it. */ atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); + ret = cma_cm_event_handler(conn_id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; @@ -2435,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id, id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; + trace_cm_event_handler(id_priv, event); return id_priv->id.event_handler(id, event); } @@ -2611,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work) if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + if (cma_cm_event_handler(id_priv, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -2634,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work) id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + if (cma_cm_event_handler(id_priv, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -3089,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - if (id_priv->id.event_handler(&id_priv->id, &event)) { + if (cma_cm_event_handler(id_priv, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); @@ -3118,6 +3148,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3144,6 +3175,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3180,19 +3212,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, + rdma_addr_size(dst_addr)); return ret; + } } - if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { @@ -3736,7 +3775,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, goto out; } - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { @@ -3800,6 +3839,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; + trace_cm_send_sidr_req(id_priv); ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); @@ -3873,6 +3913,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; + trace_cm_send_req(id_priv); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { @@ -3986,6 +4027,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; + trace_cm_send_rep(id_priv); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; @@ -4035,6 +4077,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, rep.private_data = private_data; rep.private_data_len = private_data_len; + trace_cm_send_sidr_rep(id_priv); return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } @@ -4120,13 +4163,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (id->qp_type == IB_QPT_UD) + if (id->qp_type == IB_QPT_UD) { ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); - else + } else { + trace_cm_send_rej(id_priv); ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); + } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); @@ -4151,8 +4196,13 @@ int rdma_disconnect(struct rdma_cm_id *id) if (ret) goto out; /* Initiate or respond to a disconnect. */ - if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) - ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + trace_cm_disconnect(id_priv); + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { + if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0)) + trace_cm_sent_drep(id_priv); + } else { + trace_cm_sent_dreq(id_priv); + } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); } else @@ -4218,7 +4268,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) } else event.event = RDMA_CM_EVENT_MULTICAST_ERROR; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { @@ -4623,6 +4673,7 @@ static void cma_add_one(struct ib_device *device) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); + trace_cm_add_one(device); return; free_gid_type: @@ -4653,7 +4704,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) goto out; event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; @@ -4691,6 +4742,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; + trace_cm_remove_one(device); + if (!cma_dev) return; @@ -4763,6 +4816,7 @@ err_ib: err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; diff --git a/drivers/infiniband/core/cma_trace.c b/drivers/infiniband/core/cma_trace.c new file mode 100644 index 000000000000..b314a281e10e --- /dev/null +++ b/drivers/infiniband/core/cma_trace.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Trace points for the RDMA Connection Manager. + * + * Author: Chuck Lever <chuck.lever@oracle.com> + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#define CREATE_TRACE_POINTS + +#include <rdma/rdma_cm.h> +#include <rdma/ib_cm.h> +#include "cma_priv.h" + +#include "cma_trace.h" diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h new file mode 100644 index 000000000000..81e36bf13159 --- /dev/null +++ b/drivers/infiniband/core/cma_trace.h @@ -0,0 +1,391 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trace point definitions for the RDMA Connect Manager. + * + * Author: Chuck Lever <chuck.lever@oracle.com> + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rdma_cma + +#if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _TRACE_RDMA_CMA_H + +#include <linux/tracepoint.h> +#include <trace/events/rdma.h> + +/* + * enum ib_cm_event_type, from include/rdma/ib_cm.h + */ +#define IB_CM_EVENT_LIST \ + ib_cm_event(REQ_ERROR) \ + ib_cm_event(REQ_RECEIVED) \ + ib_cm_event(REP_ERROR) \ + ib_cm_event(REP_RECEIVED) \ + ib_cm_event(RTU_RECEIVED) \ + ib_cm_event(USER_ESTABLISHED) \ + ib_cm_event(DREQ_ERROR) \ + ib_cm_event(DREQ_RECEIVED) \ + ib_cm_event(DREP_RECEIVED) \ + ib_cm_event(TIMEWAIT_EXIT) \ + ib_cm_event(MRA_RECEIVED) \ + ib_cm_event(REJ_RECEIVED) \ + ib_cm_event(LAP_ERROR) \ + ib_cm_event(LAP_RECEIVED) \ + ib_cm_event(APR_RECEIVED) \ + ib_cm_event(SIDR_REQ_ERROR) \ + ib_cm_event(SIDR_REQ_RECEIVED) \ + ib_cm_event_end(SIDR_REP_RECEIVED) + +#undef ib_cm_event +#undef ib_cm_event_end + +#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x); +#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); + +IB_CM_EVENT_LIST + +#undef ib_cm_event +#undef ib_cm_event_end + +#define ib_cm_event(x) { IB_CM_##x, #x }, +#define ib_cm_event_end(x) { IB_CM_##x, #x } + +#define rdma_show_ib_cm_event(x) \ + __print_symbolic(x, IB_CM_EVENT_LIST) + + +DECLARE_EVENT_CLASS(cma_fsm_class, + TP_PROTO( + const struct rdma_id_private *id_priv + ), + + TP_ARGS(id_priv), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos + ) +); + +#define DEFINE_CMA_FSM_EVENT(name) \ + DEFINE_EVENT(cma_fsm_class, cm_##name, \ + TP_PROTO( \ + const struct rdma_id_private *id_priv \ + ), \ + TP_ARGS(id_priv)) + +DEFINE_CMA_FSM_EVENT(send_rtu); +DEFINE_CMA_FSM_EVENT(send_rej); +DEFINE_CMA_FSM_EVENT(send_mra); +DEFINE_CMA_FSM_EVENT(send_sidr_req); +DEFINE_CMA_FSM_EVENT(send_sidr_rep); +DEFINE_CMA_FSM_EVENT(disconnect); +DEFINE_CMA_FSM_EVENT(sent_drep); +DEFINE_CMA_FSM_EVENT(sent_dreq); +DEFINE_CMA_FSM_EVENT(id_destroy); + +TRACE_EVENT(cm_id_create, + TP_PROTO( + const struct rdma_id_private *id_priv + ), + + TP_ARGS(id_priv), + + TP_STRUCT__entry( + __field(u32, cm_id) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + ), + + TP_printk("cm.id=%u", + __entry->cm_id + ) +); + +DECLARE_EVENT_CLASS(cma_qp_class, + TP_PROTO( + const struct rdma_id_private *id_priv + ), + + TP_ARGS(id_priv), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(u32, qp_num) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->qp_num = id_priv->qp_num; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + __entry->qp_num + ) +); + +#define DEFINE_CMA_QP_EVENT(name) \ + DEFINE_EVENT(cma_qp_class, cm_##name, \ + TP_PROTO( \ + const struct rdma_id_private *id_priv \ + ), \ + TP_ARGS(id_priv)) + +DEFINE_CMA_QP_EVENT(send_req); +DEFINE_CMA_QP_EVENT(send_rep); +DEFINE_CMA_QP_EVENT(qp_destroy); + +/* + * enum ib_wp_type, from include/rdma/ib_verbs.h + */ +#define IB_QP_TYPE_LIST \ + ib_qp_type(SMI) \ + ib_qp_type(GSI) \ + ib_qp_type(RC) \ + ib_qp_type(UC) \ + ib_qp_type(UD) \ + ib_qp_type(RAW_IPV6) \ + ib_qp_type(RAW_ETHERTYPE) \ + ib_qp_type(RAW_PACKET) \ + ib_qp_type(XRC_INI) \ + ib_qp_type_end(XRC_TGT) + +#undef ib_qp_type +#undef ib_qp_type_end + +#define ib_qp_type(x) TRACE_DEFINE_ENUM(IB_QPT_##x); +#define ib_qp_type_end(x) TRACE_DEFINE_ENUM(IB_QPT_##x); + +IB_QP_TYPE_LIST + +#undef ib_qp_type +#undef ib_qp_type_end + +#define ib_qp_type(x) { IB_QPT_##x, #x }, +#define ib_qp_type_end(x) { IB_QPT_##x, #x } + +#define rdma_show_qp_type(x) \ + __print_symbolic(x, IB_QP_TYPE_LIST) + + +TRACE_EVENT(cm_qp_create, + TP_PROTO( + const struct rdma_id_private *id_priv, + const struct ib_pd *pd, + const struct ib_qp_init_attr *qp_init_attr, + int rc + ), + + TP_ARGS(id_priv, pd, qp_init_attr, rc), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, pd_id) + __field(u32, tos) + __field(u32, qp_num) + __field(u32, send_wr) + __field(u32, recv_wr) + __field(int, rc) + __field(unsigned long, qp_type) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->pd_id = pd->res.id; + __entry->tos = id_priv->tos; + __entry->send_wr = qp_init_attr->cap.max_send_wr; + __entry->recv_wr = qp_init_attr->cap.max_recv_wr; + __entry->rc = rc; + if (!rc) { + __entry->qp_num = id_priv->qp_num; + __entry->qp_type = id_priv->id.qp_type; + } else { + __entry->qp_num = 0; + __entry->qp_type = 0; + } + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s" + " send_wr=%u recv_wr=%u qp_num=%u rc=%d", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, + __entry->tos, __entry->pd_id, + rdma_show_qp_type(__entry->qp_type), __entry->send_wr, + __entry->recv_wr, __entry->qp_num, __entry->rc + ) +); + +TRACE_EVENT(cm_req_handler, + TP_PROTO( + const struct rdma_id_private *id_priv, + int event + ), + + TP_ARGS(id_priv, event), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(unsigned long, event) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->event = event; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + rdma_show_ib_cm_event(__entry->event), __entry->event + ) +); + +TRACE_EVENT(cm_event_handler, + TP_PROTO( + const struct rdma_id_private *id_priv, + const struct rdma_cm_event *event + ), + + TP_ARGS(id_priv, event), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(unsigned long, event) + __field(int, status) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->event = event->event; + __entry->status = event->status; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + rdma_show_cm_event(__entry->event), __entry->event, + __entry->status + ) +); + +TRACE_EVENT(cm_event_done, + TP_PROTO( + const struct rdma_id_private *id_priv, + const struct rdma_cm_event *event, + int result + ), + + TP_ARGS(id_priv, event, result), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(unsigned long, event) + __field(int, result) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->event = event->event; + __entry->result = result; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + rdma_show_cm_event(__entry->event), __entry->result + ) +); + +DECLARE_EVENT_CLASS(cma_client_class, + TP_PROTO( + const struct ib_device *device + ), + + TP_ARGS(device), + + TP_STRUCT__entry( + __string(name, device->name) + ), + + TP_fast_assign( + __assign_str(name, device->name); + ), + + TP_printk("device name=%s", + __get_str(name) + ) +); + +#define DEFINE_CMA_CLIENT_EVENT(name) \ + DEFINE_EVENT(cma_client_class, cm_##name, \ + TP_PROTO( \ + const struct ib_device *device \ + ), \ + TP_ARGS(device)) + +DEFINE_CMA_CLIENT_EVENT(add_one); +DEFINE_CMA_CLIENT_EVENT(remove_one); + +#endif /* _TRACE_RDMA_CMA_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE cma_trace + +#include <trace/define_trace.h> diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3645e092e1c7..cf42acca4a3a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +void ib_dispatch_event_clients(struct ib_event *event); #ifdef CONFIG_CGROUP_RDMA void ib_device_register_rdmacg(struct ib_device *device); @@ -320,7 +321,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata, - struct ib_uobject *uobj) + struct ib_uqp_object *uobj) { enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; @@ -337,6 +338,20 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->pd = pd; qp->uobject = uobj; qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + qp->srq = attr->srq; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->event_handler = attr->event_handler; + + atomic_set(&qp->usecnt, 0); + spin_lock_init(&qp->mr_lock); + INIT_LIST_HEAD(&qp->rdma_mrs); + INIT_LIST_HEAD(&qp->sig_mrs); + /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 8434ec082c3a..2257d7f7810f 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -286,6 +286,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; + if (!qp->res.valid) + return 0; + if (!rdma_is_port_valid(dev, port)) return -EINVAL; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index bbfded6d5d3d..4f25b2400694 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -7,6 +7,8 @@ #include <linux/slab.h> #include <rdma/ib_verbs.h> +#include <trace/events/rdma_core.h> + /* # of WCs to poll for with a single call to ib_poll_cq */ #define IB_POLL_BATCH 16 #define IB_POLL_BATCH_DIRECT 8 @@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w) dim->state = DIM_START_MEASURE; + trace_cq_modify(cq, comps, usec); cq->device->ops.modify_cq(cq, comps, usec); } @@ -65,18 +68,29 @@ static void rdma_dim_init(struct ib_cq *cq) INIT_WORK(&dim->work, ib_cq_rdma_dim_work); } +static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + int rc; + + rc = ib_poll_cq(cq, num_entries, wc); + trace_cq_poll(cq, num_entries, rc); + return rc; +} + static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, int batch) { int i, n, completed = 0; + trace_cq_process(cq); + /* * budget might be (-1) if the caller does not * want to bound this call, thus we need unsigned * minimum here. */ - while ((n = ib_poll_cq(cq, min_t(u32, batch, - budget - completed), wcs)) > 0) { + while ((n = __poll_cq(cq, min_t(u32, batch, + budget - completed), wcs)) > 0) { for (i = 0; i < n; i++) { struct ib_wc *wc = &wcs[i]; @@ -131,8 +145,10 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH); if (completed < budget) { irq_poll_complete(&cq->iop); - if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) + if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) { + trace_cq_reschedule(cq); irq_poll_sched(&cq->iop); + } } if (dim) @@ -143,6 +159,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) static void ib_cq_completion_softirq(struct ib_cq *cq, void *private) { + trace_cq_schedule(cq); irq_poll_sched(&cq->iop); } @@ -162,6 +179,7 @@ static void ib_cq_poll_work(struct work_struct *work) static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) { + trace_cq_schedule(cq); queue_work(cq->comp_wq, &cq->work); } @@ -239,6 +257,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, goto out_destroy_cq; } + trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx); return cq; out_destroy_cq: @@ -248,6 +267,7 @@ out_free_wc: kfree(cq->wc); out_free_cq: kfree(cq); + trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq_user); @@ -304,6 +324,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) WARN_ON_ONCE(1); } + trace_cq_free(cq); rdma_restrack_del(&cq->res); cq->device->ops.destroy_cq(cq, udata); if (cq->dim) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 84dd74fe13b8..f6c255202d7f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -587,7 +587,8 @@ struct ib_device *_ib_alloc_device(size_t size) rdma_init_coredev(&device->coredev, device, &init_net); INIT_LIST_HEAD(&device->event_handler_list); - spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->qp_open_list_lock); + init_rwsem(&device->event_handler_rwsem); mutex_init(&device->unregistration_lock); /* * client_data needs to be alloc because we don't want our mark to be @@ -1931,17 +1932,15 @@ EXPORT_SYMBOL(ib_set_client_data); * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in - * chapter 11 of the InfiniBand Architecture Specification). This - * callback may occur in interrupt context. + * chapter 11 of the InfiniBand Architecture Specification). This + * callback occurs in workqueue context. */ void ib_register_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_register_event_handler); @@ -1954,35 +1953,23 @@ EXPORT_SYMBOL(ib_register_event_handler); */ void ib_unregister_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_unregister_event_handler); -/** - * ib_dispatch_event - Dispatch an asynchronous event - * @event:Event to dispatch - * - * Low-level drivers must call ib_dispatch_event() to dispatch the - * event to all registered event handlers when an asynchronous event - * occurs. - */ -void ib_dispatch_event(struct ib_event *event) +void ib_dispatch_event_clients(struct ib_event *event) { - unsigned long flags; struct ib_event_handler *handler; - spin_lock_irqsave(&event->device->event_handler_lock, flags); + down_read(&event->device->event_handler_rwsem); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); + up_read(&event->device->event_handler_rwsem); } -EXPORT_SYMBOL(ib_dispatch_event); static int iw_query_port(struct ib_device *device, u8 port_num, @@ -1990,7 +1977,6 @@ static int iw_query_port(struct ib_device *device, { struct in_device *inetdev; struct net_device *netdev; - int err; memset(port_attr, 0, sizeof(*port_attr)); @@ -2021,11 +2007,7 @@ static int iw_query_port(struct ib_device *device, } dev_put(netdev); - err = device->ops.query_port(device, port_num, port_attr); - if (err) - return err; - - return 0; + return device->ops.query_port(device, port_num, port_attr); } static int __ib_query_port(struct ib_device *device, diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index f509c478b469..b51bd7087a88 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -232,34 +232,40 @@ void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) if (!entry) return; + xa_lock(&entry->ucontext->mmap_xa); entry->driver_removed = true; + xa_unlock(&entry->ucontext->mmap_xa); kref_put(&entry->ref, rdma_user_mmap_entry_free); } EXPORT_SYMBOL(rdma_user_mmap_entry_remove); /** - * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa + * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa + * in a given range. * * @ucontext: associated user context. * @entry: the entry to insert into the mmap_xa * @length: length of the address that will be mmapped + * @min_pgoff: minimum pgoff to be returned + * @max_pgoff: maximum pgoff to be returned * * This function should be called by drivers that use the rdma_user_mmap * interface for implementing their mmap syscall A database of mmap offsets is * handled in the core and helper functions are provided to insert entries * into the database and extract entries when the user calls mmap with the - * given offset. The function allocates a unique page offset that should be - * provided to user, the user will use the offset to retrieve information such - * as address to be mapped and how. + * given offset. The function allocates a unique page offset in a given range + * that should be provided to user, the user will use the offset to retrieve + * information such as address to be mapped and how. * * Return: 0 on success and -ENOMEM on failure */ -int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, - struct rdma_user_mmap_entry *entry, - size_t length) +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff) { struct ib_uverbs_file *ufile = ucontext->ufile; - XA_STATE(xas, &ucontext->mmap_xa, 0); + XA_STATE(xas, &ucontext->mmap_xa, min_pgoff); u32 xa_first, xa_last, npages; int err; u32 i; @@ -285,7 +291,7 @@ int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, entry->npages = npages; while (true) { /* First find an empty index */ - xas_find_marked(&xas, U32_MAX, XA_FREE_MARK); + xas_find_marked(&xas, max_pgoff, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) goto err_unlock; @@ -332,4 +338,30 @@ err_unlock: mutex_unlock(&ufile->umap_lock); return -ENOMEM; } +EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range); + +/** + * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa. + * + * @ucontext: associated user context. + * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for handling user mmapped addresses. The database is handled in + * the core and helper functions are provided to insert entries into the + * database and extract entries when the user calls mmap with the given offset. + * The function allocates a unique page offset that should be provided to user, + * the user will use the offset to retrieve information such as address to + * be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0, + U32_MAX); +} EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ade71823370f..da8adadf4755 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index cbf6041a5d4a..e0b0a91da696 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -41,6 +41,7 @@ #include "core_priv.h" #include "cma_priv.h" #include "restrack.h" +#include "uverbs.h" typedef int (*res_fill_func_t)(struct sk_buff*, bool, struct rdma_restrack_entry*, uint32_t); @@ -599,7 +600,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, goto err; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, - cq->uobject->context->res.id)) + cq->uobject->uevent.uobject.context->res.id)) goto err; if (fill_res_name_pid(msg, res)) @@ -1756,6 +1757,8 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret) goto err_msg; } else { + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + goto err_msg; qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 6c72773faf29..5128cb16bb48 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -42,26 +42,21 @@ #include "core_priv.h" #include "rdma_core.h" -void uverbs_uobject_get(struct ib_uobject *uobject) -{ - kref_get(&uobject->ref); -} - static void uverbs_uobject_free(struct kref *ref) { - struct ib_uobject *uobj = - container_of(ref, struct ib_uobject, ref); - - if (uobj->uapi_object->type_class->needs_kfree_rcu) - kfree_rcu(uobj, rcu); - else - kfree(uobj); + kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu); } +/* + * In order to indicate we no longer needs this uobject, uverbs_uobject_put + * is called. When the reference count is decreased, the uobject is freed. + * For example, this is used when attaching a completion channel to a CQ. + */ void uverbs_uobject_put(struct ib_uobject *uobject) { kref_put(&uobject->ref, uverbs_uobject_free); } +EXPORT_SYMBOL(uverbs_uobject_put); static int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode) @@ -135,7 +130,11 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); - if (uobj->object) { + if (reason == RDMA_REMOVE_ABORT) { + WARN_ON(!list_empty(&uobj->list)); + WARN_ON(!uobj->context); + uobj->uapi_object->type_class->alloc_abort(uobj); + } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); if (ret) { @@ -151,12 +150,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, uobj->object = NULL; } - if (reason == RDMA_REMOVE_ABORT) { - WARN_ON(!list_empty(&uobj->list)); - WARN_ON(!uobj->context); - uobj->uapi_object->type_class->alloc_abort(uobj); - } - uobj->context = NULL; /* @@ -263,15 +256,20 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, } /* alloc_uobj must be undone by uverbs_destroy_uobject() */ -static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, +static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs, const struct uverbs_api_object *obj) { + struct ib_uverbs_file *ufile = attrs->ufile; struct ib_uobject *uobj; - struct ib_ucontext *ucontext; - ucontext = ib_uverbs_get_ucontext_file(ufile); - if (IS_ERR(ucontext)) - return ERR_CAST(ucontext); + if (!attrs->context) { + struct ib_ucontext *ucontext = + ib_uverbs_get_ucontext_file(ufile); + + if (IS_ERR(ucontext)) + return ERR_CAST(ucontext); + attrs->context = ucontext; + } uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL); if (!uobj) @@ -281,7 +279,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, * The object is added to the list in the commit stage. */ uobj->ufile = ufile; - uobj->context = ucontext; + uobj->context = attrs->context; INIT_LIST_HEAD(&uobj->list); uobj->uapi_object = obj; /* @@ -358,9 +356,9 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj, uobject = f->private_data; /* - * fget(id) ensures we are not currently running uverbs_close_fd, - * and the caller is expected to ensure that uverbs_close_fd is never - * done while a call top lookup is possible. + * fget(id) ensures we are not currently running + * uverbs_uobject_fd_release(), and the caller is expected to ensure + * that release is never done while a call to lookup is possible. */ if (f->f_op != fd_type->fops) { fput(f); @@ -424,12 +422,12 @@ free: static struct ib_uobject * alloc_begin_idr_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct uverbs_attr_bundle *attrs) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ufile, obj); + uobj = alloc_uobj(attrs, obj); if (IS_ERR(uobj)) return uobj; @@ -445,7 +443,7 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj, return uobj; remove: - xa_erase(&ufile->idr, uobj->id); + xa_erase(&attrs->ufile->idr, uobj->id); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); @@ -453,31 +451,48 @@ uobj_put: static struct ib_uobject * alloc_begin_fd_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct uverbs_attr_bundle *attrs) { + const struct uverbs_obj_fd_type *fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; + struct file *filp; + + if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release)) + return ERR_PTR(-EINVAL); new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ufile, obj); - if (IS_ERR(uobj)) { - put_unused_fd(new_fd); - return uobj; + uobj = alloc_uobj(attrs, obj); + if (IS_ERR(uobj)) + goto err_fd; + + /* Note that uverbs_uobject_fd_release() is called during abort */ + filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL, + fd_type->flags); + if (IS_ERR(filp)) { + uobj = ERR_CAST(filp); + goto err_uobj; } + uobj->object = filp; uobj->id = new_fd; - uobj->ufile = ufile; + return uobj; +err_uobj: + uverbs_uobject_put(uobj); +err_fd: + put_unused_fd(new_fd); return uobj; } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_file *ufile = attrs->ufile; struct ib_uobject *ret; if (IS_ERR(obj)) @@ -491,13 +506,11 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, if (!down_read_trylock(&ufile->hw_destroy_rwsem)) return ERR_PTR(-EIO); - ret = obj->type_class->alloc_begin(obj, ufile); + ret = obj->type_class->alloc_begin(obj, attrs); if (IS_ERR(ret)) { up_read(&ufile->hw_destroy_rwsem); return ret; } - if (attrs) - attrs->context = ret->context; return ret; } @@ -544,6 +557,9 @@ static void remove_handle_idr_uobject(struct ib_uobject *uobj) static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { + struct file *filp = uobj->object; + + fput(filp); put_unused_fd(uobj->id); } @@ -553,7 +569,7 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->context_closed(uobj, why); + int ret = fd_type->destroy_object(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) return ret; @@ -565,7 +581,7 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj) { } -static int alloc_commit_idr_uobject(struct ib_uobject *uobj) +static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; void *old; @@ -579,33 +595,14 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj) */ old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL); WARN_ON(old != NULL); - - return 0; } -static int alloc_commit_fd_uobject(struct ib_uobject *uobj) +static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { - const struct uverbs_obj_fd_type *fd_type = container_of( - uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); int fd = uobj->id; - struct file *filp; - - /* - * The kref for uobj is moved into filp->private data and put in - * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd() - * must be guaranteed to be called from the provided fops release - * callback. - */ - filp = anon_inode_getfile(fd_type->name, - fd_type->fops, - uobj, - fd_type->flags); - if (IS_ERR(filp)) - return PTR_ERR(filp); - - uobj->object = filp; + struct file *filp = uobj->object; - /* Matching put will be done in uverbs_close_fd() */ + /* Matching put will be done in uverbs_uobject_fd_release() */ kref_get(&uobj->ufile->ref); /* This shouldn't be used anymore. Use the file object instead */ @@ -613,11 +610,10 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) /* * NOTE: Once we install the file we loose ownership of our kref on - * uobj. It will be put by uverbs_close_fd() + * uobj. It will be put by uverbs_uobject_fd_release() */ + filp->private_data = uobj; fd_install(fd, filp); - - return 0; } /* @@ -625,19 +621,13 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) * caller can no longer assume uobj is valid. If this function fails it * destroys the uboject, including the attached HW object. */ -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs) +void rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = attrs->ufile; - int ret; /* alloc_commit consumes the uobj kref */ - ret = uobj->uapi_object->type_class->alloc_commit(uobj); - if (ret) { - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); - up_read(&ufile->hw_destroy_rwsem); - return ret; - } + uobj->uapi_object->type_class->alloc_commit(uobj); /* kref is held so long as the uobj is on the uobj list. */ uverbs_uobject_get(uobj); @@ -650,8 +640,6 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); - - return 0; } /* @@ -663,7 +651,6 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, { struct ib_uverbs_file *ufile = uobj->ufile; - uobj->object = NULL; uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ @@ -681,7 +668,10 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, struct file *filp = uobj->object; WARN_ON(mode != UVERBS_LOOKUP_READ); - /* This indirectly calls uverbs_close_fd and free the object */ + /* + * This indirectly calls uverbs_uobject_fd_release() and free the + * object + */ fput(filp); } @@ -744,33 +734,32 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, - /* - * When we destroy an object, we first just lock it for WRITE and - * actually DESTROY it in the finalize stage. So, the problematic - * scenario is when we just started the finalize stage of the - * destruction (nothing was executed yet). Now, the other thread - * fetched the object for READ access, but it didn't lock it yet. - * The DESTROY thread continues and starts destroying the object. - * When the other thread continue - without the RCU, it would - * access freed memory. However, the rcu_read_lock delays the free - * until the rcu_read_lock of the READ operation quits. Since the - * exclusive lock of the object is still taken by the DESTROY flow, the - * READ operation will get -EBUSY and it'll just bail out. - */ - .needs_kfree_rcu = true, }; EXPORT_SYMBOL(uverbs_idr_class); -void uverbs_close_fd(struct file *f) +/* + * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct + * file_operations release method. + */ +int uverbs_uobject_fd_release(struct inode *inode, struct file *filp) { - struct ib_uobject *uobj = f->private_data; - struct ib_uverbs_file *ufile = uobj->ufile; - struct uverbs_attr_bundle attrs = { - .context = uobj->context, - .ufile = ufile, - }; + struct ib_uverbs_file *ufile; + struct ib_uobject *uobj; + + /* + * This can only happen if the fput came from alloc_abort_fd_uobject() + */ + if (!filp->private_data) + return 0; + uobj = filp->private_data; + ufile = uobj->ufile; if (down_read_trylock(&ufile->hw_destroy_rwsem)) { + struct uverbs_attr_bundle attrs = { + .context = uobj->context, + .ufile = ufile, + }; + /* * lookup_get_fd_uobject holds the kref on the struct file any * time a FD uobj is locked, which prevents this release @@ -782,13 +771,14 @@ void uverbs_close_fd(struct file *f) up_read(&ufile->hw_destroy_rwsem); } - /* Matches the get in alloc_begin_fd_uobject */ + /* Matches the get in alloc_commit_fd_uobject() */ kref_put(&ufile->ref, ib_uverbs_release_file); /* Pairs with filp->private_data in alloc_begin_fd_uobject */ uverbs_uobject_put(uobj); + return 0; } -EXPORT_SYMBOL(uverbs_close_fd); +EXPORT_SYMBOL(uverbs_uobject_fd_release); /* * Drop the ucontext off the ufile and completely disconnect it from the @@ -855,9 +845,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, } /* - * Destroy the uncontext and every uobject associated with it. If called with - * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has - * been completed and ufile->ucontext is NULL. + * Destroy the uncontext and every uobject associated with it. * * This is internally locked and can be called in parallel from multiple * contexts. @@ -865,22 +853,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { - if (reason == RDMA_REMOVE_CLOSE) { - /* - * During destruction we might trigger something that - * synchronously calls release on any file descriptor. For - * this reason all paths that come from file_operations - * release must use try_lock. They can progress knowing that - * there is an ongoing uverbs_destroy_ufile_hw that will clean - * up the driver resources. - */ - if (!mutex_trylock(&ufile->ucontext_lock)) - return; - - } else { - mutex_lock(&ufile->ucontext_lock); - } - down_write(&ufile->hw_destroy_rwsem); /* @@ -909,7 +881,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, done: up_write(&ufile->hw_destroy_rwsem); - mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { @@ -920,7 +891,6 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .lookup_put = lookup_put_fd_uobject, .destroy_hw = destroy_hw_fd_uobject, .remove_handle = remove_handle_fd_uobject, - .needs_kfree_rcu = false, }; EXPORT_SYMBOL(uverbs_fd_class); @@ -943,19 +913,17 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, return rdma_lookup_get_uobject(obj, attrs->ufile, id, UVERBS_LOOKUP_WRITE, attrs); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); + return rdma_alloc_begin_uobject(obj, attrs); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); } } -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs) +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs) { - int ret = 0; - /* * refcounts should be handled at the object level and not at the * uobject level. Refcounts of the objects themselves are done in @@ -975,14 +943,11 @@ int uverbs_finalize_object(struct ib_uobject *uobj, break; case UVERBS_ACCESS_NEW: if (commit) - ret = rdma_alloc_commit_uobject(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); else rdma_alloc_abort_uobject(uobj, attrs); break; default: WARN_ON(true); - ret = -EOPNOTSUPP; } - - return ret; } diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index e63fbda25e1d..33978e0f1262 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -51,29 +51,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); /* - * uverbs_uobject_get is called in order to increase the reference count on - * an uobject. This is useful when a handler wants to keep the uobject's memory - * alive, regardless if this uobject is still alive in the context's objects - * repository. Objects are put via uverbs_uobject_put. - */ -void uverbs_uobject_get(struct ib_uobject *uobject); - -/* - * In order to indicate we no longer needs this uobject, uverbs_uobject_put - * is called. When the reference count is decreased, the uobject is freed. - * For example, this is used when attaching a completion channel to a CQ. - */ -void uverbs_uobject_put(struct ib_uobject *uobject); - -/* Indicate this fd is no longer used by this consumer, but its memory isn't - * necessarily released yet. When the last reference is put, we release the - * memory. After this call is executed, calling uverbs_uobject_get isn't - * allowed. - * This must be called from the release file_operations of the file! - */ -void uverbs_close_fd(struct file *f); - -/* * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable. @@ -86,24 +63,9 @@ struct ib_uobject * uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, s64 id, struct uverbs_attr_bundle *attrs); -/* - * Note that certain finalize stages could return a status: - * (a) alloc_commit could return a failure if the object is committed at the - * same time when the context is destroyed. - * (b) remove_commit could fail if the object wasn't destroyed successfully. - * Since multiple objects could be finalized in one transaction, it is very NOT - * recommended to have several finalize actions which have side effects. - * For example, it's NOT recommended to have a certain action which has both - * a commit action and a destroy action or two destroy objects in the same - * action. The rule of thumb is to have one destroy or commit action with - * multiple lookups. - * The first non zero return value of finalize_object is returned from this - * function. For example, this could happen when we couldn't destroy an - * object. - */ -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs); +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); @@ -189,6 +151,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); +extern const struct uapi_definition uverbs_def_obj_async_fd[]; extern const struct uapi_definition uverbs_def_obj_counters[]; extern const struct uapi_definition uverbs_def_obj_cq[]; extern const struct uapi_definition uverbs_def_obj_device[]; diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 4fad732f9b3c..06e5b6787443 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -273,6 +273,23 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } +static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); + else + ib_dma_unmap_sg(dev, sg, sg_cnt, dir); +} + +static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + return ib_dma_map_sg(dev, sg, sg_cnt, dir); +} + /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -295,11 +312,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_device *dev = qp->pd->device; int ret; - if (is_pci_p2pdma_page(sg_page(sg))) - ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); - else - ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); - + ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); if (!ret) return -ENOMEM; sg_cnt = ret; @@ -338,7 +351,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, return ret; out_unmap_sg: - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -588,11 +601,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, break; } - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg, - sg_cnt, dir); - else - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8917125ea16d..30d4c126a2db 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1068,7 +1068,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, } settimeout_out: - return skb->len; + return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) @@ -1139,7 +1139,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, } resp_out: - return skb->len; + return 0; } static void free_sm_ah(struct kref *kref) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 6eb6d2717ca5..2d5608315dc8 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -339,22 +339,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) + new_pps->main.port_num = qp_pps->main.port_num; + + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) + new_pps->main.pkey_index = qp_pps->main.pkey_index; + + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c new file mode 100644 index 000000000000..6c3514beac4d --- /dev/null +++ b/drivers/infiniband/core/trace.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Trace points for core RDMA functions. + * + * Author: Chuck Lever <chuck.lever@oracle.com> + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#define CREATE_TRACE_POINTS + +#include <rdma/ib_verbs.h> + +#include <trace/events/rdma_core.h> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 7a3b99597ead..06b6125b5ae1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -54,7 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { page = sg_page_iter_page(&sg_iter); - put_user_pages_dirty_lock(&page, 1, umem->writable && dirty); + unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty); } sg_free_table(&umem->sg_head); @@ -166,10 +166,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, * for any address. */ mask |= (sg_dma_address(sg) + pgoff) ^ va; - if (i && i != (umem->nmap - 1)) - /* restrict by length as well for interior SGEs */ - mask |= sg_dma_len(sg); va += sg_dma_len(sg) - pgoff; + /* Except for the last entry, the ending iova alignment sets + * the maximum possible page size as the low bits of the iova + * must be zero when starting the next chunk. + */ + if (i != (umem->nmap - 1)) + mask |= va; pgoff = 0; } best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); @@ -181,15 +184,14 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); /** * ib_umem_get - Pin and DMA map userspace memory. * - * @udata: userspace context to pin memory for + * @device: IB device to connect UMEM * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned */ -struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, +struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { - struct ib_ucontext *context; struct ib_umem *umem; struct page **page_list; unsigned long lock_limit; @@ -201,14 +203,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; - if (!udata) - return ERR_PTR(-EIO); - - context = container_of(udata, struct uverbs_attr_bundle, driver_udata) - ->context; - if (!context) - return ERR_PTR(-EIO); - /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. @@ -226,7 +220,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, umem = kzalloc(sizeof(*umem), GFP_KERNEL); if (!umem) return ERR_PTR(-ENOMEM); - umem->ibdev = context->device; + umem->ibdev = device; umem->length = size; umem->address = addr; umem->writable = ib_access_writable(access); @@ -266,33 +260,28 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, sg = umem->sg_head.sgl; while (npages) { - down_read(&mm->mmap_sem); - ret = get_user_pages(cur_base, - min_t(unsigned long, npages, - PAGE_SIZE / sizeof (struct page *)), - gup_flags | FOLL_LONGTERM, - page_list, NULL); - if (ret < 0) { - up_read(&mm->mmap_sem); + ret = pin_user_pages_fast(cur_base, + min_t(unsigned long, npages, + PAGE_SIZE / + sizeof(struct page *)), + gup_flags | FOLL_LONGTERM, page_list); + if (ret < 0) goto umem_release; - } cur_base += ret * PAGE_SIZE; npages -= ret; sg = ib_umem_add_sg_table(sg, page_list, ret, - dma_get_max_seg_size(context->device->dma_device), + dma_get_max_seg_size(device->dma_device), &umem->sg_nents); - - up_read(&mm->mmap_sem); } sg_mark_end(sg); - umem->nmap = ib_dma_map_sg(context->device, - umem->sg_head.sgl, - umem->sg_nents, - DMA_BIDIRECTIONAL); + umem->nmap = ib_dma_map_sg(device, + umem->sg_head.sgl, + umem->sg_nents, + DMA_BIDIRECTIONAL); if (!umem->nmap) { ret = -ENOMEM; @@ -303,7 +292,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, goto out; umem_release: - __ib_umem_release(context->device, umem, 0); + __ib_umem_release(device, umem, 0); vma: atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e42d44e501fd..cd656ad4953b 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -110,15 +110,12 @@ out_page_list: * They exist only to hold the per_mm reference to help the driver create * children umems. * - * @udata: udata from the syscall being used to create the umem + * @device: IB device to create UMEM * @access: ib_reg_mr access flags */ -struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, int access) { - struct ib_ucontext *context = - container_of(udata, struct uverbs_attr_bundle, driver_udata) - ->context; struct ib_umem *umem; struct ib_umem_odp *umem_odp; int ret; @@ -126,14 +123,11 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, if (access & IB_ACCESS_HUGETLB) return ERR_PTR(-EINVAL); - if (!context) - return ERR_PTR(-EIO); - umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); if (!umem_odp) return ERR_PTR(-ENOMEM); umem = &umem_odp->umem; - umem->ibdev = context->device; + umem->ibdev = device; umem->writable = ib_access_writable(access); umem->owning_mm = current->mm; umem_odp->is_implicit_odp = 1; @@ -187,21 +181,35 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, odp_data->page_shift = PAGE_SHIFT; odp_data->notifier.ops = ops; + /* + * A mmget must be held when registering a notifier, the owming_mm only + * has a mm_grab at this point. + */ + if (!mmget_not_zero(umem->owning_mm)) { + ret = -EFAULT; + goto out_free; + } + odp_data->tgid = get_pid(root->tgid); ret = ib_init_umem_odp(odp_data, ops); - if (ret) { - put_pid(odp_data->tgid); - kfree(odp_data); - return ERR_PTR(ret); - } + if (ret) + goto out_tgid; + mmput(umem->owning_mm); return odp_data; + +out_tgid: + put_pid(odp_data->tgid); + mmput(umem->owning_mm); +out_free: + kfree(odp_data); + return ERR_PTR(ret); } EXPORT_SYMBOL(ib_umem_odp_alloc_child); /** * ib_umem_odp_get - Create a umem_odp for a userspace va * - * @udata: userspace context to pin memory for + * @device: IB device struct to get UMEM * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned @@ -210,23 +218,14 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child); * pinning, instead, stores the mm for future page fault handling in * conjunction with MMU notifiers. */ -struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, +struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, + unsigned long addr, size_t size, int access, const struct mmu_interval_notifier_ops *ops) { struct ib_umem_odp *umem_odp; - struct ib_ucontext *context; struct mm_struct *mm; int ret; - if (!udata) - return ERR_PTR(-EIO); - - context = container_of(udata, struct uverbs_attr_bundle, driver_udata) - ->context; - if (!context) - return ERR_PTR(-EIO); - if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND))) return ERR_PTR(-EINVAL); @@ -234,7 +233,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, if (!umem_odp) return ERR_PTR(-ENOMEM); - umem_odp->umem.ibdev = context->device; + umem_odp->umem.ibdev = device; umem_odp->umem.length = size; umem_odp->umem.address = addr; umem_odp->umem.writable = ib_access_writable(access); @@ -242,21 +241,10 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, umem_odp->notifier.ops = ops; umem_odp->page_shift = PAGE_SHIFT; - if (access & IB_ACCESS_HUGETLB) { - struct vm_area_struct *vma; - struct hstate *h; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ib_umem_start(umem_odp)); - if (!vma || !is_vm_hugetlb_page(vma)) { - up_read(&mm->mmap_sem); - ret = -EINVAL; - goto err_free; - } - h = hstate_vma(vma); - umem_odp->page_shift = huge_page_shift(h); - up_read(&mm->mmap_sem); - } +#ifdef CONFIG_HUGETLB_PAGE + if (access & IB_ACCESS_HUGETLB) + umem_odp->page_shift = HPAGE_SHIFT; +#endif umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); ret = ib_init_umem_odp(umem_odp, ops); @@ -266,7 +254,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, err_put_pid: put_pid(umem_odp->tgid); -err_free: kfree(umem_odp); return ERR_PTR(ret); } @@ -308,9 +295,8 @@ EXPORT_SYMBOL(ib_umem_odp_release); * The function returns -EFAULT if the DMA mapping operation fails. It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. * - * The page is released via put_user_page even if the operation failed. For - * on-demand pinning, the page is released whenever it isn't stored in the - * umem. + * The page is released via put_page even if the operation failed. For on-demand + * pinning, the page is released whenever it isn't stored in the umem. */ static int ib_umem_odp_map_dma_single_page( struct ib_umem_odp *umem_odp, @@ -363,7 +349,7 @@ static int ib_umem_odp_map_dma_single_page( } out: - put_user_page(page); + put_page(page); return ret; } @@ -440,7 +426,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); @@ -473,7 +459,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = -EFAULT; break; } - put_user_page(local_page_list[j]); + put_page(local_page_list[j]); continue; } @@ -500,8 +486,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * ib_umem_odp_map_dma_single_page(). */ if (npages - (j + 1) > 0) - put_user_pages(&local_page_list[j+1], - npages - (j + 1)); + release_pages(&local_page_list[j+1], + npages - (j + 1)); break; } } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d1407fa378e8..1235ffb2389b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1312,6 +1312,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; + cdev_device_del(&port->sm_cdev, &port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); + mutex_lock(&port->file_mutex); /* Mark ib_dev NULL and block ioctl or other file ops to progress @@ -1331,8 +1334,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); - cdev_device_del(&port->sm_cdev, &port->sm_dev); - cdev_device_del(&port->cdev, &port->dev); ida_free(&umad_ida, port->dev_num); /* balances device_initialize() */ diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 63f7f7db5902..7df71983212d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -111,7 +111,6 @@ struct ib_uverbs_device { struct srcu_struct disassociate_srcu; struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; - struct list_head uverbs_events_file_list; struct uverbs_api *uapi; }; @@ -124,10 +123,9 @@ struct ib_uverbs_event_queue { }; struct ib_uverbs_async_event_file { + struct ib_uobject uobj; struct ib_uverbs_event_queue ev_queue; - struct ib_uverbs_file *uverbs_file; - struct kref ref; - struct list_head list; + struct ib_event_handler event_handler; }; struct ib_uverbs_completion_event_file { @@ -144,8 +142,7 @@ struct ib_uverbs_file { * ucontext_lock held */ struct ib_ucontext *ucontext; - struct ib_event_handler event_handler; - struct ib_uverbs_async_event_file *async_file; + struct ib_uverbs_async_event_file *async_file; struct list_head list; /* @@ -183,6 +180,7 @@ struct ib_uverbs_mcast_entry { struct ib_uevent_object { struct ib_uobject uobject; + /* List member for ib_uverbs_async_event_file list */ struct list_head event_list; u32 events_reported; }; @@ -210,25 +208,24 @@ struct ib_uwq_object { }; struct ib_ucq_object { - struct ib_uobject uobject; + struct ib_uevent_object uevent; struct list_head comp_list; - struct list_head async_list; u32 comp_events_reported; - u32 async_events_reported; }; extern const struct file_operations uverbs_event_fops; +extern const struct file_operations uverbs_async_event_fops; void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); -struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev); -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); +void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file); +void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue); void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_completion_event_file *ev_file, +int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs); +int ib_init_ucontext(struct uverbs_attr_bundle *attrs); + +void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj); -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj); +void ib_uverbs_release_uevent(struct ib_uevent_object *uobj); void ib_uverbs_release_file(struct kref *ref); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); @@ -236,8 +233,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event); int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); @@ -276,23 +271,6 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, size_t kern_filter_sz, union ib_flow_spec *ib_spec); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); - /* * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the * PortInfo CapabilityMask, but was extended with unique bits. diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 06ed32c8662f..060b4ebbd2ba 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -203,82 +203,55 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) #define ib_uverbs_lookup_comp_file(_fd, _ufile) \ _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) -static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) +int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *file = attrs->ufile; - struct ib_uverbs_get_context cmd; - struct ib_uverbs_get_context_resp resp; - struct ib_ucontext *ucontext; - struct file *filp; - struct ib_rdmacg_object cg_obj; + struct ib_uverbs_file *ufile = attrs->ufile; + struct ib_ucontext *ucontext; struct ib_device *ib_dev; - int ret; - - ret = uverbs_request(attrs, &cmd, sizeof(cmd)); - if (ret) - return ret; - - mutex_lock(&file->ucontext_lock); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto err; - } - if (file->ucontext) { - ret = -EINVAL; - goto err; - } - - ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); - if (ret) - goto err; + ib_dev = srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu); + if (!ib_dev) + return -EIO; ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext); - if (!ucontext) { - ret = -ENOMEM; - goto err_alloc; - } - - attrs->context = ucontext; + if (!ucontext) + return -ENOMEM; ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; - ucontext->cg_obj = cg_obj; - /* ufile is required when some objects are released */ - ucontext->ufile = file; - - ucontext->closing = false; - ucontext->cleanup_retryable = false; - + ucontext->ufile = ufile; xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + attrs->context = ucontext; + return 0; +} - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - goto err_free; - resp.async_fd = ret; +int ib_init_ucontext(struct uverbs_attr_bundle *attrs) +{ + struct ib_ucontext *ucontext = attrs->context; + struct ib_uverbs_file *file = attrs->ufile; + int ret; - filp = ib_uverbs_alloc_async_event_file(file, ib_dev); - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); - goto err_fd; + if (!down_read_trylock(&file->hw_destroy_rwsem)) + return -EIO; + mutex_lock(&file->ucontext_lock); + if (file->ucontext) { + ret = -EINVAL; + goto err; } - resp.num_comp_vectors = file->device->num_comp_vectors; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); + ret = ib_rdmacg_try_charge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); if (ret) - goto err_file; + goto err; - ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata); + ret = ucontext->device->ops.alloc_ucontext(ucontext, + &attrs->driver_udata); if (ret) - goto err_file; + goto err_uncharge; rdma_restrack_uadd(&ucontext->res); - fd_install(resp.async_fd, filp); - /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update * only after all writes to setup the ucontext have completed @@ -286,24 +259,62 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) smp_store_release(&file->ucontext, ucontext); mutex_unlock(&file->ucontext_lock); - + up_read(&file->hw_destroy_rwsem); return 0; -err_file: - ib_uverbs_free_async_event_file(file); - fput(filp); +err_uncharge: + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); +err: + mutex_unlock(&file->ucontext_lock); + up_read(&file->hw_destroy_rwsem); + return ret; +} + +static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_get_context_resp resp; + struct ib_uverbs_get_context cmd; + struct ib_device *ib_dev; + struct ib_uobject *uobj; + int ret; -err_fd: - put_unused_fd(resp.async_fd); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; -err_free: - kfree(ucontext); + ret = ib_alloc_ucontext(attrs); + if (ret) + return ret; -err_alloc: - ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); + uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev); + if (IS_ERR(uobj)) { + ret = PTR_ERR(uobj); + goto err_ucontext; + } -err: - mutex_unlock(&file->ucontext_lock); + resp = (struct ib_uverbs_get_context_resp){ + .num_comp_vectors = attrs->ufile->device->num_comp_vectors, + .async_fd = uobj->id, + }; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) + goto err_uobj; + + ret = ib_init_ucontext(attrs); + if (ret) + goto err_uobj; + + ib_uverbs_init_async_event_file( + container_of(uobj, struct ib_uverbs_async_event_file, uobj)); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; + +err_uobj: + rdma_alloc_abort_uobject(uobj, attrs); +err_ucontext: + kfree(attrs->context); + attrs->context = NULL; return ret; } @@ -446,7 +457,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (ret) goto err_copy; - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); @@ -642,7 +654,8 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject, attrs); + rdma_alloc_commit_uobject(&obj->uobject, attrs); + return 0; err_copy: if (inode) { @@ -774,7 +787,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); @@ -928,7 +942,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: uverbs_dealloc_mw(mw); @@ -980,7 +995,8 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) return ret; } - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; } static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, @@ -1010,11 +1026,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, } } - obj->uobject.user_handle = cmd->user_handle; - obj->comp_events_reported = 0; - obj->async_events_reported = 0; + obj->uevent.uobject.user_handle = cmd->user_handle; INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); + INIT_LIST_HEAD(&obj->uevent.event_list); attr.cqe = cmd->cqe; attr.comp_vector = cmd->comp_vector; @@ -1026,7 +1040,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, goto err_file; } cq->device = ib_dev; - cq->uobject = &obj->uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; @@ -1036,9 +1050,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_free; - obj->uobject.object = cq; + obj->uevent.uobject.object = cq; memset(&resp, 0, sizeof resp); - resp.base.cq_handle = obj->uobject.id; + resp.base.cq_handle = obj->uevent.uobject.id; resp.base.cqe = cq->cqe; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); @@ -1049,9 +1063,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject, attrs); - if (ret) - return ERR_PTR(ret); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return obj; err_cb: @@ -1061,10 +1073,10 @@ err_free: kfree(cq); err_file: if (ev_file) - ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); + ib_uverbs_release_ucq(ev_file, obj); err: - uobj_alloc_abort(&obj->uobject, attrs); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ERR_PTR(ret); } @@ -1133,7 +1145,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); out: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -1216,7 +1229,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT); out_put: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -1237,8 +1251,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - uobj_put_obj_read(cq); - + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return 0; } @@ -1258,10 +1272,10 @@ static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs) if (IS_ERR(uobj)) return PTR_ERR(uobj); - obj = container_of(uobj, struct ib_ucq_object, uobject); + obj = container_of(uobj, struct ib_ucq_object, uevent.uobject); memset(&resp, 0, sizeof(resp)); resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; + resp.async_events_reported = obj->uevent.events_reported; uobj_put_destroy(uobj); @@ -1375,7 +1389,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = attrs->ufile; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; @@ -1391,7 +1404,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, attr.cap.max_recv_sge = cmd->max_recv_sge; attr.cap.max_inline_data = cmd->max_inline_data; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); @@ -1421,7 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, qp = ib_create_qp(pd, &attr); else qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - &obj->uevent.uobject); + obj); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1433,17 +1445,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - qp->port = 0; if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) @@ -1454,7 +1456,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, atomic_inc(&ind_tbl->usecnt); } else { /* It is done in _ib_create_qp for other QP types */ - qp->uobject = &obj->uevent.uobject; + qp->uobject = obj; } obj->uevent.uobject.object = qp; @@ -1483,15 +1485,19 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (pd) uobj_put_obj_read(pd); if (scq) - uobj_put_obj_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - uobj_put_obj_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_cb: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1501,11 +1507,14 @@ err_put: if (pd) uobj_put_obj_read(pd); if (scq) - uobj_put_obj_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - uobj_put_obj_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); @@ -1567,7 +1576,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_qp *qp; - struct ib_qp_open_attr attr; + struct ib_qp_open_attr attr = {}; int ret; struct ib_device *ib_dev; @@ -1593,11 +1602,9 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = attrs->ufile; attr.qp_num = cmd.qpn; attr.qp_type = cmd.qp_type; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); @@ -1620,10 +1627,11 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - qp->uobject = &obj->uevent.uobject; + qp->uobject = obj; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_destroy: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1684,7 +1692,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) goto out; @@ -1921,7 +1930,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs, &attrs->driver_udata); release_qp: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); out: kfree(attr); @@ -2185,7 +2195,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) ret = ret2; out_put: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); while (wr) { if (is_ud && ud_wr(wr)->ah) @@ -2327,7 +2338,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) resp.bad_wr = 0; ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) { for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -2377,7 +2389,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) resp.bad_wr = 0; ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) for (next = wr; next; next = next->next) { @@ -2465,7 +2478,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, @@ -2507,7 +2521,7 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -2534,7 +2548,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) out_put: mutex_unlock(&obj->mcast_lock); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -2556,7 +2571,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -2577,7 +2592,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) out_put: mutex_unlock(&obj->mcast_lock); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -2720,12 +2736,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, return 0; } -static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec) -{ - /* Returns user space filter size, includes padding */ - return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; -} - static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { @@ -2869,11 +2879,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { - ssize_t kern_filter_sz; + size_t kern_filter_sz; void *kern_spec_mask; void *kern_spec_val; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + if (check_sub_overflow((size_t)kern_spec->hdr.size, + sizeof(struct ib_uverbs_flow_spec_hdr), + &kern_filter_sz)) + return -EINVAL; + + kern_filter_sz /= 2; kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); @@ -2943,7 +2958,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; wq_init_attr.create_flags = cmd.create_flags; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata); @@ -2952,7 +2966,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) goto err_put_cq; } - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; wq->wq_type = wq_init_attr.wq_type; wq->cq = cq; @@ -2962,7 +2976,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; memset(&resp, 0, sizeof(resp)); @@ -2976,13 +2990,16 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_copy: ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_pd: uobj_put_obj_read(pd); err_uobj: @@ -3048,7 +3065,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) } ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask, &attrs->driver_udata); - uobj_put_obj_read(wq); + rdma_lookup_put_uobject(&wq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3149,9 +3167,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - uobj_put_obj_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3159,7 +3179,8 @@ err_uobj: uobj_alloc_abort(uobj, attrs); put_wqs: for (j = 0; j < num_read_wqs; j++) - uobj_put_obj_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_free: kfree(wqs_handles); kfree(wqs); @@ -3325,11 +3346,13 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) if (err) goto err_copy; - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); @@ -3338,7 +3361,8 @@ err_free: err_free_flow_attr: kfree(flow_attr); err_put: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_uobj: uobj_alloc_abort(uobj, attrs); err_free_attr: @@ -3423,7 +3447,6 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, attr.attr.max_sge = cmd->max_sge; attr.attr.srq_limit = cmd->srq_limit; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); @@ -3435,7 +3458,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, srq->device = pd->device; srq->pd = pd; srq->srq_type = cmd->srq_type; - srq->uobject = &obj->uevent.uobject; + srq->uobject = obj; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; @@ -3474,10 +3497,12 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, uobj_put_read(xrcd_uobj); if (ib_srq_has_cq(cmd->srq_type)) - uobj_put_obj_read(attr.ext.cq); + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_copy: ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); @@ -3490,7 +3515,8 @@ err_put: err_put_cq: if (ib_srq_has_cq(cmd->srq_type)) - uobj_put_obj_read(attr.ext.cq); + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { @@ -3558,7 +3584,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask, &attrs->driver_udata); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3581,7 +3608,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) ret = ib_query_srq(srq, &attr); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) return ret; @@ -3706,8 +3734,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); - uobj_put_obj_read(cq); - + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 269938f59d3f..538affbc517e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -220,24 +220,17 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, return ret; } -static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, - struct uverbs_objs_arr_attr *attr, - bool commit, struct uverbs_attr_bundle *attrs) +static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + bool commit, + struct uverbs_attr_bundle *attrs) { const struct uverbs_attr_spec *spec = &attr_uapi->spec; - int current_ret; - int ret = 0; size_t i; - for (i = 0; i != attr->len; i++) { - current_ret = uverbs_finalize_object(attr->uobjects[i], - spec->u2.objs_arr.access, - commit, attrs); - if (!ret) - ret = current_ret; - } - - return ret; + for (i = 0; i != attr->len; i++) + uverbs_finalize_object(attr->uobjects[i], + spec->u2.objs_arr.access, commit, attrs); } static int uverbs_process_attr(struct bundle_priv *pbundle, @@ -495,26 +488,22 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, return ret; } -static int bundle_destroy(struct bundle_priv *pbundle, bool commit) +static void bundle_destroy(struct bundle_priv *pbundle, bool commit) { unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; struct bundle_alloc_head *memblock; unsigned int i; - int ret = 0; /* fast path for simple uobjects */ i = -1; while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, i + 1)) < key_bitmap_len) { struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; - int current_ret; - current_ret = uverbs_finalize_object( + uverbs_finalize_object( attr->obj_attr.uobject, attr->obj_attr.attr_elm->spec.u.obj.access, commit, &pbundle->bundle); - if (!ret) - ret = current_ret; } i = -1; @@ -523,7 +512,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; const struct uverbs_api_attr *attr_uapi; void __rcu **slot; - int current_ret; slot = uapi_get_attr_for_method( pbundle, @@ -534,11 +522,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) attr_uapi = rcu_dereference_protected(*slot, true); if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { - current_ret = uverbs_free_idrs_array( - attr_uapi, &attr->objs_arr_attr, commit, - &pbundle->bundle); - if (!ret) - ret = current_ret; + uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr, + commit, &pbundle->bundle); } } @@ -548,8 +533,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) memblock = memblock->next; kvfree(tmp); } - - return ret; } static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, @@ -562,7 +545,6 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, struct bundle_priv *pbundle; struct bundle_priv onstack; void __rcu **slot; - int destroy_ret; int ret; if (unlikely(hdr->driver_id != uapi->driver_id)) @@ -610,10 +592,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); - destroy_ret = bundle_destroy(pbundle, ret == 0); - if (unlikely(destroy_ret && !ret)) - return destroy_ret; - + bundle_destroy(pbundle, ret == 0); return ret; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 970d8e31dd65..2d4083bf4a04 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -125,17 +125,8 @@ static void ib_uverbs_release_dev(struct device *device) kfree(dev); } -static void ib_uverbs_release_async_event_file(struct kref *ref) -{ - struct ib_uverbs_async_event_file *file = - container_of(ref, struct ib_uverbs_async_event_file, ref); - - kfree(file); -} - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_completion_event_file *ev_file, - struct ib_ucq_object *uobj) +void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, + struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; @@ -150,25 +141,24 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file, uverbs_uobject_put(&ev_file->uobj); } - spin_lock_irq(&file->async_file->ev_queue.lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->ev_queue.lock); + ib_uverbs_release_uevent(&uobj->uevent); } -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj) +void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) { + struct ib_uverbs_async_event_file *async_file = + READ_ONCE(uobj->uobject.ufile->async_file); struct ib_uverbs_event *evt, *tmp; - spin_lock_irq(&file->async_file->ev_queue.lock); + if (!async_file) + return; + + spin_lock_irq(&async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->ev_queue.lock); + spin_unlock_irq(&async_file->ev_queue.lock); } void ib_uverbs_detach_umcast(struct ib_qp *qp, @@ -208,8 +198,7 @@ void ib_uverbs_release_file(struct kref *ref) ib_uverbs_comp_dev(file->device); if (file->async_file) - kref_put(&file->async_file->ref, - ib_uverbs_release_async_event_file); + uverbs_uobject_put(&file->async_file->uobj); put_device(&file->device->dev); if (file->disassociate_page) @@ -220,7 +209,6 @@ void ib_uverbs_release_file(struct kref *ref) } static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, - struct ib_uverbs_file *uverbs_file, struct file *filp, char __user *buf, size_t count, loff_t *pos, size_t eventsz) @@ -238,19 +226,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, if (wait_event_interruptible(ev_queue->poll_wait, (!list_empty(&ev_queue->event_list) || - /* The barriers built into wait_event_interruptible() - * and wake_up() guarentee this will see the null set - * without using RCU - */ - !uverbs_file->device->ib_dev))) + ev_queue->is_closed))) return -ERESTARTSYS; + spin_lock_irq(&ev_queue->lock); + /* If device was disassociated and no event exists set an error */ - if (list_empty(&ev_queue->event_list) && - !uverbs_file->device->ib_dev) + if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); return -EIO; - - spin_lock_irq(&ev_queue->lock); + } } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); @@ -285,8 +270,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, { struct ib_uverbs_async_event_file *file = filp->private_data; - return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, - buf, count, pos, + return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_async_event_desc)); } @@ -296,9 +280,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; - return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj.ufile, filp, - buf, count, pos, + return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, + pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -321,7 +304,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, static __poll_t ib_uverbs_async_event_poll(struct file *filp, struct poll_table_struct *wait) { - return ib_uverbs_event_poll(filp->private_data, filp, wait); + struct ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } static __poll_t ib_uverbs_comp_event_poll(struct file *filp, @@ -335,9 +320,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp, static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { - struct ib_uverbs_event_queue *ev_queue = filp->private_data; + struct ib_uverbs_async_event_file *file = filp->private_data; - return fasync_helper(fd, filp, on, &ev_queue->async_queue); + return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) @@ -348,70 +333,20 @@ static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); } -static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uverbs_async_event_file *file = filp->private_data; - struct ib_uverbs_file *uverbs_file = file->uverbs_file; - struct ib_uverbs_event *entry, *tmp; - int closed_already = 0; - - mutex_lock(&uverbs_file->device->lists_mutex); - spin_lock_irq(&file->ev_queue.lock); - closed_already = file->ev_queue.is_closed; - file->ev_queue.is_closed = 1; - list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - spin_unlock_irq(&file->ev_queue.lock); - if (!closed_already) { - list_del(&file->list); - ib_unregister_event_handler(&uverbs_file->event_handler); - } - mutex_unlock(&uverbs_file->device->lists_mutex); - - kref_put(&uverbs_file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_async_event_file); - - return 0; -} - -static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uobject *uobj = filp->private_data; - struct ib_uverbs_completion_event_file *file = container_of( - uobj, struct ib_uverbs_completion_event_file, uobj); - struct ib_uverbs_event *entry, *tmp; - - spin_lock_irq(&file->ev_queue.lock); - list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - file->ev_queue.is_closed = 1; - spin_unlock_irq(&file->ev_queue.lock); - - uverbs_close_fd(filp); - - return 0; -} - const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_comp_event_read, .poll = ib_uverbs_comp_event_poll, - .release = ib_uverbs_comp_event_close, + .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, .llseek = no_llseek, }; -static const struct file_operations uverbs_async_event_fops = { +const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_async_event_read, .poll = ib_uverbs_async_event_poll, - .release = ib_uverbs_async_event_close, + .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_async_event_fasync, .llseek = no_llseek, }; @@ -438,9 +373,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) return; } - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = cq->uobject; - entry->desc.comp.cq_handle = cq->uobject->user_handle; + entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &ev_queue->event_list); @@ -451,102 +386,82 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } -static void ib_uverbs_async_handler(struct ib_uverbs_file *file, - __u64 element, __u64 event, - struct list_head *obj_list, - u32 *counter) +static void +ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, + __u64 element, __u64 event, struct list_head *obj_list, + u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; - spin_lock_irqsave(&file->async_file->ev_queue.lock, flags); - if (file->async_file->ev_queue.is_closed) { - spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); + if (!async_file) + return; + + spin_lock_irqsave(&async_file->ev_queue.lock, flags); + if (async_file->ev_queue.is_closed) { + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } - entry->desc.async.element = element; + entry->desc.async.element = element; entry->desc.async.event_type = event; - entry->desc.async.reserved = 0; - entry->counter = counter; + entry->desc.async.reserved = 0; + entry->counter = counter; - list_add_tail(&entry->list, &file->async_file->ev_queue.event_list); + list_add_tail(&entry->list, &async_file->ev_queue.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); - wake_up_interruptible(&file->async_file->ev_queue.poll_wait); - kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&async_file->ev_queue.poll_wait); + kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN); } -void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +static void uverbs_uobj_event(struct ib_uevent_object *eobj, + struct ib_event *event) { - struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, - struct ib_ucq_object, uobject); + ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file), + eobj->uobject.user_handle, event->event, + &eobj->event_list, &eobj->events_reported); +} - ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, - event->event, &uobj->async_list, - &uobj->async_events_reported); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + uverbs_uobj_event(&event->element.cq->uobject->uevent, event); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj; - /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject) return; - uobj = container_of(event->element.qp->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + uverbs_uobj_event(&event->element.qp->uobject->uevent, event); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + uverbs_uobj_event(&event->element.wq->uobject->uevent, event); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj; - - uobj = container_of(event->element.srq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); -} - -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) -{ - struct ib_uverbs_file *file = - container_of(handler, struct ib_uverbs_file, event_handler); - - ib_uverbs_async_handler(file, event->element.port_num, event->event, - NULL, NULL); + uverbs_uobj_event(&event->element.srq->uobject->uevent, event); } -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { - kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); - file->async_file = NULL; + ib_uverbs_async_handler( + container_of(handler, struct ib_uverbs_async_event_file, + event_handler), + event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) @@ -558,45 +473,26 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) ev_queue->async_queue = NULL; } -struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev) +void ib_uverbs_init_async_event_file( + struct ib_uverbs_async_event_file *async_file) { - struct ib_uverbs_async_event_file *ev_file; - struct file *filp; - - ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); - if (!ev_file) - return ERR_PTR(-ENOMEM); - - ib_uverbs_init_event_queue(&ev_file->ev_queue); - ev_file->uverbs_file = uverbs_file; - kref_get(&ev_file->uverbs_file->ref); - kref_init(&ev_file->ref); - filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, - ev_file, O_RDONLY); - if (IS_ERR(filp)) - goto err_put_refs; - - mutex_lock(&uverbs_file->device->lists_mutex); - list_add_tail(&ev_file->list, - &uverbs_file->device->uverbs_events_file_list); - mutex_unlock(&uverbs_file->device->lists_mutex); - - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ib_register_event_handler(&uverbs_file->event_handler); - /* At that point async file stuff was fully set */ + struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile; + struct ib_device *ib_dev = async_file->uobj.context->device; + + ib_uverbs_init_event_queue(&async_file->ev_queue); - return filp; + /* The first async_event_file becomes the default one for the file. */ + mutex_lock(&uverbs_file->ucontext_lock); + if (!uverbs_file->async_file) { + /* Pairs with the put in ib_uverbs_release_file */ + uverbs_uobject_get(&async_file->uobj); + smp_store_release(&uverbs_file->async_file, async_file); + } + mutex_unlock(&uverbs_file->ucontext_lock); -err_put_refs: - kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); - return filp; + INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev, + ib_uverbs_event_handler); + ib_register_event_handler(&async_file->event_handler); } static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, @@ -1225,7 +1121,6 @@ static void ib_uverbs_add_one(struct ib_device *device) mutex_init(&uverbs_dev->xrcd_tree_mutex); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); - INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; @@ -1270,14 +1165,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; - struct ib_uverbs_async_event_file *event_file; - struct ib_event event; /* Pending running commands to terminate */ uverbs_disassociate_api_pre(uverbs_dev); - event.event = IB_EVENT_DEVICE_FATAL; - event.element.port_num = 0; - event.device = ib_dev; mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { @@ -1293,31 +1183,14 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, */ mutex_unlock(&uverbs_dev->lists_mutex); - ib_uverbs_event_handler(&file->event_handler, &event); + ib_uverbs_async_handler(READ_ONCE(file->async_file), 0, + IB_EVENT_DEVICE_FATAL, NULL, NULL); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); mutex_lock(&uverbs_dev->lists_mutex); } - - while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { - event_file = list_first_entry(&uverbs_dev-> - uverbs_events_file_list, - struct ib_uverbs_async_event_file, - list); - spin_lock_irq(&event_file->ev_queue.lock); - event_file->ev_queue.is_closed = 1; - spin_unlock_irq(&event_file->ev_queue.lock); - - list_del(&event_file->list); - ib_unregister_event_handler( - &event_file->uverbs_file->event_handler); - event_file->uverbs_file->event_handler.device = - NULL; - - wake_up_interruptible(&event_file->ev_queue.poll_wait); - kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); - } mutex_unlock(&uverbs_dev->lists_mutex); uverbs_disassociate_api(uverbs_dev->uapi); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 35b2e2c640cc..3abfc63225cb 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -105,7 +105,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); - ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent); + ib_uverbs_release_uevent(&uqp->uevent); return ret; } @@ -138,7 +138,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject, if (ib_is_destroy_retryable(ret, why, uobject)) return ret; - ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent); + ib_uverbs_release_uevent(&uwq->uevent); return ret; } @@ -163,7 +163,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, atomic_dec(&us->uxrcd->refcnt); } - ib_uverbs_release_uevent(attrs->ufile, uevent); + ib_uverbs_release_uevent(uevent); return ret; } @@ -202,24 +202,41 @@ static int uverbs_free_pd(struct ib_uobject *uobject, return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) { - struct ib_uverbs_completion_event_file *comp_event_file = - container_of(uobj, struct ib_uverbs_completion_event_file, - uobj); - struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; + struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&event_queue->lock); + /* + * The user must ensure that no new items are added to the event_list + * once is_closed is set. + */ event_queue->is_closed = 1; spin_unlock_irq(&event_queue->lock); + wake_up_interruptible(&event_queue->poll_wait); + kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); - if (why == RDMA_REMOVE_DRIVER_REMOVE) { - wake_up_interruptible(&event_queue->poll_wait); - kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); + spin_lock_irq(&event_queue->lock); + list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + list_del(&entry->list); + kfree(entry); } + spin_unlock_irq(&event_queue->lock); +} + +static int +uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct ib_uverbs_completion_event_file *file = + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); + + ib_uverbs_free_event_queue(&file->ev_queue); return 0; -}; +} int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) { @@ -230,7 +247,7 @@ EXPORT_SYMBOL(uverbs_destroy_def_handler); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, + uverbs_completion_event_file_destroy_uobj, &uverbs_event_fops, "[infinibandevent]", O_RDONLY)); diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c new file mode 100644 index 000000000000..82ec0806b34b --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_std_types.h> +#include <rdma/uverbs_ioctl.h> +#include "rdma_core.h" +#include "uverbs.h" + +static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC); + + ib_uverbs_init_async_event_file( + container_of(uobj, struct ib_uverbs_async_event_file, uobj)); + return 0; +} + +static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct ib_uverbs_async_event_file *event_file = + container_of(uobj, struct ib_uverbs_async_event_file, uobj); + + ib_unregister_event_handler(&event_file->event_handler); + ib_uverbs_free_event_queue(&event_file->ev_queue); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_ASYNC_EVENT_ALLOC, + UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file), + uverbs_async_event_destroy_uobj, + &uverbs_async_event_fops, + "[infinibandevent]", + O_RDONLY), + &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC)); + +const struct uapi_definition uverbs_def_obj_async_fd[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index e39fe6a8aac4..da4110a0eea2 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -41,7 +41,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, struct ib_cq *cq = uobject->object; struct ib_uverbs_event_queue *ev_queue = cq->cq_context; struct ib_ucq_object *ucq = - container_of(uobject, struct ib_ucq_object, uobject); + container_of(uobject, struct ib_ucq_object, uevent.uobject); int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); @@ -49,7 +49,6 @@ static int uverbs_free_cq(struct ib_uobject *uobject, return ret; ib_uverbs_release_ucq( - attrs->ufile, ev_queue ? container_of(ev_queue, struct ib_uverbs_completion_event_file, ev_queue) : @@ -63,7 +62,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( { struct ib_ucq_object *obj = container_of( uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), - typeof(*obj), uobject); + typeof(*obj), uevent.uobject); struct ib_device *ib_dev = attrs->context->device; int ret; u64 user_handle; @@ -106,10 +105,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( goto err_event_file; } - obj->comp_events_reported = 0; - obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); + INIT_LIST_HEAD(&obj->uevent.event_list); cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); if (!cq) { @@ -118,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( } cq->device = ib_dev; - cq->uobject = &obj->uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; @@ -129,8 +126,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( if (ret) goto err_free; - obj->uobject.object = cq; - obj->uobject.user_handle = user_handle; + obj->uevent.uobject.object = cq; + obj->uevent.uobject.user_handle = user_handle; rdma_restrack_uadd(&cq->res); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, @@ -182,10 +179,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); struct ib_ucq_object *obj = - container_of(uobj, struct ib_ucq_object, uobject); + container_of(uobj, struct ib_ucq_object, uevent.uobject); struct ib_uverbs_destroy_cq_resp resp = { .comp_events_reported = obj->comp_events_reported, - .async_events_reported = obj->async_events_reported + .async_events_reported = obj->uevent.events_reported }; return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 2a3f2f01028d..ae4a59d6f9b1 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -200,6 +200,43 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( &resp, sizeof(resp)); } +static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)( + struct uverbs_attr_bundle *attrs) +{ + u32 num_comp = attrs->ufile->device->num_comp_vectors; + u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS; + int ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + &num_comp, sizeof(num_comp)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, + &core_support, sizeof(core_support)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = ib_alloc_ucontext(attrs); + if (ret) + return ret; + ret = ib_init_ucontext(attrs); + if (ret) { + kfree(attrs->context); + attrs->context = NULL; + return ret; + } + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_GET_CONTEXT, + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, + UVERBS_ATTR_TYPE(u64), UA_OPTIONAL), + UVERBS_ATTR_UHW()); + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_INFO_HANDLES, /* Also includes any device specific object ids */ @@ -220,6 +257,7 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY)); DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, + &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 00c547887132..3f121ac31e0a 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -195,9 +195,9 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi, * disassociation, and the FD types require the driver to use * struct file_operations.owner to prevent the driver module * code from unloading while the file is open. This provides - * enough safety that uverbs_close_fd() will continue to work. - * Drivers using FD are responsible to handle disassociation of - * the device on their own. + * enough safety that uverbs_uobject_fd_release() will + * continue to work. Drivers using FD are responsible to + * handle disassociation of the device on their own. */ if (WARN_ON(is_driver && obj->type_attrs->type_class != &uverbs_idr_class && @@ -626,6 +626,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi) } static const struct uapi_definition uverbs_core_api[] = { + UAPI_DEF_CHAIN(uverbs_def_obj_async_fd), UAPI_DEF_CHAIN(uverbs_def_obj_counters), UAPI_DEF_CHAIN(uverbs_def_obj_cq), UAPI_DEF_CHAIN(uverbs_def_obj_device), diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index dd765e176cdd..e62c9dfc7837 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -52,6 +52,9 @@ #include <rdma/rw.h> #include "core_priv.h" +#include <trace/events/rdma_core.h> + +#include <trace/events/rdma_core.h> static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); @@ -1053,11 +1056,11 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) struct ib_qp *qp = context; unsigned long flags; - spin_lock_irqsave(&qp->device->event_handler_lock, flags); + spin_lock_irqsave(&qp->device->qp_open_list_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); - spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) @@ -1094,9 +1097,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, qp->qp_num = real_qp->qp_num; qp->qp_type = real_qp->qp_type; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_add(&qp->open_list, &real_qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); return qp; } @@ -1182,16 +1185,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, if (ret) goto err; - qp->qp_type = qp_init_attr->qp_type; - qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; - - atomic_set(&qp->usecnt, 0); - qp->mrs_used = 0; - spin_lock_init(&qp->mr_lock); - INIT_LIST_HEAD(&qp->rdma_mrs); - INIT_LIST_HEAD(&qp->sig_mrs); - qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { struct ib_qp *xrc_qp = create_xrc_qp_user(qp, qp_init_attr, udata); @@ -1824,9 +1817,9 @@ int ib_close_qp(struct ib_qp *qp) if (real_qp == qp) return -EINVAL; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_del(&qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); atomic_dec(&real_qp->usecnt); if (qp->qp_sec) @@ -1990,6 +1983,47 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ +struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags) +{ + struct ib_mr *mr; + + if (access_flags & IB_ACCESS_ON_DEMAND) { + if (!(pd->device->attrs.device_cap_flags & + IB_DEVICE_ON_DEMAND_PAGING)) { + pr_debug("ODP support not available\n"); + return ERR_PTR(-EINVAL); + } + } + + mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr, + access_flags, NULL); + + if (IS_ERR(mr)) + return mr; + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + atomic_inc(&pd->usecnt); + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + + return mr; +} +EXPORT_SYMBOL(ib_reg_user_mr); + +int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + if (!pd->device->ops.advise_mr) + return -EOPNOTSUPP; + + return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, + NULL); +} +EXPORT_SYMBOL(ib_advise_mr); + int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; @@ -1997,6 +2031,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; + trace_mr_dereg(mr); rdma_restrack_del(&mr->res); ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { @@ -2028,11 +2063,16 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, { struct ib_mr *mr; - if (!pd->device->ops.alloc_mr) - return ERR_PTR(-EOPNOTSUPP); + if (!pd->device->ops.alloc_mr) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY)) - return ERR_PTR(-EINVAL); + if (mr_type == IB_MR_TYPE_INTEGRITY) { + WARN_ON_ONCE(1); + mr = ERR_PTR(-EINVAL); + goto out; + } mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { @@ -2048,6 +2088,8 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->sig_attrs = NULL; } +out: + trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_user); @@ -2072,21 +2114,27 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, struct ib_sig_attrs *sig_attrs; if (!pd->device->ops.alloc_mr_integrity || - !pd->device->ops.map_mr_sg_pi) - return ERR_PTR(-EOPNOTSUPP); + !pd->device->ops.map_mr_sg_pi) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - if (!max_num_meta_sg) - return ERR_PTR(-EINVAL); + if (!max_num_meta_sg) { + mr = ERR_PTR(-EINVAL); + goto out; + } sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); - if (!sig_attrs) - return ERR_PTR(-ENOMEM); + if (!sig_attrs) { + mr = ERR_PTR(-ENOMEM); + goto out; + } mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, max_num_meta_sg); if (IS_ERR(mr)) { kfree(sig_attrs); - return mr; + goto out; } mr->device = pd->device; @@ -2100,6 +2148,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->type = IB_MR_TYPE_INTEGRITY; mr->sig_attrs = sig_attrs; +out: + trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_integrity); @@ -2744,6 +2794,7 @@ void ib_drain_sq(struct ib_qp *qp) qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); + trace_cq_drain_complete(qp->send_cq); } EXPORT_SYMBOL(ib_drain_sq); @@ -2772,6 +2823,7 @@ void ib_drain_rq(struct ib_qp *qp) qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); + trace_cq_drain_complete(qp->recv_cq); } EXPORT_SYMBOL(ib_drain_rq); |