diff options
Diffstat (limited to 'drivers/infiniband/core')
23 files changed, 684 insertions, 701 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 65e3e7df8a4b..f253295795f0 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -37,7 +37,6 @@ #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/workqueue.h> -#include <linux/module.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index b79f816a7203..4084d05a4510 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -34,7 +34,6 @@ */ #include <linux/if_vlan.h> -#include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/workqueue.h> @@ -956,7 +955,7 @@ int rdma_query_gid(struct ib_device *device, u32 port_num, { struct ib_gid_table *table; unsigned long flags; - int res = -EINVAL; + int res; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; @@ -964,9 +963,15 @@ int rdma_query_gid(struct ib_device *device, u32 port_num, table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); - if (index < 0 || index >= table->sz || - !is_gid_entry_valid(table->data_vec[index])) + if (index < 0 || index >= table->sz) { + res = -EINVAL; goto done; + } + + if (!is_gid_entry_valid(table->data_vec[index])) { + res = -ENOENT; + goto done; + } memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); res = 0; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c903b74f46a4..1f9938a2c475 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -175,6 +175,7 @@ struct cm_device { struct cm_av { struct cm_port *port; struct rdma_ah_attr ah_attr; + u16 dlid_datapath; u16 pkey_index; u8 timeout; }; @@ -617,7 +618,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; - __be64 service_mask = cm_id_priv->id.service_mask; unsigned long flags; spin_lock_irqsave(&cm.lock, flags); @@ -625,9 +625,16 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); - if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) { + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_left; + else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_right; + else { /* * Sharing an ib_cm_id with different handlers is not * supported @@ -643,17 +650,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, spin_unlock_irqrestore(&cm.lock, flags); return cur_cm_id_priv; } - - if (cm_id_priv->id.device < cur_cm_id_priv->id.device) - link = &(*link)->rb_left; - else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) - link = &(*link)->rb_right; - else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_left; - else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_right; - else - link = &(*link)->rb_right; } cm_id_priv->listen_sharecount++; rb_link_node(&cm_id_priv->service_node, parent, link); @@ -670,12 +666,7 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); - if ((cm_id_priv->id.service_mask & service_id) == - cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) { - refcount_inc(&cm_id_priv->refcount); - return cm_id_priv; - } + if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) @@ -684,8 +675,10 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; - else - node = node->rb_right; + else { + refcount_inc(&cm_id_priv->refcount); + return cm_id_priv; + } } return NULL; } @@ -1158,22 +1151,17 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) } EXPORT_SYMBOL(ib_destroy_cm_id); -static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, - __be64 service_mask) +static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id) { - service_mask = service_mask ? service_mask : ~cpu_to_be64(0); - service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; - if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + if (service_id == IB_CM_ASSIGN_SERVICE_ID) cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - } else { + else cm_id_priv->id.service_id = service_id; - cm_id_priv->id.service_mask = service_mask; - } + return 0; } @@ -1185,12 +1173,8 @@ static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id) { struct cm_id_private *cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -1203,7 +1187,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) goto out; } - ret = cm_init_listen(cm_id_priv, service_id, service_mask); + ret = cm_init_listen(cm_id_priv, service_id); if (ret) goto out; @@ -1251,9 +1235,11 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, if (IS_ERR(cm_id_priv)) return ERR_CAST(cm_id_priv); - err = cm_init_listen(cm_id_priv, service_id, 0); - if (err) + err = cm_init_listen(cm_id_priv, service_id); + if (err) { + ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); + } spin_lock_irq(&cm_id_priv->lock); listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); @@ -1319,6 +1305,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, struct sa_path_rec *pri_path = param->primary_path; struct sa_path_rec *alt_path = param->alternate_path; bool pri_ext = false; + __be16 lid; if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA) pri_ext = opa_is_extended_lid(pri_path->opa.dlid, @@ -1378,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, htons(ntohl(sa_path_get_dlid( pri_path))))); } else { + + if (param->primary_path_inbound) { + lid = param->primary_path_inbound->ib.dlid; + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(lid)); + } else + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + /* Work-around until there's a way to obtain remote LID info */ - IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, - be16_to_cpu(IB_LID_PERMISSIVE)); IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, be16_to_cpu(IB_LID_PERMISSIVE)); } @@ -1520,7 +1514,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, } } cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( @@ -1536,6 +1529,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->primary_path_outbound) + cm_id_priv->av.dlid_datapath = + be16_to_cpu(param->primary_path_outbound->ib.dlid); + if (param->alternate_path) cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); @@ -1630,14 +1627,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num, static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { u32 lid; if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(primary_path, - IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, - req_msg)); + sa_path_set_dlid(primary_path, wc->slid); sa_path_set_slid(primary_path, IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg)); @@ -1674,7 +1670,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { primary_path->dgid = *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg); @@ -1732,7 +1729,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, if (sa_path_is_roce(alt_path)) alt_path->roce.route_resolved = false; } - cm_format_path_lid_from_req(req_msg, primary_path, alt_path); + cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc); } static u16 cm_get_bth_pkey(struct cm_work *work) @@ -2077,7 +2074,6 @@ static int cm_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); @@ -2146,7 +2142,7 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; cm_format_paths_from_req(req_msg, &work->path[0], - &work->path[1]); + &work->path[1], work->mad_recv_wc->wc); if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); @@ -2171,6 +2167,10 @@ static int cm_req_handler(struct cm_work *work) NULL, 0); goto rejected; } + if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB) + cm_id_priv->av.dlid_datapath = + IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg); + if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, &cm_id_priv->alt_av); @@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: + case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: @@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->msg); break; - case IB_CM_MRA_REP_RCVD: - break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_DREQ_COUNTER]); @@ -3322,7 +3321,7 @@ static int cm_lap_handler(struct cm_work *work) ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); if (ret) { rdma_destroy_ah_attr(&ah_attr); - return -EINVAL; + goto deref; } spin_lock_irq(&cm_id_priv->lock); @@ -3485,7 +3484,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; if (cm_id->state != IB_CM_IDLE) { @@ -3560,7 +3558,6 @@ static int cm_sidr_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = sidr_req_msg->hdr.tid; wc = work->mad_recv_wc->wc; @@ -4133,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; + if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) && + cm_id_priv->av.dlid_datapath && + (cm_id_priv->av.dlid_datapath != 0xffff)) + qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 835ac54d4a24..26d1772179b8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -11,6 +11,7 @@ #include <linux/in6.h> #include <linux/mutex.h> #include <linux/random.h> +#include <linux/rbtree.h> #include <linux/igmp.h> #include <linux/xarray.h> #include <linux/inetdevice.h> @@ -20,6 +21,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netevent.h> #include <net/tcp.h> #include <net/ipv6.h> #include <net/ip_fib.h> @@ -67,8 +69,8 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; -static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, - union ib_gid *mgid); +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, + enum ib_gid_type gid_type); const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { @@ -168,6 +170,9 @@ static struct ib_sa_client sa_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); +static struct rb_root id_table = RB_ROOT; +/* Serialize operations of id_table tree */ +static DEFINE_SPINLOCK(id_table_lock); static struct workqueue_struct *cma_wq; static unsigned int cma_pernet_id; @@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps) } } +struct id_table_entry { + struct list_head id_list; + struct rb_node rb_node; +}; + struct cma_device { struct list_head list; struct ib_device *device; @@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) return hdr->ip_version >> 4; } -static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) +static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) { hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } +static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *)&id_priv->id.route.addr.src_addr; +} + +static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *)&id_priv->id.route.addr.dst_addr; +} + static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) { struct in_device *in_dev = NULL; @@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) return (in_dev) ? 0 : -ENODEV; } +static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa, + struct id_table_entry *entry_b) +{ + struct rdma_id_private *id_priv = list_first_entry( + &entry_b->id_list, struct rdma_id_private, id_list_entry); + int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if; + struct sockaddr *sb = cma_dst_addr(id_priv); + + if (ifindex_a != ifindex_b) + return (ifindex_a > ifindex_b) ? 1 : -1; + + if (sa->sa_family != sb->sa_family) + return sa->sa_family - sb->sa_family; + + if (sa->sa_family == AF_INET) + return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr, + (char *)&((struct sockaddr_in *)sb)->sin_addr, + sizeof(((struct sockaddr_in *)sa)->sin_addr)); + + return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr, + &((struct sockaddr_in6 *)sb)->sin6_addr); +} + +static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv) +{ + struct rb_node **new, *parent = NULL; + struct id_table_entry *this, *node; + unsigned long flags; + int result; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + spin_lock_irqsave(&id_table_lock, flags); + new = &id_table.rb_node; + while (*new) { + this = container_of(*new, struct id_table_entry, rb_node); + result = compare_netdev_and_ip( + node_id_priv->id.route.addr.dev_addr.bound_dev_if, + cma_dst_addr(node_id_priv), this); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else { + list_add_tail(&node_id_priv->id_list_entry, + &this->id_list); + kfree(node); + goto unlock; + } + } + + INIT_LIST_HEAD(&node->id_list); + list_add_tail(&node_id_priv->id_list_entry, &node->id_list); + + rb_link_node(&node->rb_node, parent, new); + rb_insert_color(&node->rb_node, &id_table); + +unlock: + spin_unlock_irqrestore(&id_table_lock, flags); + return 0; +} + +static struct id_table_entry * +node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa) +{ + struct rb_node *node = root->rb_node; + struct id_table_entry *data; + int result; + + while (node) { + data = container_of(node, struct id_table_entry, rb_node); + result = compare_netdev_and_ip(ifindex, sa, data); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + + return NULL; +} + +static void cma_remove_id_from_tree(struct rdma_id_private *id_priv) +{ + struct id_table_entry *data; + unsigned long flags; + + spin_lock_irqsave(&id_table_lock, flags); + if (list_empty(&id_priv->id_list_entry)) + goto out; + + data = node_from_ndev_ip(&id_table, + id_priv->id.route.addr.dev_addr.bound_dev_if, + cma_dst_addr(id_priv)); + if (!data) + goto out; + + list_del_init(&id_priv->id_list_entry); + if (list_empty(&data->id_list)) { + rb_erase(&data->rb_node, &id_table); + kfree(data); + } +out: + spin_unlock_irqrestore(&id_table_lock, flags); +} + static void _cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) -{ - return (struct sockaddr *) &id_priv->id.route.addr.src_addr; -} - -static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) -{ - return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; -} - static inline unsigned short cma_family(struct rdma_id_private *id_priv) { return id_priv->id.route.addr.src_addr.ss_family; @@ -766,6 +887,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) unsigned int p; u16 pkey, index; enum ib_port_state port_state; + int ret; int i; cma_dev = NULL; @@ -784,9 +906,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !rdma_query_gid(cur_dev->device, - p, i, &gid); - i++) { + + for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len; + ++i) { + ret = rdma_query_gid(cur_dev->device, p, i, + &gid); + if (ret) + continue; + if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -855,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->device_item); + INIT_LIST_HEAD(&id_priv->id_list_entry); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -1428,7 +1556,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev, return false; memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_iif = net_dev->ifindex; + fl4.flowi4_oif = net_dev->ifindex; fl4.daddr = daddr; fl4.saddr = saddr; @@ -1713,8 +1841,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } if (!validate_net_dev(*net_dev, - (struct sockaddr *)&req->listen_addr_storage, - (struct sockaddr *)&req->src_addr_storage)) { + (struct sockaddr *)&req->src_addr_storage, + (struct sockaddr *)&req->listen_addr_storage)) { id_priv = ERR_PTR(-EHOSTUNREACH); goto err; } @@ -1840,17 +1968,19 @@ static void destroy_mc(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { + if (ndev && !send_only) { + enum ib_gid_type gid_type; union ib_gid mgid; - cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, - &mgid); - - if (!send_only) - cma_igmp_send(ndev, &mgid, false); - - dev_put(ndev); + gid_type = id_priv->cma_dev->default_gid_type + [id_priv->id.port_num - + rdma_start_port( + id_priv->cma_dev->device)]; + cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid, + gid_type); + cma_igmp_send(ndev, &mgid, false); } + dev_put(ndev); cancel_work_sync(&mc->iboe_join.work); } @@ -1875,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, cma_cancel_operation(id_priv, state); rdma_restrack_del(&id_priv->res); + cma_remove_id_from_tree(id_priv); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) @@ -1895,6 +2026,8 @@ static void _destroy_id(struct rdma_id_private *id_priv, cma_id_put(id_priv->id.context); kfree(id_priv->id.route.path_rec); + kfree(id_priv->id.route.path_rec_inbound); + kfree(id_priv->id.route.path_rec_outbound); put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); @@ -2110,14 +2243,14 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, goto err; rt = &id->route; - rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; - rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec), - GFP_KERNEL); + rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = kmalloc_array(rt->num_pri_alt_paths, + sizeof(*rt->path_rec), GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *path; - if (rt->num_paths == 2) + if (rt->num_pri_alt_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { @@ -2634,7 +2767,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) { struct rdma_id_private *id_priv; - if (id->qp_type != IB_QPT_RC) + if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); @@ -2686,26 +2819,72 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer) } EXPORT_SYMBOL(rdma_set_min_rnr_timer); +static void route_set_path_rec_inbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_inbound) { + route->path_rec_inbound = + kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL); + if (!route->path_rec_inbound) + return; + } + + *route->path_rec_inbound = *path_rec; +} + +static void route_set_path_rec_outbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_outbound) { + route->path_rec_outbound = + kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL); + if (!route->path_rec_outbound) + return; + } + + *route->path_rec_outbound = *path_rec; +} + static void cma_query_handler(int status, struct sa_path_rec *path_rec, - void *context) + int num_prs, void *context) { struct cma_work *work = context; struct rdma_route *route; + int i; route = &work->id->id.route; - if (!status) { - route->num_paths = 1; - *route->path_rec = *path_rec; - } else { - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; - work->event.status = status; - pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", - status); + if (status) + goto fail; + + for (i = 0; i < num_prs; i++) { + if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP)) + *route->path_rec = path_rec[i]; + else if (path_rec[i].flags & IB_PATH_INBOUND) + route_set_path_rec_inbound(work, &path_rec[i]); + else if (path_rec[i].flags & IB_PATH_OUTBOUND) + route_set_path_rec_outbound(work, &path_rec[i]); + } + if (!route->path_rec) { + status = -EINVAL; + goto fail; } + route->num_pri_alt_paths = 1; + queue_work(cma_wq, &work->work); + return; + +fail: + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; + pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", + status); queue_work(cma_wq, &work->work); } @@ -2950,7 +3129,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id, dev_put(ndev); } - id->route.num_paths = 1; + id->route.num_pri_alt_paths = 1; return 0; err_free: @@ -3083,7 +3262,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err1; } - route->num_paths = 1; + route->num_pri_alt_paths = 1; ndev = cma_iboe_set_path_rec_l2_fields(id_priv); if (!ndev) { @@ -3143,7 +3322,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; - route->num_paths = 0; + route->num_pri_alt_paths = 0; err1: kfree(work); return ret; @@ -3164,8 +3343,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) cma_id_get(id_priv); if (rdma_cap_ib_sa(id->device, id->port_num)) ret = cma_resolve_ib_route(id_priv, timeout_ms); - else if (rdma_protocol_roce(id->device, id->port_num)) + else if (rdma_protocol_roce(id->device, id->port_num)) { ret = cma_resolve_iboe_route(id_priv); + if (!ret) + cma_add_id_to_tree(id_priv); + } else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv); else @@ -3362,22 +3544,30 @@ err: static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, const struct sockaddr *dst_addr) { - if (!src_addr || !src_addr->sa_family) { - src_addr = (struct sockaddr *) &id->route.addr.src_addr; - src_addr->sa_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && - dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; - struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *) src_addr)->sib_pkey = - ((struct sockaddr_ib *) dst_addr)->sib_pkey; - } + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; } - return rdma_bind_addr(id, src_addr); + return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); } /* @@ -3617,7 +3807,7 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps, inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rover = prandom_u32() % remaining + low; + rover = prandom_u32_max(remaining) + low; retry: if (last_used_port != rover) { struct rdma_bind_list *bind_list; @@ -4033,8 +4223,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); - req.private_data_len = offset + conn_param->private_data_len; - if (req.private_data_len < conn_param->private_data_len) + if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len)) return -EINVAL; if (req.private_data_len) { @@ -4093,8 +4282,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); - req.private_data_len = offset + conn_param->private_data_len; - if (req.private_data_len < conn_param->private_data_len) + if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len)) return -EINVAL; if (req.private_data_len) { @@ -4125,7 +4313,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, } req.primary_path = &route->path_rec[0]; - if (route->num_paths == 2) + req.primary_path_inbound = route->path_rec_inbound; + req.primary_path_outbound = route->path_rec_outbound; + if (route->num_pri_alt_paths == 2) req.alternate_path = &route->path_rec[1]; req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; @@ -4908,10 +5098,87 @@ out: return ret; } +static void cma_netevent_work_handler(struct work_struct *_work) +{ + struct rdma_id_private *id_priv = + container_of(_work, struct rdma_id_private, id.net_work); + struct rdma_cm_event event = {}; + + mutex_lock(&id_priv->handler_mutex); + + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) + goto out_unlock; + + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -ETIMEDOUT; + + if (cma_cm_event_handler(id_priv, &event)) { + __acquire(&id_priv->handler_mutex); + id_priv->cm_id.ib = NULL; + cma_id_put(id_priv); + destroy_id_handler_unlock(id_priv); + return; + } + +out_unlock: + mutex_unlock(&id_priv->handler_mutex); + cma_id_put(id_priv); +} + +static int cma_netevent_callback(struct notifier_block *self, + unsigned long event, void *ctx) +{ + struct id_table_entry *ips_node = NULL; + struct rdma_id_private *current_id; + struct neighbour *neigh = ctx; + unsigned long flags; + + if (event != NETEVENT_NEIGH_UPDATE) + return NOTIFY_DONE; + + spin_lock_irqsave(&id_table_lock, flags); + if (neigh->tbl->family == AF_INET6) { + struct sockaddr_in6 neigh_sock_6; + + neigh_sock_6.sin6_family = AF_INET6; + neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key; + ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex, + (struct sockaddr *)&neigh_sock_6); + } else if (neigh->tbl->family == AF_INET) { + struct sockaddr_in neigh_sock_4; + + neigh_sock_4.sin_family = AF_INET; + neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key); + ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex, + (struct sockaddr *)&neigh_sock_4); + } else + goto out; + + if (!ips_node) + goto out; + + list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) { + if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr, + neigh->ha, ETH_ALEN)) + continue; + INIT_WORK(¤t_id->id.net_work, cma_netevent_work_handler); + cma_id_get(current_id); + queue_work(cma_wq, ¤t_id->id.net_work); + } +out: + spin_unlock_irqrestore(&id_table_lock, flags); + return NOTIFY_DONE; +} + static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; +static struct notifier_block cma_netevent_cb = { + .notifier_call = cma_netevent_callback +}; + static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -5134,6 +5401,7 @@ static int __init cma_init(void) ib_sa_register_client(&sa_client); register_netdevice_notifier(&cma_nb); + register_netevent_notifier(&cma_netevent_cb); ret = ib_register_client(&cma_client); if (ret) @@ -5148,6 +5416,7 @@ static int __init cma_init(void) err_ib: ib_unregister_client(&cma_client); err: + unregister_netevent_notifier(&cma_netevent_cb); unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); unregister_pernet_subsys(&cma_pernet_operations); @@ -5160,6 +5429,7 @@ static void __exit cma_cleanup(void) { cma_configfs_exit(); ib_unregister_client(&cma_client); + unregister_netevent_notifier(&cma_netevent_cb); unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); unregister_pernet_subsys(&cma_pernet_operations); diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 9ac16e0db761..7b68b3ea979f 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/module.h> #include <linux/configfs.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -293,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group, goto fail; } - strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); + strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); config_group_init_type_name(&cma_dev_group->ports_group, "ports", &cma_ports_group_type); diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 757a0ef79872..b7354c94cf1b 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -64,6 +64,7 @@ struct rdma_id_private { struct list_head listen_item; struct list_head listen_list; }; + struct list_head id_list_entry; struct cma_device *cma_dev; struct list_head mc_list; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 433b426729d4..a70876a0a231 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2015 HGST, a Western Digital Company. */ -#include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <rdma/ib_verbs.h> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 22a4adda7981..b69e2c4e4d2a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); +static struct workqueue_struct *ib_unreg_wq; /* * Each of the three rwsem locks (devices, clients, client_data) protects the @@ -421,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) return ret; } - strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX); ret = rename_compat_devs(ibdev); downgrade_write(&devices_rwsem); @@ -1216,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name) ret = -ENFILE; goto out; } - strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); + strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, &last_id, GFP_KERNEL); @@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) WARN_ON(!refcount_read(&ib_dev->refcount)); WARN_ON(!ib_dev->ops.dealloc_driver); get_device(&ib_dev->dev); - if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) + if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work)) put_device(&ib_dev->dev); } EXPORT_SYMBOL(ib_unregister_device_queued); @@ -2461,7 +2462,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, ++i) { ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) - return ret; + continue; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) @@ -2612,7 +2614,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_counters); SET_DEVICE_OP(dev_ops, create_cq); SET_DEVICE_OP(dev_ops, create_flow); - SET_DEVICE_OP(dev_ops, create_flow_action_esp); SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); @@ -2675,7 +2676,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); - SET_DEVICE_OP(dev_ops, modify_flow_action_esp); SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); @@ -2752,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { static int __init ib_core_init(void) { - int ret; + int ret = -ENOMEM; ib_wq = alloc_workqueue("infiniband", 0, 0); if (!ib_wq) return -ENOMEM; + ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!ib_unreg_wq) + goto err; + ib_comp_wq = alloc_workqueue("ib-comp-wq", WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); - if (!ib_comp_wq) { - ret = -ENOMEM; - goto err; - } + if (!ib_comp_wq) + goto err_unbound; ib_comp_unbound_wq = alloc_workqueue("ib-comp-unb-wq", WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); - if (!ib_comp_unbound_wq) { - ret = -ENOMEM; + if (!ib_comp_unbound_wq) goto err_comp; - } ret = class_register(&ib_class); if (ret) { @@ -2814,10 +2815,18 @@ static int __init ib_core_init(void) nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); - roce_gid_mgmt_init(); + ret = roce_gid_mgmt_init(); + if (ret) { + pr_warn("Couldn't init RoCE GID management\n"); + goto err_parent; + } return 0; +err_parent: + rdma_nl_unregister(RDMA_NL_LS); + nldev_exit(); + unregister_pernet_device(&rdma_dev_net_ops); err_compat: unregister_blocking_lsm_notifier(&ibdev_lsm_nb); err_sa: @@ -2832,6 +2841,8 @@ err_comp_unbound: destroy_workqueue(ib_comp_unbound_wq); err_comp: destroy_workqueue(ib_comp_wq); +err_unbound: + destroy_workqueue(ib_unreg_wq); err: destroy_workqueue(ib_wq); return ret; @@ -2853,7 +2864,7 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); - flush_workqueue(system_unbound_wq); + destroy_workqueue(ib_unreg_wq); WARN_ON(!xa_empty(&clients)); WARN_ON(!xa_empty(&devices)); } diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 3a42ad43056e..d6fc8402158a 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -33,7 +33,6 @@ #ifndef _IWPM_UTIL_H #define _IWPM_UTIL_H -#include <linux/module.h> #include <linux/io.h> #include <linux/in.h> #include <linux/in6.h> diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c index 7063e41eaf26..c77d7d2559a1 100644 --- a/drivers/infiniband/core/lag.c +++ b/drivers/infiniband/core/lag.c @@ -7,8 +7,7 @@ #include <rdma/ib_cache.h> #include <rdma/lag.h> -static struct sk_buff *rdma_build_skb(struct ib_device *device, - struct net_device *netdev, +static struct sk_buff *rdma_build_skb(struct net_device *netdev, struct rdma_ah_attr *ah_attr, gfp_t flags) { @@ -86,7 +85,7 @@ static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, struct net_device *slave; struct sk_buff *skb; - skb = rdma_build_skb(device, master, ah_attr, flags); + skb = rdma_build_skb(master, ah_attr, flags); if (!skb) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f5aacaf7fb8e..12dc97067ed2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1739,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (!device) return -EINVAL; - if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) { + if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { ib_device_put(device); return -EINVAL; } @@ -1951,9 +1951,10 @@ static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], u32 port) { struct rdma_hw_stats *stats; - int rem, i, index, ret = 0; struct nlattr *entry_attr; unsigned long *target; + int rem, i, ret = 0; + u32 index; stats = ib_get_hw_stats_port(device, port); if (!stats) @@ -2536,7 +2537,7 @@ void __init nldev_init(void) rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); } -void __exit nldev_exit(void) +void nldev_exit(void) { rdma_nl_unregister(RDMA_NL_NLDEV); } diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 94d83b665a2f..29b1ab1d5f93 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, * In exclusive access mode, we check that the counter is zero (nobody * claimed this object) and we set it to -1. Releasing a shared access * lock is done simply by decreasing the counter. As for exclusive - * access locks, since only a single one of them is is allowed + * access locks, since only a single one of them is allowed * concurrently, setting the counter to zero is enough for releasing * this lock. */ diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 68197e576433..e958c43dd28f 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -250,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u32 port, /** * is_upper_ndev_bond_master_filter - Check if a given netdevice - * is bond master device of netdevice of the the RDMA device of port. + * is bond master device of netdevice of the RDMA device of port. * @ib_dev: IB device to check * @port: Port to consider for adding default GID * @rdma_ndev: Pointer to rdma netdevice diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5a3bd41b331c..8367974b7998 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2016 HGST, a Western Digital Company. */ +#include <linux/memremap.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/pci-p2pdma.h> @@ -273,33 +274,6 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } -static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - u32 sg_cnt, enum dma_data_direction dir) -{ - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); - else - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); -} - -static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, - enum dma_data_direction dir) -{ - int nents; - - if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { - if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) - return 0; - nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, - sgt->orig_nents, dir); - if (!nents) - return -EIO; - sgt->nents = nents; - return 0; - } - return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); -} - /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -326,7 +300,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, }; int ret; - ret = rdma_rw_map_sgtable(dev, &sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); if (ret) return ret; sg_cnt = sgt.nents; @@ -365,7 +339,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, return ret; out_unmap_sg: - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -413,12 +387,12 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -EINVAL; } - ret = rdma_rw_map_sgtable(dev, &sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); if (ret) return ret; if (prot_sg_cnt) { - ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0); if (ret) goto out_unmap_sg; } @@ -485,9 +459,9 @@ out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: if (prot_sgt.nents) - rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0); out_unmap_sg: - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); @@ -620,7 +594,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, break; } - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); @@ -648,8 +622,8 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, kfree(ctx->reg); if (prot_sg_cnt) - rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 74ecd7456a11..0de83d9a4985 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -32,7 +32,6 @@ * SOFTWARE. */ -#include <linux/module.h> #include <linux/init.h> #include <linux/err.h> #include <linux/random.h> @@ -51,6 +50,7 @@ #include <rdma/ib_marshall.h> #include <rdma/ib_addr.h> #include <rdma/opa_addr.h> +#include <rdma/rdma_cm.h> #include "sa.h" #include "core_priv.h" @@ -105,7 +105,8 @@ struct ib_sa_device { }; struct ib_sa_query { - void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); + void (*callback)(struct ib_sa_query *sa_query, int status, + int num_prs, struct ib_sa_mad *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; @@ -117,6 +118,12 @@ struct ib_sa_query { u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ + + /* A separate buffer to save pathrecords of a response, as in cases + * like IB/netlink, mulptiple pathrecords are supported, so that + * mad->data is not large enough to hold them + */ + void *resp_pr_data; }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 @@ -124,7 +131,8 @@ struct ib_sa_query { #define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_path_query { - void (*callback)(int, struct sa_path_rec *, void *); + void (*callback)(int status, struct sa_path_rec *rec, + int num_paths, void *context); void *context; struct ib_sa_query sa_query; struct sa_path_rec *conv_pr; @@ -713,7 +721,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && sa_rec->reversible != 0) - query->path_use = LS_RESOLVE_PATH_USE_GMP; + query->path_use = LS_RESOLVE_PATH_USE_ALL; else query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL; header->path_use = query->path_use; @@ -866,50 +874,81 @@ static void send_handler(struct ib_mad_agent *agent, static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, const struct nlmsghdr *nlh) { + struct ib_path_rec_data *srec, *drec; + struct ib_sa_path_query *path_query; struct ib_mad_send_wc mad_send_wc; - struct ib_sa_mad *mad = NULL; const struct nlattr *head, *curr; - struct ib_path_rec_data *rec; - int len, rem; + struct ib_sa_mad *mad = NULL; + int len, rem, num_prs = 0; u32 mask = 0; int status = -EIO; - if (query->callback) { - head = (const struct nlattr *) nlmsg_data(nlh); - len = nlmsg_len(nlh); - switch (query->path_use) { - case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: - mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; - break; + if (!query->callback) + goto out; - case LS_RESOLVE_PATH_USE_ALL: - case LS_RESOLVE_PATH_USE_GMP: - default: - mask = IB_PATH_PRIMARY | IB_PATH_GMP | - IB_PATH_BIDIRECTIONAL; - break; + path_query = container_of(query, struct ib_sa_path_query, sa_query); + mad = query->mad_buf->mad; + if (!path_query->conv_pr && + (be16_to_cpu(mad->mad_hdr.attr_id) == IB_SA_ATTR_PATH_REC)) { + /* Need a larger buffer for possible multiple PRs */ + query->resp_pr_data = kvcalloc(RDMA_PRIMARY_PATH_MAX_REC_NUM, + sizeof(*drec), GFP_KERNEL); + if (!query->resp_pr_data) { + query->callback(query, -ENOMEM, 0, NULL); + return; } - nla_for_each_attr(curr, head, len, rem) { - if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) { - rec = nla_data(curr); - /* - * Get the first one. In the future, we may - * need to get up to 6 pathrecords. - */ - if ((rec->flags & mask) == mask) { - mad = query->mad_buf->mad; - mad->mad_hdr.method |= - IB_MGMT_METHOD_RESP; - memcpy(mad->data, rec->path_rec, - sizeof(rec->path_rec)); - status = 0; - break; - } - } + } + + head = (const struct nlattr *) nlmsg_data(nlh); + len = nlmsg_len(nlh); + switch (query->path_use) { + case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: + mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; + break; + + case LS_RESOLVE_PATH_USE_ALL: + mask = IB_PATH_PRIMARY; + break; + + case LS_RESOLVE_PATH_USE_GMP: + default: + mask = IB_PATH_PRIMARY | IB_PATH_GMP | + IB_PATH_BIDIRECTIONAL; + break; + } + + drec = (struct ib_path_rec_data *)query->resp_pr_data; + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD) + continue; + + srec = nla_data(curr); + if ((srec->flags & mask) != mask) + continue; + + status = 0; + if (!drec) { + memcpy(mad->data, srec->path_rec, + sizeof(srec->path_rec)); + num_prs = 1; + break; } - query->callback(query, status, mad); + + memcpy(drec, srec, sizeof(*drec)); + drec++; + num_prs++; + if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM) + break; } + if (!status) + mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; + + query->callback(query, status, num_prs, mad); + kvfree(query->resp_pr_data); + query->resp_pr_data = NULL; + +out: mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_SUCCESS; send_handler(query->mad_buf->mad_agent, &mad_send_wc); @@ -1035,10 +1074,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_ext_ack *extack) { unsigned long flags; - struct ib_sa_query *query; + struct ib_sa_query *query = NULL, *iter; struct ib_mad_send_buf *send_buf; struct ib_mad_send_wc mad_send_wc; - int found = 0; int ret; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || @@ -1046,20 +1084,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, return -EPERM; spin_lock_irqsave(&ib_nl_request_lock, flags); - list_for_each_entry(query, &ib_nl_request_list, list) { + list_for_each_entry(iter, &ib_nl_request_list, list) { /* * If the query is cancelled, let the timeout routine * take care of it. */ - if (nlh->nlmsg_seq == query->seq) { - found = !ib_sa_query_cancelled(query); - if (found) - list_del(&query->list); + if (nlh->nlmsg_seq == iter->seq) { + if (!ib_sa_query_cancelled(iter)) { + list_del(&iter->list); + query = iter; + } break; } } - if (!found) { + if (!query) { spin_unlock_irqrestore(&ib_nl_request_lock, flags); goto resp_out; } @@ -1412,41 +1451,90 @@ static int opa_pr_query_possible(struct ib_sa_client *client, return PR_IB_SUPPORTED; } +static void ib_sa_pr_callback_single(struct ib_sa_path_query *query, + int status, struct ib_sa_mad *mad) +{ + struct sa_path_rec rec = {}; + + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(&rec); + + if (query->conv_pr) { + struct sa_path_rec opa; + + memset(&opa, 0, sizeof(struct sa_path_rec)); + sa_convert_path_ib_to_opa(&opa, &rec); + query->callback(status, &opa, 1, query->context); + } else { + query->callback(status, &rec, 1, query->context); + } +} + +/** + * ib_sa_pr_callback_multiple() - Parse path records then do callback. + * + * In a multiple-PR case the PRs are saved in "query->resp_pr_data" + * (instead of"mad->data") and with "ib_path_rec_data" structure format, + * so that rec->flags can be set to indicate the type of PR. + * This is valid only in IB fabric. + */ +static void ib_sa_pr_callback_multiple(struct ib_sa_path_query *query, + int status, int num_prs, + struct ib_path_rec_data *rec_data) +{ + struct sa_path_rec *rec; + int i; + + rec = kvcalloc(num_prs, sizeof(*rec), GFP_KERNEL); + if (!rec) { + query->callback(-ENOMEM, NULL, 0, query->context); + return; + } + + for (i = 0; i < num_prs; i++) { + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + rec_data[i].path_rec, rec + i); + rec[i].rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(rec + i); + rec[i].flags = rec_data[i].flags; + } + + query->callback(status, rec, num_prs, query->context); + kvfree(rec); +} + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); + struct sa_path_rec rec; - if (mad) { - struct sa_path_rec rec; - - if (sa_query->flags & IB_SA_QUERY_OPA) { - ib_unpack(opa_path_rec_table, - ARRAY_SIZE(opa_path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_OPA; - query->callback(status, &rec, query->context); - } else { - ib_unpack(path_rec_table, - ARRAY_SIZE(path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_dmac_zero(&rec); - - if (query->conv_pr) { - struct sa_path_rec opa; + if (!mad || !num_prs) { + query->callback(status, NULL, 0, query->context); + return; + } - memset(&opa, 0, sizeof(struct sa_path_rec)); - sa_convert_path_ib_to_opa(&opa, &rec); - query->callback(status, &opa, query->context); - } else { - query->callback(status, &rec, query->context); - } + if (sa_query->flags & IB_SA_QUERY_OPA) { + if (num_prs != 1) { + query->callback(-EINVAL, NULL, 0, query->context); + return; } - } else - query->callback(status, NULL, query->context); + + ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_OPA; + query->callback(status, &rec, num_prs, query->context); + } else { + if (!sa_query->resp_pr_data) + ib_sa_pr_callback_single(query, status, mad); + else + ib_sa_pr_callback_multiple(query, status, num_prs, + sa_query->resp_pr_data); + } } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) @@ -1490,7 +1578,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + int num_paths, void *context), void *context, struct ib_sa_query **sa_query) { @@ -1589,7 +1677,7 @@ err1: EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = @@ -1681,7 +1769,7 @@ err1: /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_paths, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = @@ -1791,7 +1879,7 @@ static void ib_classportinfo_cb(void *context) } static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { unsigned long flags; @@ -1967,13 +2055,13 @@ static void send_handler(struct ib_mad_agent *agent, /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: - query->callback(query, -ETIMEDOUT, NULL); + query->callback(query, -ETIMEDOUT, 0, NULL); break; case IB_WC_WR_FLUSH_ERR: - query->callback(query, -EINTR, NULL); + query->callback(query, -EINTR, 0, NULL); break; default: - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); break; } @@ -2001,10 +2089,10 @@ static void recv_handler(struct ib_mad_agent *mad_agent, if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? - -EINVAL : 0, + -EINVAL : 0, 1, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); } ib_free_recv_mad(mad_recv_wc); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index a3f84b50c46a..84c53bd2a52d 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -433,6 +433,7 @@ static struct attribute *port_default_attrs[] = { &ib_port_attr_link_layer.attr, NULL }; +ATTRIBUTE_GROUPS(port_default); static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { @@ -774,7 +775,7 @@ static void ib_port_gid_attr_release(struct kobject *kobj) static struct kobj_type port_type = { .release = ib_port_release, .sysfs_ops = &port_sysfs_ops, - .default_attrs = port_default_attrs + .default_groups = port_default_groups, }; static struct kobj_type gid_attr_type = { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2b72c4fa9550..bf42650f125b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,6 +95,7 @@ struct ucma_context { u64 uid; struct list_head list; + struct list_head mc_list; struct work_struct close_work; }; @@ -105,6 +106,7 @@ struct ucma_multicast { u64 uid; u8 join_state; + struct list_head list; struct sockaddr_storage addr; }; @@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_WORK(&ctx->close_work, ucma_close_id); init_completion(&ctx->comp); + INIT_LIST_HEAD(&ctx->mc_list); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); ctx->file = file; @@ -484,19 +487,19 @@ err1: static void ucma_cleanup_multicast(struct ucma_context *ctx) { - struct ucma_multicast *mc; - unsigned long index; + struct ucma_multicast *mc, *tmp; - xa_for_each(&multicast_table, index, mc) { - if (mc->ctx != ctx) - continue; + xa_lock(&multicast_table); + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { + list_del(&mc->list); /* * At this point mc->ctx->ref is 0 so the mc cannot leave the * lock on the reader and this is enough serialization */ - xa_erase(&multicast_table, index); + __xa_erase(&multicast_table, mc->id); kfree(mc); } + xa_unlock(&multicast_table); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) @@ -751,8 +754,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, { struct rdma_dev_addr *dev_addr; - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, @@ -778,8 +781,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); @@ -918,7 +921,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, if (!resp) return -ENOMEM; - resp->num_paths = ctx->cm_id->route.num_paths; + resp->num_paths = ctx->cm_id->route.num_pri_alt_paths; for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { @@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, + xa_lock(&multicast_table); + if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) { ret = -ENOMEM; goto err_free_mc; } + list_add_tail(&mc->list, &ctx->mc_list); + xa_unlock(&multicast_table); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); @@ -1500,8 +1507,11 @@ err_leave_multicast: mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); err_xa_erase: - xa_erase(&multicast_table, mc->id); + xa_lock(&multicast_table); + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); err_free_mc: + xa_unlock(&multicast_table); kfree(mc); err_put_ctx: ucma_put_ctx(ctx); @@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-EINVAL); else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); - else - __xa_erase(&multicast_table, mc->id); - xa_unlock(&multicast_table); if (IS_ERR(mc)) { + xa_unlock(&multicast_table); ret = PTR_ERR(mc); goto out; } + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); + xa_unlock(&multicast_table); + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&mc->ctx->mutex); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index f0760741f281..04c04e6d24c3 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -16,9 +16,9 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { struct sg_table *sgt; struct scatterlist *sg; - struct dma_fence *fence; unsigned long start, end, cur = 0; unsigned int nmap = 0; + long ret; int i; dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); @@ -68,10 +68,13 @@ wait_fence: * may be not up-to-date. Wait for the exporter to finish * the migration. */ - fence = dma_resv_excl_fence(umem_dmabuf->attach->dmabuf->resv); - if (fence) - return dma_fence_wait(fence, false); - + ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, + DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; return 0; } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 7a47343d11f9..e9fa22d31c23 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -43,8 +43,6 @@ #include <linux/hmm.h> #include <linux/pagemap.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include "uverbs.h" @@ -227,7 +225,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, const struct mmu_interval_notifier_ops *ops) { struct ib_umem_odp *umem_odp; - struct mm_struct *mm; int ret; if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND))) @@ -241,7 +238,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, umem_odp->umem.length = size; umem_odp->umem.address = addr; umem_odp->umem.writable = ib_access_writable(access); - umem_odp->umem.owning_mm = mm = current->mm; + umem_odp->umem.owning_mm = current->mm; umem_odp->notifier.ops = ops; umem_odp->page_shift = PAGE_SHIFT; @@ -456,14 +453,14 @@ retry: break; } } - /* upon sucesss lock should stay on hold for the callee */ + /* upon success lock should stay on hold for the callee */ if (!ret) ret = dma_index - start_idx; else mutex_unlock(&umem_odp->umem_mutex); out_put_mm: - mmput(owning_mm); + mmput_async(owning_mm); out_put_task: if (owning_process) put_task_struct(owning_process); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d1345d76d9b1..4796f6a8828c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -337,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->hw_ver = attr->hw_ver; resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; - resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); + resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; @@ -739,6 +739,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->iova = cmd.hca_va; + mr->length = cmd.length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_set_name(&mr->res, NULL); @@ -861,8 +862,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) mr->pd = new_pd; atomic_inc(&new_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) + if (cmd.flags & IB_MR_REREG_TRANS) { mr->iova = cmd.hca_va; + mr->length = cmd.length; + } } memset(&resp, 0, sizeof(resp)); @@ -1399,7 +1402,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd->qp_type; - attr.create_flags = 0; attr.cap.max_send_wr = cmd->max_send_wr; attr.cap.max_recv_wr = cmd->max_recv_wr; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 990f0724acc6..d9799706c58e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -337,6 +337,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, break; + case UVERBS_ATTR_TYPE_RAW_FD: + if (uattr->attr_data.reserved || uattr->len != 0 || + uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX) + return -EINVAL; + /* _uverbs_get_const_signed() is the accessor */ + e->ptr_attr.data = uattr->data_s64; + break; + case UVERBS_ATTR_TYPE_IDRS_ARRAY: return uverbs_process_idrs_array(pbundle, attr_uapi, &e->objs_arr_attr, uattr, diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index d42ed7ff223e..0ddcf6da66c4 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -46,385 +46,6 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, return action->device->ops.destroy_flow_action(action); } -static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, - u32 flags, bool is_modify) -{ - u64 verbs_flags = flags; - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN)) - verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED; - - if (is_modify && uverbs_attr_is_valid(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) - verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS; - - return verbs_flags; -}; - -static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat) -{ - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm = - &keymat->keymat.aes_gcm; - - if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return -EOPNOTSUPP; - - if (aes_gcm->key_len != 32 && - aes_gcm->key_len != 24 && - aes_gcm->key_len != 16) - return -EINVAL; - - if (aes_gcm->icv_len != 16 && - aes_gcm->icv_len != 8 && - aes_gcm->icv_len != 12) - return -EINVAL; - - return 0; -} - -static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = { - [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm, -}; - -static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) -{ - /* This is used in order to modify an esp flow action with an enabled - * replay protection to a disabled one. This is only supported via - * modify, as in create verb we can simply drop the REPLAY attribute and - * achieve the same thing. - */ - return is_modify ? 0 : -EINVAL; -} - -static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) -{ - /* Some replay protections could always be enabled without validating - * anything. - */ - return 0; -} - -static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) = { - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none, - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok, -}; - -static int parse_esp_ip(enum ib_flow_spec_type proto, - const void __user *val_ptr, - size_t len, union ib_flow_spec *out) -{ - int ret; - const struct ib_uverbs_flow_ipv4_filter ipv4 = { - .src_ip = cpu_to_be32(0xffffffffUL), - .dst_ip = cpu_to_be32(0xffffffffUL), - .proto = 0xff, - .tos = 0xff, - .ttl = 0xff, - .flags = 0xff, - }; - const struct ib_uverbs_flow_ipv6_filter ipv6 = { - .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - .flow_label = cpu_to_be32(0xffffffffUL), - .next_hdr = 0xff, - .traffic_class = 0xff, - .hop_limit = 0xff, - }; - union { - struct ib_uverbs_flow_ipv4_filter ipv4; - struct ib_uverbs_flow_ipv6_filter ipv6; - } user_val = {}; - const void *user_pmask; - size_t val_len; - - /* If the flow IPv4/IPv6 flow specifications are extended, the mask - * should be changed as well. - */ - BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) + - sizeof(ipv4.flags) != sizeof(ipv4)); - BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) + - sizeof(ipv6.reserved) != sizeof(ipv6)); - - switch (proto) { - case IB_FLOW_SPEC_IPV4: - if (len > sizeof(user_val.ipv4) && - !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4), - len - sizeof(user_val.ipv4))) - return -EOPNOTSUPP; - - val_len = min_t(size_t, len, sizeof(user_val.ipv4)); - ret = copy_from_user(&user_val.ipv4, val_ptr, - val_len); - if (ret) - return -EFAULT; - - user_pmask = &ipv4; - break; - case IB_FLOW_SPEC_IPV6: - if (len > sizeof(user_val.ipv6) && - !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6), - len - sizeof(user_val.ipv6))) - return -EOPNOTSUPP; - - val_len = min_t(size_t, len, sizeof(user_val.ipv6)); - ret = copy_from_user(&user_val.ipv6, val_ptr, - val_len); - if (ret) - return -EFAULT; - - user_pmask = &ipv6; - break; - default: - return -EOPNOTSUPP; - } - - return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask, - &user_val, - val_len, out); -} - -static int flow_action_esp_get_encap(struct ib_flow_spec_list *out, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uverbs_flow_action_esp_encap uverbs_encap; - int ret; - - ret = uverbs_copy_from(&uverbs_encap, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP); - if (ret) - return ret; - - /* We currently support only one encap */ - if (uverbs_encap.next_ptr) - return -EOPNOTSUPP; - - if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 && - uverbs_encap.type != IB_FLOW_SPEC_IPV6) - return -EOPNOTSUPP; - - return parse_esp_ip(uverbs_encap.type, - u64_to_user_ptr(uverbs_encap.val_ptr), - uverbs_encap.len, - &out->spec); -} - -struct ib_flow_action_esp_attr { - struct ib_flow_action_attrs_esp hdr; - struct ib_flow_action_attrs_esp_keymats keymat; - struct ib_flow_action_attrs_esp_replays replay; - /* We currently support only one spec */ - struct ib_flow_spec_list encap; -}; - -#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW -static int parse_flow_action_esp(struct ib_device *ib_dev, - struct uverbs_attr_bundle *attrs, - struct ib_flow_action_esp_attr *esp_attr, - bool is_modify) -{ - struct ib_uverbs_flow_action_esp uverbs_esp = {}; - int ret; - - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ESN); - if (IS_UVERBS_COPY_ERR(ret)) - return ret; - - /* This can be called from FLOW_ACTION_ESP_MODIFY where - * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional - */ - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) { - ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS); - if (ret) - return ret; - - if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1)) - return -EOPNOTSUPP; - - esp_attr->hdr.spi = uverbs_esp.spi; - esp_attr->hdr.seq = uverbs_esp.seq; - esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad; - esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts; - } - esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags, - is_modify); - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) { - esp_attr->keymat.protocol = - uverbs_attr_get_enum_id(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); - ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat, - attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); - if (ret) - return ret; - - ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat); - if (ret) - return ret; - - esp_attr->hdr.keymat = &esp_attr->keymat; - } - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) { - esp_attr->replay.protocol = - uverbs_attr_get_enum_id(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); - - ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay, - attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); - if (ret) - return ret; - - ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay, - is_modify); - if (ret) - return ret; - - esp_attr->hdr.replay = &esp_attr->replay; - } - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) { - ret = flow_action_esp_get_encap(&esp_attr->encap, attrs); - if (ret) - return ret; - - esp_attr->hdr.encap = &esp_attr->encap; - } - - return 0; -} - -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); - struct ib_device *ib_dev = attrs->context->device; - int ret; - struct ib_flow_action *action; - struct ib_flow_action_esp_attr esp_attr = {}; - - if (!ib_dev->ops.create_flow_action_esp) - return -EOPNOTSUPP; - - ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false); - if (ret) - return ret; - - /* No need to check as this attribute is marked as MANDATORY */ - action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr, - attrs); - if (IS_ERR(action)) - return PTR_ERR(action); - - uverbs_flow_action_fill_action(action, uobj, ib_dev, - IB_FLOW_ACTION_ESP); - - return 0; -} - -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); - struct ib_flow_action *action = uobj->object; - int ret; - struct ib_flow_action_esp_attr esp_attr = {}; - - if (!action->device->ops.modify_flow_action_esp) - return -EOPNOTSUPP; - - ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true); - if (ret) - return ret; - - if (action->type != IB_FLOW_ACTION_ESP) - return -EINVAL; - - return action->device->ops.modify_flow_action_esp(action, - &esp_attr.hdr, - attrs); -} - -static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { - [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT( - struct ib_uverbs_flow_action_esp_keymat_aes_gcm, - aes_key), - }, -}; - -static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, - size), - }, -}; - -DECLARE_UVERBS_NAMED_METHOD( - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, - hard_limit_pkts), - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32), - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_MANDATORY), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN( - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD( - UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, - hard_limit_pkts), - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32), - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN( - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), - UA_OPTIONAL)); - DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_FLOW_ACTION_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, @@ -435,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_FLOW_ACTION, UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY)); const struct uapi_definition uverbs_def_obj_flow_action[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c18634bec212..26b021f43ba4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -268,9 +268,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, return ERR_PTR(-ENOMEM); pd->device = device; - pd->uobject = NULL; - pd->__internal_mr = NULL; - atomic_set(&pd->usecnt, 0); pd->flags = flags; rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); @@ -284,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, } rdma_restrack_add(&pd->res); - if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else mr_access_flags |= IB_ACCESS_LOCAL_WRITE; @@ -311,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->__internal_mr = mr; - if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) pd->local_dma_lkey = pd->__internal_mr->lkey; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) @@ -341,11 +338,6 @@ int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) pd->__internal_mr = NULL; } - /* uverbs manipulates usecnt with proper locking, while the kabi - * requires the caller to guarantee we can't race here. - */ - WARN_ON(atomic_read(&pd->usecnt)); - ret = pd->device->ops.dealloc_pd(pd, udata); if (ret) return ret; @@ -1046,7 +1038,7 @@ struct ib_srq *ib_create_srq_user(struct ib_pd *pd, ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); if (ret) { rdma_restrack_put(&srq->res); - atomic_dec(&srq->pd->usecnt); + atomic_dec(&pd->usecnt); if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) atomic_dec(&srq->ext.xrc.xrcd->usecnt); if (ib_srq_has_cq(srq->srq_type)) @@ -2139,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_mr *mr; if (access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { + if (!(pd->device->attrs.kernel_cap_flags & + IBK_ON_DEMAND_PAGING)) { pr_debug("ODP support not available\n"); return ERR_PTR(-EINVAL); } @@ -2153,9 +2145,12 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return mr; mr->device = pd->device; + mr->type = IB_MR_TYPE_USER; mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); + mr->iova = virt_addr; + mr->length = length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); |