diff options
Diffstat (limited to 'drivers/infiniband/core')
36 files changed, 3911 insertions, 3273 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 61667705d746..867cee5e27b2 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -35,6 +35,7 @@ ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ - uverbs_ioctl_merge.o uverbs_std_types_cq.o \ + uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ - uverbs_std_types_mr.o uverbs_std_types_counters.o + uverbs_std_types_mr.o uverbs_std_types_counters.o \ + uverbs_uapi.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 4f32c4062fb6..46b855a42884 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, return -ENODATA; } -int rdma_addr_size(struct sockaddr *addr) +int rdma_addr_size(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: @@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst, int ret = 0; n = dst_neigh_lookup(dst, daddr); + if (!n) + return -ENODATA; - rcu_read_lock(); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); ret = -ENODATA; } else { rdma_copy_addr(dev_addr, dst->dev, n->ha); } - rcu_read_unlock(); - if (n) - neigh_release(n); + neigh_release(n); return ret; } @@ -587,7 +585,7 @@ static void process_one_req(struct work_struct *_work) spin_unlock_bh(&lock); } -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 
81d66f56e38f..0bee1f4b914e 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -66,20 +66,28 @@ enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, }; -enum gid_table_entry_props { - GID_TABLE_ENTRY_INVALID = 1UL << 0, - GID_TABLE_ENTRY_DEFAULT = 1UL << 1, +enum gid_table_entry_state { + GID_TABLE_ENTRY_INVALID = 1, + GID_TABLE_ENTRY_VALID = 2, + /* + * Indicates that entry is pending to be removed, there may + * be active users of this GID entry. + * When last user of the GID entry releases reference to it, + * GID entry is detached from the table. + */ + GID_TABLE_ENTRY_PENDING_DEL = 3, }; struct ib_gid_table_entry { - unsigned long props; - union ib_gid gid; - struct ib_gid_attr attr; - void *context; + struct kref kref; + struct work_struct del_work; + struct ib_gid_attr attr; + void *context; + enum gid_table_entry_state state; }; struct ib_gid_table { - int sz; + int sz; /* In RoCE, adding a GID to the table requires: * (a) Find if this GID is already exists. * (b) Find a free space. @@ -91,13 +99,16 @@ struct ib_gid_table { * **/ /* Any writer to data_vec must hold this lock and the write side of - * rwlock. readers must hold only rwlock. All writers must be in a + * rwlock. Readers must hold only rwlock. All writers must be in a * sleepable context. */ - struct mutex lock; - /* rwlock protects data_vec[ix]->props. */ - rwlock_t rwlock; - struct ib_gid_table_entry *data_vec; + struct mutex lock; + /* rwlock protects data_vec[ix]->state and entry pointer. + */ + rwlock_t rwlock; + struct ib_gid_table_entry **data_vec; + /* bit field, each bit indicates the index of default GID */ + u32 default_gid_indices; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) @@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid) } EXPORT_SYMBOL(rdma_is_zero_gid); +/** is_gid_index_default - Check if a given index belongs to + * reserved default GIDs or not. 
+ * @table: GID table pointer + * @index: Index to check in GID table + * Returns true if index is one of the reserved default GID index otherwise + * returns false. + */ +static bool is_gid_index_default(const struct ib_gid_table *table, + unsigned int index) +{ + return index < 32 && (BIT(index) & table->default_gid_indices); +} + int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; @@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port) return device->cache.ports[port - rdma_start_port(device)].gid; } -static void del_roce_gid(struct ib_device *device, u8 port_num, - struct ib_gid_table *table, int ix) +static bool is_gid_entry_free(const struct ib_gid_table_entry *entry) +{ + return !entry; +} + +static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry) +{ + return entry && entry->state == GID_TABLE_ENTRY_VALID; +} + +static void schedule_free_gid(struct kref *kref) { + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + queue_work(ib_wq, &entry->del_work); +} + +static void free_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - device->name, port_num, ix, - table->data_vec[ix].gid.raw); + device->name, port_num, entry->attr.index, + entry->attr.gid.raw); + + if (rdma_cap_roce_gid_table(device, port_num) && + entry->state != GID_TABLE_ENTRY_INVALID) + device->del_gid(&entry->attr, &entry->context); + + write_lock_irq(&table->rwlock); - if (rdma_cap_roce_gid_table(device, port_num)) - device->del_gid(&table->data_vec[ix].attr, - &table->data_vec[ix].context); - dev_put(table->data_vec[ix].attr.ndev); + /* + * The only way to avoid overwriting NULL in table is + * by comparing if it is same entry in table or not! 
+ * If new entry in table is added by the time we free here, + * don't overwrite the table entry. + */ + if (entry == table->data_vec[entry->attr.index]) + table->data_vec[entry->attr.index] = NULL; + /* Now this index is ready to be allocated */ + write_unlock_irq(&table->rwlock); + + if (entry->attr.ndev) + dev_put(entry->attr.ndev); + kfree(entry); } -static int add_roce_gid(struct ib_gid_table *table, - const union ib_gid *gid, - const struct ib_gid_attr *attr) +static void free_gid_entry(struct kref *kref) +{ + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + free_gid_entry_locked(entry); +} + +/** + * free_gid_work - Release reference to the GID entry + * @work: Work structure to refer to GID entry which needs to be + * deleted. + * + * free_gid_work() frees the entry from the HCA's hardware table + * if provider supports it. It releases reference to netdevice. + */ +static void free_gid_work(struct work_struct *work) +{ + struct ib_gid_table_entry *entry = + container_of(work, struct ib_gid_table_entry, del_work); + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + + mutex_lock(&table->lock); + free_gid_entry_locked(entry); + mutex_unlock(&table->lock); +} + +static struct ib_gid_table_entry * +alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; - int ix = attr->index; - int ret = 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + kref_init(&entry->kref); + memcpy(&entry->attr, attr, sizeof(*attr)); + if (entry->attr.ndev) + dev_hold(entry->attr.ndev); + INIT_WORK(&entry->del_work, free_gid_work); + entry->state = GID_TABLE_ENTRY_INVALID; + return entry; +} + +static void store_gid_entry(struct ib_gid_table *table, + struct ib_gid_table_entry *entry) +{ + entry->state = GID_TABLE_ENTRY_VALID; + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", 
__func__, + entry->attr.device->name, entry->attr.port_num, + entry->attr.index, entry->attr.gid.raw); + + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); + table->data_vec[entry->attr.index] = entry; + write_unlock_irq(&table->rwlock); +} + +static void get_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_get(&entry->kref); +} + +static void put_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, schedule_free_gid); +} + +static void put_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, free_gid_entry); +} + +static int add_roce_gid(struct ib_gid_table_entry *entry) +{ + const struct ib_gid_attr *attr = &entry->attr; + int ret; if (!attr->ndev) { pr_err("%s NULL netdev device=%s port=%d index=%d\n", @@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table, attr->index); return -EINVAL; } - - entry = &table->data_vec[ix]; - if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) { - WARN(1, "GID table corruption device=%s port=%d index=%d\n", - attr->device->name, attr->port_num, - attr->index); - return -EINVAL; - } - if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(gid, attr, &entry->context); + ret = attr->device->add_gid(attr, &entry->context); if (ret) { pr_err("%s GID add failed device=%s port=%d index=%d\n", __func__, attr->device->name, attr->port_num, attr->index); - goto add_err; + return ret; } } - dev_hold(attr->ndev); - -add_err: - if (!ret) - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - attr->device->name, attr->port_num, ix, gid->raw); - return ret; + return 0; } /** * add_modify_gid - Add or modify GID table entry * * @table: GID table in which GID to be added or modified - * @gid: GID content * @attr: Attributes of the GID * * Returns 0 on success or appropriate error code. It accepts zero @@ -230,34 +348,42 @@ add_err: * GID. However such zero GIDs are not added to the cache. 
*/ static int add_modify_gid(struct ib_gid_table *table, - const union ib_gid *gid, const struct ib_gid_attr *attr) { - int ret; + struct ib_gid_table_entry *entry; + int ret = 0; + + /* + * Invalidate any old entry in the table to make it safe to write to + * this index. + */ + if (is_gid_entry_valid(table->data_vec[attr->index])) + put_gid_entry(table->data_vec[attr->index]); + + /* + * Some HCA's report multiple GID entries with only one valid GID, and + * leave other unused entries as the zero GID. Convert zero GIDs to + * empty table entries instead of storing them. + */ + if (rdma_is_zero_gid(&attr->gid)) + return 0; + + entry = alloc_gid_entry(attr); + if (!entry) + return -ENOMEM; if (rdma_protocol_roce(attr->device, attr->port_num)) { - ret = add_roce_gid(table, gid, attr); + ret = add_roce_gid(entry); if (ret) - return ret; - } else { - /* - * Some HCA's report multiple GID entries with only one - * valid GID, but remaining as zero GID. - * So ignore such behavior for IB link layer and don't - * fail the call, but don't add such entry to GID cache. 
- */ - if (rdma_is_zero_gid(gid)) - return 0; + goto done; } - lockdep_assert_held(&table->lock); - memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); - memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); - - write_lock_irq(&table->rwlock); - table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; - write_unlock_irq(&table->rwlock); + store_gid_entry(table, entry); return 0; + +done: + put_gid_entry(entry); + return ret; } /** @@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table, static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + struct ib_gid_table_entry *entry; + lockdep_assert_held(&table->lock); + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + ib_dev->name, port, ix, + table->data_vec[ix]->attr.gid.raw); + write_lock_irq(&table->rwlock); - table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + entry = table->data_vec[ix]; + entry->state = GID_TABLE_ENTRY_PENDING_DEL; + /* + * For non RoCE protocol, GID entry slot is ready to use. + */ + if (!rdma_protocol_roce(ib_dev, port)) + table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); - if (rdma_protocol_roce(ib_dev, port)) - del_roce_gid(ib_dev, port, table, ix); - memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid)); - memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); - table->data_vec[ix].context = NULL; + put_gid_entry_locked(entry); } /* rwlock should be read locked, or lock should be held */ @@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, int empty = pempty ? 
-1 : 0; while (i < table->sz && (found < 0 || empty < 0)) { - struct ib_gid_table_entry *data = &table->data_vec[i]; - struct ib_gid_attr *attr = &data->attr; + struct ib_gid_table_entry *data = table->data_vec[i]; + struct ib_gid_attr *attr; int curr_index = i; i++; @@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, * so lookup free slot only if requested. */ if (pempty && empty < 0) { - if (data->props & GID_TABLE_ENTRY_INVALID && - (default_gid == - !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { + if (is_gid_entry_free(data) && + default_gid == + is_gid_index_default(table, curr_index)) { /* * Found an invalid (free) entry; allocate it. * If default GID is requested, then our @@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, /* * Additionally find_gid() is used to find valid entry during - * lookup operation, where validity needs to be checked. So - * find the empty entry first to continue to search for a free - * slot and ignore its INVALID flag. + * lookup operation; so ignore the entries which are marked as + * pending for removal and the entries which are marked as + * invalid. 
*/ - if (data->props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(data)) continue; if (found >= 0) continue; + attr = &data->attr; if (mask & GID_ATTR_FIND_MASK_GID_TYPE && attr->gid_type != val->gid_type) continue; if (mask & GID_ATTR_FIND_MASK_GID && - memcmp(gid, &data->gid, sizeof(*gid))) + memcmp(gid, &data->attr.gid, sizeof(*gid))) continue; if (mask & GID_ATTR_FIND_MASK_NETDEV && @@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, continue; if (mask & GID_ATTR_FIND_MASK_DEFAULT && - !!(data->props & GID_TABLE_ENTRY_DEFAULT) != - default_gid) + is_gid_index_default(table, curr_index) != default_gid) continue; found = curr_index; @@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, attr->device = ib_dev; attr->index = empty; attr->port_num = port; - ret = add_modify_gid(table, gid, attr); + attr->gid = *gid; + ret = add_modify_gid(table, attr); if (!ret) dispatch_gid_change_event(ib_dev, port); @@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (ix = 0; ix < table->sz; ix++) { - if (table->data_vec[ix].attr.ndev == ndev) { + if (is_gid_entry_valid(table->data_vec[ix]) && + table->data_vec[ix]->attr.ndev == ndev) { del_gid(ib_dev, port, table, ix); deleted = true; } @@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, return 0; } -static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, - union ib_gid *gid, struct ib_gid_attr *attr) -{ - struct ib_gid_table *table; - - table = rdma_gid_table(ib_dev, port); - - if (index < 0 || index >= table->sz) - return -EINVAL; - - if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) - return -EINVAL; - - memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); - if (attr) { - memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); - if (attr->ndev) - dev_hold(attr->ndev); - } - - return 0; -} - -static int 
_ib_cache_gid_table_find(struct ib_device *ib_dev, - const union ib_gid *gid, - const struct ib_gid_attr *val, - unsigned long mask, - u8 *port, u16 *index) -{ - struct ib_gid_table *table; - u8 p; - int local_index; - unsigned long flags; - - for (p = 0; p < ib_dev->phys_port_cnt; p++) { - table = ib_dev->cache.ports[p].gid; - read_lock_irqsave(&table->rwlock, flags); - local_index = find_gid(table, gid, val, false, mask, NULL); - if (local_index >= 0) { - if (index) - *index = local_index; - if (port) - *port = p + rdma_start_port(ib_dev); - read_unlock_irqrestore(&table->rwlock, flags); - return 0; - } - read_unlock_irqrestore(&table->rwlock, flags); - } - - return -ENOENT; -} - -static int ib_cache_gid_find(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, u8 *port, - u16 *index) -{ - unsigned long mask = GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE; - struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; - - if (ndev) - mask |= GID_ATTR_FIND_MASK_NETDEV; - - return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, - mask, port, index); -} - /** - * ib_find_cached_gid_by_port - Returns the GID table index where a specified - * GID value occurs. It searches for the specified GID value in the local - * software cache. + * rdma_find_gid_by_port - Returns the GID entry attributes when it finds + * a valid GID entry for given search parameters. It searches for the specified + * GID value in the local software cache. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value should be * searched. - * @ndev: In RoCE, the net device of the device. Null means ignore. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. + * @ndev: In RoCE, the net device of the device. NULL means ignore. 
+ * + * Returns sgid attributes if the GID is found with valid reference or + * returns ERR_PTR for the error. + * The caller must invoke rdma_put_gid_attr() to release the reference. */ -int ib_find_cached_gid_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port, struct net_device *ndev, - u16 *index) +const struct ib_gid_attr * +rdma_find_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, struct net_device *ndev) { int local_index; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; + const struct ib_gid_attr *attr; unsigned long flags; if (!rdma_is_port_valid(ib_dev, port)) - return -ENOENT; + return ERR_PTR(-ENOENT); table = rdma_gid_table(ib_dev, port); @@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, &val, false, mask, NULL); if (local_index >= 0) { - if (index) - *index = local_index; + get_gid_entry(table->data_vec[local_index]); + attr = &table->data_vec[local_index]->attr; read_unlock_irqrestore(&table->rwlock, flags); - return 0; + return attr; } read_unlock_irqrestore(&table->rwlock, flags); - return -ENOENT; + return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL(ib_find_cached_gid_by_port); +EXPORT_SYMBOL(rdma_find_gid_by_port); /** - * ib_cache_gid_find_by_filter - Returns the GID table index where a specified - * GID value occurs + * rdma_find_gid_by_filter - Returns the GID table attribute where a + * specified GID value occurs * @device: The device to query. * @gid: The GID value to search for. - * @port_num: The port number of the device where the GID value could be + * @port: The port number of the device where the GID value could be * searched. * @filter: The filter function is executed on any matching GID in the table. 
* If the filter function returns true, the corresponding index is returned, * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_cache_gid_find_by_filter() searches for the specified GID value + * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. - * This function is only supported on RoCE ports. * */ -static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, - bool (*filter)(const union ib_gid *, - const struct ib_gid_attr *, - void *), - void *context, - u16 *index) +const struct ib_gid_attr *rdma_find_gid_by_filter( + struct ib_device *ib_dev, const union ib_gid *gid, u8 port, + bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, + void *), + void *context) { + const struct ib_gid_attr *res = ERR_PTR(-ENOENT); struct ib_gid_table *table; - unsigned int i; unsigned long flags; - bool found = false; - + unsigned int i; - if (!rdma_is_port_valid(ib_dev, port) || - !rdma_protocol_roce(ib_dev, port)) - return -EPROTONOSUPPORT; + if (!rdma_is_port_valid(ib_dev, port)) + return ERR_PTR(-EINVAL); table = rdma_gid_table(ib_dev, port); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { - struct ib_gid_attr attr; + struct ib_gid_table_entry *entry = table->data_vec[i]; - if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(entry)) continue; - if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + if (memcmp(gid, &entry->attr.gid, sizeof(*gid))) continue; - memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); - - if (filter(gid, &attr, context)) { - found = true; - if (index) - *index = i; + if 
(filter(gid, &entry->attr, context)) { + get_gid_entry(entry); + res = &entry->attr; break; } } read_unlock_irqrestore(&table->rwlock, flags); - - if (!found) - return -ENOENT; - return 0; + return res; } static struct ib_gid_table *alloc_gid_table(int sz) { - struct ib_gid_table *table = - kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); - int i; + struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return NULL; @@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz) table->sz = sz; rwlock_init(&table->rwlock); - - /* Mark all entries as invalid so that allocator can allocate - * one of the invalid (free) entry. - */ - for (i = 0; i < sz; i++) - table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID; return table; err_free_table: @@ -720,12 +769,30 @@ err_free_table: return NULL; } -static void release_gid_table(struct ib_gid_table *table) +static void release_gid_table(struct ib_device *device, u8 port, + struct ib_gid_table *table) { - if (table) { - kfree(table->data_vec); - kfree(table); + bool leak = false; + int i; + + if (!table) + return; + + for (i = 0; i < table->sz; i++) { + if (is_gid_entry_free(table->data_vec[i])) + continue; + if (kref_read(&table->data_vec[i]->kref) > 1) { + pr_err("GID entry ref leak for %s (index %d) ref=%d\n", + device->name, i, + kref_read(&table->data_vec[i]->kref)); + leak = true; + } } + if (leak) + return; + + kfree(table->data_vec); + kfree(table); } static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, @@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { + if (is_gid_entry_valid(table->data_vec[i])) { del_gid(ib_dev, port, table, i); deleted = true; } @@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, { union ib_gid gid = { }; struct ib_gid_attr gid_attr; - struct ib_gid_table 
*table; unsigned int gid_type; unsigned long mask; - table = rdma_gid_table(ib_dev, port); - mask = GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV; @@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, unsigned int i; unsigned long roce_gid_type_mask; unsigned int num_default_gids; - unsigned int current_gid = 0; roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); - for (i = 0; i < num_default_gids && i < table->sz; i++) { - struct ib_gid_table_entry *entry = &table->data_vec[i]; - - entry->props |= GID_TABLE_ENTRY_DEFAULT; - current_gid = find_next_bit(&roce_gid_type_mask, - BITS_PER_LONG, - current_gid); - entry->attr.gid_type = current_gid++; - } + /* Reserve starting indices for default GIDs */ + for (i = 0; i < num_default_gids && i < table->sz; i++) + table->default_gid_indices |= BIT(i); } @@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev) for (port = 0; port < ib_dev->phys_port_cnt; port++) { table = ib_dev->cache.ports[port].gid; - release_gid_table(table); + release_gid_table(ib_dev, port, table); ib_dev->cache.ports[port].gid = NULL; } } @@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev) return err; } -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid, - struct ib_gid_attr *gid_attr) +/** + * rdma_query_gid - Read the GID content from the GID software cache + * @device: Device to query the GID + * @port_num: Port number of the device + * @index: Index of the GID table entry to read + * @gid: Pointer to GID where to store the entry's GID + * + * rdma_query_gid() only reads the GID entry content for requested device, + * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't + * hold any reference to the GID table entry in the HCA or software cache. 
+ * + * Returns 0 on success or appropriate error code. + * + */ +int rdma_query_gid(struct ib_device *device, u8 port_num, + int index, union ib_gid *gid) { - int res; - unsigned long flags; struct ib_gid_table *table; + unsigned long flags; + int res = -EINVAL; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); - res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); - read_unlock_irqrestore(&table->rwlock, flags); + if (index < 0 || index >= table->sz || + !is_gid_entry_valid(table->data_vec[index])) + goto done; + + memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); + res = 0; + +done: + read_unlock_irqrestore(&table->rwlock, flags); return res; } -EXPORT_SYMBOL(ib_get_cached_gid); +EXPORT_SYMBOL(rdma_query_gid); /** - * ib_find_cached_gid - Returns the port number and GID table index where - * a specified GID value occurs. + * rdma_find_gid - Returns SGID attributes if the matching GID is found. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. + * rdma_find_gid() searches for the specified GID value in the software cache. + * + * Returns GID attributes if a valid GID is found or returns ERR_PTR for the + * error. The caller must invoke rdma_put_gid_attr() to release the reference. 
+ * */ -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index) -{ - return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); -} -EXPORT_SYMBOL(ib_find_cached_gid); - -int ib_find_gid_by_filter(struct ib_device *device, - const union ib_gid *gid, - u8 port_num, - bool (*filter)(const union ib_gid *gid, - const struct ib_gid_attr *, - void *), - void *context, u16 *index) +const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev) { - /* Only RoCE GID table supports filter function */ - if (!rdma_protocol_roce(device, port_num) && filter) - return -EPROTONOSUPPORT; + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE; + struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; + u8 p; + + if (ndev) + mask |= GID_ATTR_FIND_MASK_NETDEV; + + for (p = 0; p < device->phys_port_cnt; p++) { + struct ib_gid_table *table; + unsigned long flags; + int index; + + table = device->cache.ports[p].gid; + read_lock_irqsave(&table->rwlock, flags); + index = find_gid(table, gid, &gid_attr_val, false, mask, NULL); + if (index >= 0) { + const struct ib_gid_attr *attr; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; + read_unlock_irqrestore(&table->rwlock, flags); + return attr; + } + read_unlock_irqrestore(&table->rwlock, flags); + } - return ib_cache_gid_find_by_filter(device, gid, - port_num, filter, - context, index); + return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL(rdma_find_gid); int ib_get_cached_pkey(struct ib_device *device, u8 port_num, @@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_port_state); +/** + * rdma_get_gid_attr - Returns GID attributes for a port of a device + * at a requested gid_index, if a valid GID entry exists. 
+ * @device: The device to query. + * @port_num: The port number on the device where the GID value + * is to be queried. + * @index: Index of the GID table entry whose attributes are to + * be queried. + * + * rdma_get_gid_attr() acquires reference count of gid attributes from the + * cached GID table. Caller must invoke rdma_put_gid_attr() to release + * reference to gid attribute regardless of link layer. + * + * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error + * code. + */ +const struct ib_gid_attr * +rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index) +{ + const struct ib_gid_attr *attr = ERR_PTR(-EINVAL); + struct ib_gid_table *table; + unsigned long flags; + + if (!rdma_is_port_valid(device, port_num)) + return ERR_PTR(-EINVAL); + + table = rdma_gid_table(device, port_num); + if (index < 0 || index >= table->sz) + return ERR_PTR(-EINVAL); + + read_lock_irqsave(&table->rwlock, flags); + if (!is_gid_entry_valid(table->data_vec[index])) + goto done; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; +done: + read_unlock_irqrestore(&table->rwlock, flags); + return attr; +} +EXPORT_SYMBOL(rdma_get_gid_attr); + +/** + * rdma_put_gid_attr - Release reference to the GID attribute + * @attr: Pointer to the GID attribute whose reference + * needs to be released. + * + * rdma_put_gid_attr() must be used to release reference whose + * reference is acquired using rdma_get_gid_attr() or any APIs + * which returns a pointer to the ib_gid_attr regardless of link layer + * of IB or RoCE. + * + */ +void rdma_put_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + put_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_put_gid_attr); + +/** + * rdma_hold_gid_attr - Get reference to existing GID attribute + * + * @attr: Pointer to the GID attribute whose reference + * needs to be taken. 
+ * + * Increase the reference count to a GID attribute to keep it from being + * freed. Callers are required to already be holding a reference to attribute. + * + */ +void rdma_hold_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + get_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_hold_gid_attr); + static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) { struct ib_gid_attr gid_attr = {}; struct ib_gid_table *table; - union ib_gid gid; int ret = 0; int i; @@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device, for (i = 0; i < gid_tbl_len; ++i) { if (!device->query_gid) continue; - ret = device->query_gid(device, port, i, &gid); + ret = device->query_gid(device, port, i, &gid_attr.gid); if (ret) { pr_warn("query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } gid_attr.index = i; - add_modify_gid(table, &gid, &gid_attr); + add_modify_gid(table, &gid_attr); } err: mutex_unlock(&table->lock); @@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device, struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; int i; int ret; - struct ib_gid_table *table; if (!rdma_is_port_valid(device, port)) return; - table = rdma_gid_table(device, port); - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; @@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device) ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); + + /* + * Flush the wq second time for any pending GID delete work. 
+ */ + flush_workqueue(ib_wq); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 27a7b0a2e27a..6e39c27dca8e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, if (ret) return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, return ret; } -static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) +static struct cm_port * +get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; - u8 p; - struct net_device *ndev = ib_get_ndev_from_path(path); - - read_lock_irqsave(&cm.device_lock, flags); - list_for_each_entry(cm_dev, &cm.device_list, list) { - if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - sa_conv_pathrec_to_gid_type(path), - ndev, &p, NULL)) { - port = cm_dev->port[p - 1]; - break; + + if (attr) { + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (cm_dev->ib_device == attr->device) { + port = cm_dev->port[attr->port_num - 1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + } else { + /* SGID attribute can be NULL in following + * conditions. 
+ * (a) Alternative path + * (b) IB link layer without GRH + * (c) LAP send messages + */ + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + attr = rdma_find_gid(cm_dev->ib_device, + &path->sgid, + sa_conv_pathrec_to_gid_type(path), + NULL); + if (!IS_ERR(attr)) { + port = cm_dev->port[attr->port_num - 1]; + break; + } } + read_unlock_irqrestore(&cm.device_lock, flags); + if (port) + rdma_put_gid_attr(attr); } - read_unlock_irqrestore(&cm.device_lock, flags); - - if (ndev) - dev_put(ndev); return port; } -static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, +static int cm_init_av_by_path(struct sa_path_rec *path, + const struct ib_gid_attr *sgid_attr, + struct cm_av *av, struct cm_id_private *cm_id_priv) { struct rdma_ah_attr new_ah_attr; @@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, struct cm_port *port; int ret; - port = get_cm_port_from_path(path); + port = get_cm_port_from_path(path, sgid_attr); if (!port) return -EINVAL; cm_dev = port->cm_dev; @@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, /* * av->ah_attr might be initialized based on wc or during - * request processing time. So initialize a new ah_attr on stack. + * request processing time which might have reference to sgid_attr. + * So initialize a new ah_attr on stack. * If initialization fails, old ah_attr is used for sending any * responses. If initialization is successful, than new ah_attr - * is used by overwriting the old one. + * is used by overwriting the old one. So that right ah_attr + * can be used to return an error response. 
*/ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, - &new_ah_attr); + &new_ah_attr, sgid_attr); if (ret) return ret; av->timeout = path->packet_life_time + 1; ret = add_cm_id_to_port_list(cm_id_priv, av, port); - if (ret) + if (ret) { + rdma_destroy_ah_attr(&new_ah_attr); return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + } + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -1091,6 +1114,9 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); + + rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); + rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } @@ -1230,14 +1256,12 @@ new_id: } EXPORT_SYMBOL(ib_cm_insert_listen); -static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, - enum cm_msg_sequence msg_seq) +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | - (msg_seq << 30)); + low_tid = (u64)cm_id_priv->id.local_id; return cpu_to_be64(hi_tid | low_tid); } @@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_path->opa.slid); cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + cm_form_tid(cm_id_priv)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; @@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + ret = cm_init_av_by_path(param->primary_path, + param->ppath_sgid_attr, &cm_id_priv->av, cm_id_priv); if (ret) goto error1; if (param->alternate_path) { - ret = cm_init_av_by_path(param->alternate_path, + ret = cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; @@ -1646,7 +1671,7 
@@ static void cm_opa_to_ib_sgid(struct cm_work *work, (ib_is_opa_gid(&path->sgid))) { union ib_gid sgid; - if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) { + if (rdma_query_gid(dev, port_num, 0, &sgid)) { dev_warn(&dev->dev, "Error updating sgid in CM request\n"); return; @@ -1691,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work, param->retry_count = cm_req_get_retry_count(req_msg); param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); param->srq = cm_req_get_srq(req_msg); + param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &req_msg->private_data; } @@ -1914,9 +1940,8 @@ static int cm_req_handler(struct cm_work *work) struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; - union ib_gid gid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh; + const struct ib_gid_attr *gid_attr; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1961,24 +1986,13 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) memset(&work->path[1], 0, sizeof(work->path[1])); grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); - ret = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, - grh->sgid_index, - &gid, &gid_attr); - if (ret) { - ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0); - goto rejected; - } + gid_attr = grh->sgid_attr; - if (gid_attr.ndev) { + if (gid_attr && gid_attr->ndev) { work->path[0].rec_type = - sa_conv_gid_to_pathrec_type(gid_attr.gid_type); - sa_path_set_ifindex(&work->path[0], - gid_attr.ndev->ifindex); - sa_path_set_ndev(&work->path[0], - dev_net(gid_attr.ndev)); - dev_put(gid_attr.ndev); + sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { + /* If no GID attribute or ndev is null, it is not RoCE. 
*/ cm_path_set_rec_type(work->port->cm_dev->ib_device, work->port->port_num, &work->path[0], @@ -1992,15 +2006,14 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, cm_id_priv); if (ret) { int err; - err = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, - &work->path[0].sgid, - NULL); + err = rdma_query_gid(work->port->cm_dev->ib_device, + work->port->port_num, 0, + &work->path[0].sgid); if (err) ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, NULL, 0, NULL, 0); @@ -2012,8 +2025,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (cm_req_has_alt_path(req_msg)) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, - cm_id_priv); + ret = cm_init_av_by_path(&work->path[1], NULL, + &cm_id_priv->alt_av, cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2451,7 +2464,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + cm_form_tid(cm_id_priv)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); @@ -3082,7 +3095,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, alternate_path->opa.slid); cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + cm_form_tid(cm_id_priv)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); @@ -3136,7 +3149,7 @@ int ib_send_cm_lap(struct ib_cm_id 
*cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto out; @@ -3279,7 +3292,7 @@ static int cm_lap_handler(struct cm_work *work) if (ret) goto unlock; - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; @@ -3458,7 +3471,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + cm_form_tid(cm_id_priv)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; @@ -3481,7 +3494,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); + ret = cm_init_av_by_path(param->path, param->sgid_attr, + &cm_id_priv->av, + cm_id_priv); if (ret) goto out; @@ -3518,6 +3533,7 @@ out: EXPORT_SYMBOL(ib_send_cm_sidr_req); static void cm_format_sidr_req_event(struct cm_work *work, + const struct cm_id_private *rx_cm_id, struct ib_cm_id *listen_id) { struct cm_sidr_req_msg *sidr_req_msg; @@ -3531,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work, param->service_id = sidr_req_msg->service_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; + param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -3588,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->id.service_id = sidr_req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); - cm_format_sidr_req_event(work, 
&cur_cm_id_priv->id); + cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(cur_cm_id_priv); return 0; @@ -3665,7 +3682,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_sidr_rep); -static void cm_format_sidr_rep_event(struct cm_work *work) +static void cm_format_sidr_rep_event(struct cm_work *work, + const struct cm_id_private *cm_id_priv) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; @@ -3678,6 +3696,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work) param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; + param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_rep_msg->private_data; } @@ -3701,7 +3720,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); - cm_format_sidr_rep_event(work); + cm_format_sidr_rep_event(work, cm_id_priv); cm_process_work(cm_id_priv, work); return 0; out: diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 8b76f0ef965e..476d4309576d 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,13 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -enum cm_msg_sequence { - CM_MSG_SEQUENCE_REQ, - CM_MSG_SEQUENCE_LAP, - CM_MSG_SEQUENCE_DREQ, - CM_MSG_SEQUENCE_SIDR -}; - struct cm_req_msg { struct ib_mad_hdr hdr; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index bff10ab141b0..f72677291b69 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -366,7 +366,6 @@ struct cma_multicast { void *context; struct sockaddr_storage addr; struct kref mcref; - bool igmp_joined; u8 join_state; }; @@ -412,11 +411,11 @@ struct cma_req_info { struct 
sockaddr_storage listen_addr_storage; struct sockaddr_storage src_addr_storage; struct ib_device *device; - int port; union ib_gid local_gid; __be64 service_id; + int port; + bool has_gid; u16 pkey; - bool has_gid:1; }; static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) @@ -491,12 +490,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, { cma_ref_dev(cma_dev); id_priv->cma_dev = cma_dev; - id_priv->gid_type = 0; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - id_priv->res.type = RDMA_RESTRACK_CM_ID; rdma_restrack_add(&id_priv->res); } @@ -603,46 +600,53 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } -static inline int cma_validate_port(struct ib_device *device, u8 port, - enum ib_gid_type gid_type, - union ib_gid *gid, - struct rdma_id_private *id_priv) +static const struct ib_gid_attr * +cma_validate_port(struct ib_device *device, u8 port, + enum ib_gid_type gid_type, + union ib_gid *gid, + struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int bound_if_index = dev_addr->bound_dev_if; + const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; - int ret = -ENODEV; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ret; + return ERR_PTR(-ENODEV); } else { gid_type = IB_GID_TYPE_IB; } - ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, - ndev, NULL); - + sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, 
ndev); if (ndev) dev_put(ndev); + return sgid_attr; +} - return ret; +static void cma_bind_sgid_attr(struct rdma_id_private *id_priv, + const struct ib_gid_attr *sgid_attr) +{ + WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr; } static int cma_acquire_dev(struct rdma_id_private *id_priv, - struct rdma_id_private *listen_id_priv) + const struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; + enum ib_gid_type gid_type; int ret = -ENODEV; u8 port; @@ -662,14 +666,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - listen_id_priv->gid_type, gidp, - id_priv); - if (!ret) { + gid_type = listen_id_priv->gid_type; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -683,14 +686,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? 
- IB_GID_TYPE_IB : - cma_dev->default_gid_type[port - 1], - gidp, id_priv); - if (!ret) { + gid_type = cma_dev->default_gid_type[port - 1]; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -732,8 +734,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, - &gid, NULL); + for (i = 0; !rdma_query_gid(cur_dev->device, + p, i, &gid); i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; @@ -785,12 +787,14 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->res.kern_name = caller; else rdma_restrack_set_task(&id_priv->res, current); + id_priv->res.type = RDMA_RESTRACK_CM_ID; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; id_priv->tos_set = false; + id_priv->gid_type = IB_GID_TYPE_IB; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); @@ -1036,35 +1040,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } EXPORT_SYMBOL(rdma_init_qp_attr); -static inline int cma_zero_addr(struct sockaddr *addr) +static inline bool cma_zero_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: - return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); + return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr); case AF_IB: - return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); + return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr); default: - return 0; + return false; } } -static inline int cma_loopback_addr(struct sockaddr *addr) +static inline bool 
cma_loopback_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: - return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_loopback( + ((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: - return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); + return ipv6_addr_loopback( + &((struct sockaddr_in6 *)addr)->sin6_addr); case AF_IB: - return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); + return ib_addr_loopback( + &((struct sockaddr_ib *)addr)->sib_addr); default: - return 0; + return false; } } -static inline int cma_any_addr(struct sockaddr *addr) +static inline bool cma_any_addr(const struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } @@ -1087,7 +1094,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) } } -static __be16 cma_port(struct sockaddr *addr) +static __be16 cma_port(const struct sockaddr *addr) { struct sockaddr_ib *sib; @@ -1105,15 +1112,15 @@ static __be16 cma_port(struct sockaddr *addr) } } -static inline int cma_any_port(struct sockaddr *addr) +static inline int cma_any_port(const struct sockaddr *addr) { return !cma_port(addr); } static void cma_save_ib_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct sa_path_rec *path) + const struct rdma_cm_id *listen_id, + const struct sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; @@ -1198,7 +1205,7 @@ static u16 cma_port_from_service_id(__be64 service_id) static int cma_save_ip_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct ib_cm_event *ib_event, + const struct ib_cm_event *ib_event, __be64 service_id) { struct cma_hdr *hdr; @@ -1228,8 +1235,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr, static int cma_save_net_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, + const struct rdma_cm_id *listen_id, 
+ const struct ib_cm_event *ib_event, sa_family_t sa_family, __be64 service_id) { if (sa_family == AF_IB) { @@ -1361,7 +1368,23 @@ static bool validate_net_dev(struct net_device *net_dev, } } -static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, +static struct net_device * +roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) +{ + const struct ib_gid_attr *sgid_attr = NULL; + + if (ib_event->event == IB_CM_REQ_RECEIVED) + sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr; + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr; + + if (!sgid_attr) + return NULL; + dev_hold(sgid_attr->ndev); + return sgid_attr->ndev; +} + +static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event, struct cma_req_info *req) { struct sockaddr *listen_addr = @@ -1376,8 +1399,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, if (err) return ERR_PTR(err); - net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, - gid, listen_addr); + if (rdma_protocol_roce(req->device, req->port)) + net_dev = roce_get_net_dev_by_cm_event(ib_event); + else + net_dev = ib_get_net_dev_by_params(req->device, req->port, + req->pkey, + gid, listen_addr); if (!net_dev) return ERR_PTR(-ENODEV); @@ -1440,14 +1467,20 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id, const struct rdma_addr *addr = &id->route.addr; if (!net_dev) - /* This request is an AF_IB request or a RoCE request */ + /* This request is an AF_IB request */ return (!id->port_num || id->port_num == port_num) && - (addr->src_addr.ss_family == AF_IB || - rdma_protocol_roce(id->device, port_num)); + (addr->src_addr.ss_family == AF_IB); - return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), addr->dev_addr.net) && - addr->dev_addr.bound_dev_if == net_dev->ifindex); + /* + * Net namespaces must match, and if the listner is listening + * on a specific netdevice than netdevice must 
match as well. + */ + if (net_eq(dev_net(net_dev), addr->dev_addr.net) && + (!!addr->dev_addr.bound_dev_if == + (addr->dev_addr.bound_dev_if == net_dev->ifindex))) + return true; + else + return false; } static struct rdma_id_private *cma_find_listener( @@ -1480,9 +1513,10 @@ static struct rdma_id_private *cma_find_listener( return ERR_PTR(-EINVAL); } -static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event, - struct net_device **net_dev) +static struct rdma_id_private * +cma_ib_id_from_event(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event, + struct net_device **net_dev) { struct cma_req_info req; struct rdma_bind_list *bind_list; @@ -1498,10 +1532,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { /* Assuming the protocol is AF_IB */ *net_dev = NULL; - } else if (rdma_protocol_roce(req.device, req.port)) { - /* TODO find the net dev matching the request parameters - * through the RoCE GID table */ - *net_dev = NULL; } else { return ERR_CAST(*net_dev); } @@ -1629,6 +1659,21 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } +static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + dev_put(ndev); + } + kref_put(&mc->mcref, release_mc); +} + static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; @@ -1642,22 +1687,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct 
net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(&init_net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } } } @@ -1699,6 +1729,10 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + + if (id_priv->id.route.addr.dev_addr.sgid_attr) + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } @@ -1730,7 +1764,7 @@ reject: } static void cma_set_rep_event_data(struct rdma_cm_event *event, - struct ib_cm_rep_event_param *rep_data, + const struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; @@ -1743,10 +1777,11 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = rep_data->remote_qpn; } -static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +static int cma_ib_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; mutex_lock(&id_priv->handler_mutex); @@ -1756,7 +1791,6 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) id_priv->state != RDMA_CM_DISCONNECT)) goto out; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: @@ -1825,9 +1859,10 @@ out: return ret; } -static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) +static struct rdma_id_private * +cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { struct rdma_id_private *listen_id_priv; struct 
rdma_id_private *id_priv; @@ -1888,11 +1923,12 @@ err: return NULL; } -static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) +static struct rdma_id_private * +cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { - struct rdma_id_private *listen_id_priv; + const struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; @@ -1932,7 +1968,7 @@ err: } static void cma_set_req_event_data(struct rdma_cm_event *event, - struct ib_cm_req_event_param *req_data, + const struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = private_data + offset; @@ -1946,7 +1982,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } -static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id, + const struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || @@ -1955,19 +1992,20 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e (!id->qp_type)); } -static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +static int cma_ib_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; struct net_device *net_dev; u8 offset; int ret; - listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); + listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) return PTR_ERR(listen_id); - if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { + if 
(!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; } @@ -1978,16 +2016,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto err1; } - memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { - conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); + conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); + conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } @@ -2087,7 +2124,7 @@ EXPORT_SYMBOL(rdma_read_gids); static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; @@ -2096,7 +2133,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (id_priv->state != RDMA_CM_CONNECT) goto out; - memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; @@ -2156,11 +2192,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + 
event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); @@ -2202,13 +2244,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); - memset(&event, 0, sizeof event); - event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - event.param.conn.private_data = iw_event->private_data; - event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = iw_event->ird; - event.param.conn.responder_resources = iw_event->ord; - /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. @@ -2241,7 +2276,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) addr = cma_src_addr(id_priv); svc_id = rdma_get_service_id(&id_priv->id, addr); - id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); + id = ib_cm_insert_listen(id_priv->id.device, + cma_ib_req_handler, svc_id); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; @@ -2561,8 +2597,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); route->path_rec->roce.route_resolved = true; - sa_path_set_ndev(route->path_rec, addr->dev_addr.net); - sa_path_set_ifindex(route->path_rec, ndev->ifindex); sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); return ndev; } @@ -2791,7 +2825,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); + ret = rdma_query_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; @@ -2817,9 +2851,8 @@ static void addr_handler(int status, struct sockaddr 
*src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; - memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) @@ -2910,7 +2943,7 @@ err: } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr) + const struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; @@ -2931,31 +2964,25 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr, int timeout_ms) + const struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (ret) return ret; - } } - if (cma_family(id_priv) != dst_addr->sa_family) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (cma_family(id_priv) != dst_addr->sa_family) return -EINVAL; - } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; - } + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); atomic_inc(&id_priv->refcount); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); @@ -3451,18 +3478,18 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event) + const struct ib_cm_event 
*ib_event) { struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; - struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; + struct rdma_cm_event event = {}; + const struct ib_cm_sidr_rep_event_param *rep = + &ib_event->param.sidr_rep_rcvd; int ret = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) goto out; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; @@ -3488,7 +3515,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, ib_init_ah_attr_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, - &event.param.ud.ah_attr); + &event.param.ud.ah_attr, + rep->sgid_attr); event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; @@ -3501,6 +3529,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, } ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.ib = NULL; @@ -3557,6 +3587,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, id_priv->cm_id.ib = id; req.path = id_priv->id.route.path_rec; + req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -3618,6 +3649,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; + req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; + /* Alternate path SGID attribute currently unsupported */ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; @@ -3928,7 +3961,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; id_priv = mc->id_priv; @@ -3952,7 +3985,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) } mutex_unlock(&id_priv->qp_mutex); - memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; if (!status) { @@ -3981,6 +4013,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) event.event = RDMA_CM_EVENT_MULTICAST_ERROR; ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); @@ -4010,7 +4044,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_IB) { memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); - } else if ((addr->sa_family == AF_INET6)) { + } else if 
(addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ @@ -4168,8 +4202,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (!send_only) { err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) - mc->igmp_joined = true; } } } else { @@ -4221,26 +4253,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; - mc->igmp_joined = false; mc->join_state = join_state; - spin_lock(&id_priv->lock); - list_add(&mc->list, &id_priv->mc_list); - spin_unlock(&id_priv->lock); if (rdma_protocol_roce(id->device, id->port_num)) { kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); - } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + if (ret) + goto out_err; + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) { ret = cma_join_ib_multicast(id_priv, mc); - else + if (ret) + goto out_err; + } else { ret = -ENOSYS; - - if (ret) { - spin_lock_irq(&id_priv->lock); - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - kfree(mc); + goto out_err; } + + spin_lock(&id_priv->lock); + list_add(&mc->list, &id_priv->mc_list); + spin_unlock(&id_priv->lock); + + return 0; +out_err: + kfree(mc); return ret; } EXPORT_SYMBOL(rdma_join_multicast); @@ -4268,23 +4303,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else if (rdma_protocol_roce(id->device, id->port_num)) { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id->route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - mc->igmp_joined = false; - } - 
kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } return; } @@ -4410,7 +4429,7 @@ free_cma_dev: static int cma_remove_id_dev(struct rdma_id_private *id_priv) { - struct rdma_cm_event event; + struct rdma_cm_event event = {}; enum rdma_cm_state state; int ret = 0; @@ -4426,7 +4445,6 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; - memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index fae417a391fb..77c7005c396c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -91,8 +91,8 @@ void ib_device_unregister_sysfs(struct ib_device *device); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); -typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port, - struct net_device *idev, void *cookie); +typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port, + struct net_device *idev, void *cookie); void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_filter filter, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 6fa4c59dc7a7..db3b6271f09d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -105,8 +105,6 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), - IB_MANDATORY_FUNC(create_ah), - IB_MANDATORY_FUNC(destroy_ah), IB_MANDATORY_FUNC(create_qp), IB_MANDATORY_FUNC(modify_qp), IB_MANDATORY_FUNC(destroy_qp), @@ -862,25 +860,6 @@ int ib_query_port(struct ib_device *device, EXPORT_SYMBOL(ib_query_port); /** - * ib_query_gid - Get GID table entry - * @device:Device to query - * @port_num:Port number to 
query - * @index:GID table index to query - * @gid:Returned GID - * @attr: Returned GID attributes related to this GID index (only in RoCE). - * NULL means ignore. - * - * ib_query_gid() fetches the specified GID table entry from the cache. - */ -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr) -{ - return ib_get_cached_gid(device, port_num, index, gid, attr); -} -EXPORT_SYMBOL(ib_query_gid); - -/** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query * @filter: Should we call the callback? @@ -1057,7 +1036,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, continue; for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid, NULL); + ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index f742ae7a768b..ef459f2f2eeb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -38,6 +38,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/dma-mapping.h> +#include <linux/idr.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/security.h> @@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); +/* + * The mlx4 driver uses the top byte to distinguish which virtual function + * generated the MAD, so we must avoid using it. 
+ */ +#define AGENT_ID_LIMIT (1 << 24) +static DEFINE_IDR(ib_mad_clients); static struct list_head ib_mad_port_list; -static atomic_t ib_mad_client_id = ATOMIC_INIT(0); /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad); /* * ib_register_mad_agent - Register to send/receive MADs + * + * Context: Process context. */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 port_num, @@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, struct ib_mad_mgmt_vendor_class *vendor_class; struct ib_mad_mgmt_method_table *method; int ret2, qpn; - unsigned long flags; u8 mgmt_class, vclass; /* Validate parameters */ @@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); + idr_preload(GFP_KERNEL); + idr_lock(&ib_mad_clients); + ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, + AGENT_ID_LIMIT, GFP_ATOMIC); + idr_unlock(&ib_mad_clients); + idr_preload_end(); + + if (ret2 < 0) { + ret = ERR_PTR(ret2); + goto error5; + } + mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) * is non overlapping with any existing ones */ + spin_lock_irq(&port_priv->reg_lock); if (mad_reg_req) { mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); if (!is_vendor_class(mgmt_class)) { @@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = 
ERR_PTR(ret2); - goto error5; + goto error6; } } - - /* Add mad agent into port's agent list */ - list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); return &mad_agent_priv->agent; +error6: + spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); error5: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: kfree(reg_req); @@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; - unsigned long flags; /* Note that we could still be handling received MADs */ @@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); - spin_lock_irqsave(&port_priv->reg_lock, flags); + spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); - list_del(&mad_agent_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) ib_mad_agent_security_cleanup(&mad_agent_priv->agent); kfree(mad_agent_priv->reg_req); - kfree(mad_agent_priv); + kfree_rcu(mad_agent_priv, rcu); } static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) @@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) /* * 
ib_unregister_mad_agent - Unregisters a client from using MAD services + * + * Context: Process context. */ void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { @@ -1159,7 +1181,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct list_head *list; - struct ib_send_wr *bad_send_wr; struct ib_mad_agent *mad_agent; struct ib_sge *sge; unsigned long flags; @@ -1197,7 +1218,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); + NULL); list = &qp_info->send_queue.list; } else { ret = 0; @@ -1720,22 +1741,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; - spin_lock_irqsave(&port_priv->reg_lock, flags); if (ib_response_mad(mad_hdr)) { u32 hi_tid; - struct ib_mad_agent_private *entry; /* * Routing is based on high 32 bits of transaction ID * of MAD. 
*/ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { - if (entry->agent.hi_tid == hi_tid) { - mad_agent = entry; - break; - } - } + rcu_read_lock(); + mad_agent = idr_find(&ib_mad_clients, hi_tid); + if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + mad_agent = NULL; + rcu_read_unlock(); } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; @@ -1744,6 +1762,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, const struct ib_vendor_mad *vendor_mad; int index; + spin_lock_irqsave(&port_priv->reg_lock, flags); /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI @@ -1783,20 +1802,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, ~IB_MGMT_METHOD_RESP]; } } + if (mad_agent) + atomic_inc(&mad_agent->refcount); +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); } - if (mad_agent) { - if (mad_agent->agent.recv_handler) - atomic_inc(&mad_agent->refcount); - else { - dev_notice(&port_priv->device->dev, - "No receive handler for client %p on port %d\n", - &mad_agent->agent, port_priv->port_num); - mad_agent = NULL; - } + if (mad_agent && !mad_agent->agent.recv_handler) { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %d\n", + &mad_agent->agent, port_priv->port_num); + deref_mad_agent(mad_agent); + mad_agent = NULL; } -out: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); return mad_agent; } @@ -1896,8 +1914,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ const struct ib_global_route *grh = rdma_ah_read_grh(&attr); - if (ib_get_cached_gid(device, port_num, - grh->sgid_index, &sgid, NULL)) + if (rdma_query_gid(device, port_num, + grh->sgid_index, &sgid)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); @@ -2457,7 +2475,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) struct 
ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; - struct ib_send_wr *bad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -2507,7 +2524,7 @@ retry: if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, - &bad_send_wr); + NULL); if (ret) { dev_err(&port_priv->device->dev, "ib_post_send failed: %d\n", ret); @@ -2552,11 +2569,9 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, if (wc->status == IB_WC_WR_FLUSH_ERR) { if (mad_send_wr->retry) { /* Repost send */ - struct ib_send_wr *bad_send_wr; - mad_send_wr->retry = 0; ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); + NULL); if (!ret) return false; } @@ -2872,7 +2887,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ @@ -2916,7 +2931,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, post = (++recv_queue->count < recv_queue->max_active); list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); - ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); + ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); @@ -3159,7 +3174,6 @@ static int ib_mad_port_open(struct ib_device *device, port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); - INIT_LIST_HEAD(&port_priv->agent_list); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); @@ -3338,6 +3352,9 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); + /* Client ID 0 is 
used for snoop-only clients */ + idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); + if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 28669f6419e1..d84ae1671898 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -89,7 +89,6 @@ struct ib_rmpp_segment { }; struct ib_mad_agent_private { - struct list_head agent_list; struct ib_mad_agent agent; struct ib_mad_reg_req *reg_req; struct ib_mad_qp_info *qp_info; @@ -105,7 +104,10 @@ struct ib_mad_agent_private { struct list_head rmpp_list; atomic_t refcount; - struct completion comp; + union { + struct completion comp; + struct rcu_head rcu; + }; }; struct ib_mad_snoop_private { @@ -203,7 +205,6 @@ struct ib_mad_port_private { spinlock_t reg_lock; struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; - struct list_head agent_list; struct workqueue_struct *wq; struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; }; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 6c48f4193dda..d50ff70bb24b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_sa_get_mcmember_rec); +/** + * ib_init_ah_from_mcmember - Initialize AH attribute from multicast + * member record and gid of the device. + * @device: RDMA device + * @port_num: Port of the rdma device to consider + * @ndev: Optional netdevice, applicable only for RoCE + * @gid_type: GID type to consider + * @ah_attr: AH attribute to fillup on successful completion + * + * ib_init_ah_from_mcmember() initializes AH attribute based on multicast + * member record and other device properties. On success the caller is + * responsible to call rdma_destroy_ah_attr on the ah_attr. 
Returns 0 on + * success or appropriate error code. + * + */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr) { - int ret; - u16 gid_index; + const struct ib_gid_attr *sgid_attr; /* GID table is not based on the netdevice for IB link layer, * so ignore ndev during search. @@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, else if (!rdma_protocol_roce(device, port_num)) return -EINVAL; - ret = ib_find_cached_gid_by_port(device, &rec->port_gid, - gid_type, port_num, - ndev, - &gid_index); - if (ret) - return ret; + sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid, + gid_type, port_num, ndev); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); - memset(ah_attr, 0, sizeof *ah_attr); + memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_port_num(ah_attr, port_num); rdma_ah_set_static_rate(ah_attr, rec->rate); - - rdma_ah_set_grh(ah_attr, &rec->mgid, - be32_to_cpu(rec->flow_label), - (u8)gid_index, - rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->mgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + sgid_attr); return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 340c7bea45ab..0385ab438320 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg, if (ret) return ret; - BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); - if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; - if (rdma_protocol_ib(device, port) && - nla_put_u64_64bit(msg, 
RDMA_NLDEV_ATTR_SUBNET_PREFIX, - attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; if (rdma_protocol_ib(device, port)) { + BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + (u64)attr.port_cap_flags, + RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, + attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index a6e904973ba8..6eb64c6f0802 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -32,6 +32,7 @@ #include <linux/file.h> #include <linux/anon_inodes.h> +#include <linux/sched/mm.h> #include <rdma/ib_verbs.h> #include <rdma/uverbs_types.h> #include <linux/rcupdate.h> @@ -41,51 +42,6 @@ #include "core_priv.h" #include "rdma_core.h" -int uverbs_ns_idx(u16 *id, unsigned int ns_count) -{ - int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT; - - if (ret >= ns_count) - return -EINVAL; - - *id &= ~UVERBS_ID_NS_MASK; - return ret; -} - -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, - uint16_t object) -{ - const struct uverbs_root_spec *object_hash = ibdev->specs_root; - const struct uverbs_object_spec_hash *objects; - int ret = uverbs_ns_idx(&object, object_hash->num_buckets); - - if (ret < 0) - return NULL; - - objects = object_hash->object_buckets[ret]; - - if (object >= objects->num_objects) - return NULL; - - return objects->objects[object]; -} - -const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, - uint16_t method) -{ - const struct uverbs_method_spec_hash *methods; - int ret = uverbs_ns_idx(&method, object->num_buckets); - - if (ret < 0) - return NULL; - - methods = 
object->method_buckets[ret]; - if (method >= methods->num_methods) - return NULL; - - return methods->methods[method]; -} - void uverbs_uobject_get(struct ib_uobject *uobject) { kref_get(&uobject->ref); @@ -96,7 +52,7 @@ static void uverbs_uobject_free(struct kref *ref) struct ib_uobject *uobj = container_of(ref, struct ib_uobject, ref); - if (uobj->type->type_class->needs_kfree_rcu) + if (uobj->uapi_object->type_class->needs_kfree_rcu) kfree_rcu(uobj, rcu); else kfree(uobj); @@ -107,7 +63,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject) kref_put(&uobject->ref, uverbs_uobject_free); } -static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) +static int uverbs_try_lock_object(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { /* * When a shared access is required, we use a positive counter. Each @@ -120,27 +77,211 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) * concurrently, setting the counter to zero is enough for releasing * this lock. */ - if (!exclusive) - return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? + switch (mode) { + case UVERBS_LOOKUP_READ: + return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ? -EBUSY : 0; + case UVERBS_LOOKUP_WRITE: + /* lock is exclusive */ + return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; + case UVERBS_LOOKUP_DESTROY: + return 0; + } + return 0; +} + +static void assert_uverbs_usecnt(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) +{ +#ifdef CONFIG_LOCKDEP + switch (mode) { + case UVERBS_LOOKUP_READ: + WARN_ON(atomic_read(&uobj->usecnt) <= 0); + break; + case UVERBS_LOOKUP_WRITE: + WARN_ON(atomic_read(&uobj->usecnt) != -1); + break; + case UVERBS_LOOKUP_DESTROY: + break; + } +#endif +} + +/* + * This must be called with the hw_destroy_rwsem locked for read or write, + * also the uobject itself must be locked for write. + * + * Upon return the HW object is guaranteed to be destroyed. 
+ * + * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held, + * however the type's allocat_commit function cannot have been called and the + * uobject cannot be on the uobjects_lists + * + * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via + * rdma_lookup_get_uobject) and the object is left in a state where the caller + * needs to call rdma_lookup_put_uobject. + * + * For all other destroy modes this function internally unlocks the uobject + * and consumes the kref on the uobj. + */ +static int uverbs_destroy_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason reason) +{ + struct ib_uverbs_file *ufile = uobj->ufile; + unsigned long flags; + int ret; + + lockdep_assert_held(&ufile->hw_destroy_rwsem); + assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); + + if (uobj->object) { + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason); + if (ret) { + if (ib_is_destroy_retryable(ret, reason, uobj)) + return ret; + + /* Nothing to be done, dangle the memory and move on */ + WARN(true, + "ib_uverbs: failed to remove uobject id %d, driver err=%d", + uobj->id, ret); + } + + uobj->object = NULL; + } - /* lock is either WRITE or DESTROY - should be exclusive */ - return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; + if (reason == RDMA_REMOVE_ABORT) { + WARN_ON(!list_empty(&uobj->list)); + WARN_ON(!uobj->context); + uobj->uapi_object->type_class->alloc_abort(uobj); + } + + uobj->context = NULL; + + /* + * For DESTROY the usecnt is held write locked, the caller is expected + * to put it unlock and put the object when done with it. Only DESTROY + * can remove the IDR handle. 
+ */ + if (reason != RDMA_REMOVE_DESTROY) + atomic_set(&uobj->usecnt, 0); + else + uobj->uapi_object->type_class->remove_handle(uobj); + + if (!list_empty(&uobj->list)) { + spin_lock_irqsave(&ufile->uobjects_lock, flags); + list_del_init(&uobj->list); + spin_unlock_irqrestore(&ufile->uobjects_lock, flags); + + /* + * Pairs with the get in rdma_alloc_commit_uobject(), could + * destroy uobj. + */ + uverbs_uobject_put(uobj); + } + + /* + * When aborting the stack kref remains owned by the core code, and is + * not transferred into the type. Pairs with the get in alloc_uobj + */ + if (reason == RDMA_REMOVE_ABORT) + uverbs_uobject_put(uobj); + + return 0; } -static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, - const struct uverbs_obj_type *type) +/* + * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY + * sequence. It should only be used from command callbacks. On success the + * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This + * version requires the caller to have already obtained an + * LOOKUP_DESTROY uobject kref. + */ +int uobj_destroy(struct ib_uobject *uobj) { - struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); + struct ib_uverbs_file *ufile = uobj->ufile; + int ret; + + down_read(&ufile->hw_destroy_rwsem); + + ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE); + if (ret) + goto out_unlock; + + ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY); + if (ret) { + atomic_set(&uobj->usecnt, 0); + goto out_unlock; + } +out_unlock: + up_read(&ufile->hw_destroy_rwsem); + return ret; +} + +/* + * uobj_get_destroy destroys the HW object and returns a handle to the uobj + * with a NULL object pointer. The caller must pair this with + * uverbs_put_destroy. 
+ */ +struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, + u32 id, struct ib_uverbs_file *ufile) +{ + struct ib_uobject *uobj; + int ret; + + uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY); + if (IS_ERR(uobj)) + return uobj; + + ret = uobj_destroy(uobj); + if (ret) { + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); + return ERR_PTR(ret); + } + + return uobj; +} + +/* + * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res + * on success (negative errno on failure). For use by callers that do not need + * the uobj. + */ +int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, + struct ib_uverbs_file *ufile, int success_res) +{ + struct ib_uobject *uobj; + + uobj = __uobj_get_destroy(obj, id, ufile); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + return success_res; +} + +/* alloc_uobj must be undone by uverbs_destroy_uobject() */ +static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, + const struct uverbs_api_object *obj) +{ + struct ib_uobject *uobj; + struct ib_ucontext *ucontext; + + ucontext = ib_uverbs_get_ucontext(ufile); + if (IS_ERR(ucontext)) + return ERR_CAST(ucontext); + + uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL); if (!uobj) return ERR_PTR(-ENOMEM); /* * user_handle should be filled by the handler, * The object is added to the list in the commit stage. */ - uobj->context = context; - uobj->type = type; + uobj->ufile = ufile; + uobj->context = ucontext; + INIT_LIST_HEAD(&uobj->list); + uobj->uapi_object = obj; /* * Allocated objects start out as write locked to deny any other * syscalls from accessing them until they are committed. See @@ -157,45 +298,39 @@ static int idr_add_uobj(struct ib_uobject *uobj) int ret; idr_preload(GFP_KERNEL); - spin_lock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); /* * We start with allocating an idr pointing to NULL. 
This represents an * object which isn't initialized yet. We'll replace it later on with * the real object once we commit. */ - ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, + ret = idr_alloc(&uobj->ufile->idr, NULL, 0, min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); if (ret >= 0) uobj->id = ret; - spin_unlock(&uobj->context->ufile->idr_lock); + spin_unlock(&uobj->ufile->idr_lock); idr_preload_end(); return ret < 0 ? ret : 0; } -/* - * It only removes it from the uobjects list, uverbs_uobject_put() is still - * required. - */ -static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) -{ - spin_lock(&uobj->context->ufile->idr_lock); - idr_remove(&uobj->context->ufile->idr, uobj->id); - spin_unlock(&uobj->context->ufile->idr_lock); -} - /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ -static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +static struct ib_uobject * +lookup_get_idr_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { struct ib_uobject *uobj; + unsigned long idrno = id; + + if (id < 0 || id > ULONG_MAX) + return ERR_PTR(-EINVAL); rcu_read_lock(); /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ucontext->ufile->idr, id); + uobj = idr_find(&ufile->idr, idrno); if (!uobj) { uobj = ERR_PTR(-ENOENT); goto free; @@ -215,19 +350,28 @@ free: return uobj; } -static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +static struct ib_uobject * +lookup_get_fd_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { + const struct uverbs_obj_fd_type *fd_type; struct file *f; struct ib_uobject *uobject; - const struct uverbs_obj_fd_type *fd_type = - container_of(type, struct uverbs_obj_fd_type, 
type); + int fdno = id; - if (exclusive) + if (fdno != id) + return ERR_PTR(-EINVAL); + + if (mode != UVERBS_LOOKUP_READ) return ERR_PTR(-EOPNOTSUPP); - f = fget(id); + if (!obj->type_attrs) + return ERR_PTR(-EIO); + fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); + + f = fget(fdno); if (!f) return ERR_PTR(-EBADF); @@ -246,43 +390,55 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty return uobject; } -struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); + if (!obj) + return ERR_PTR(-EINVAL); + + uobj = obj->type_class->lookup_get(obj, ufile, id, mode); if (IS_ERR(uobj)) return uobj; - if (uobj->type != type) { + if (uobj->uapi_object != obj) { ret = -EINVAL; goto free; } - ret = uverbs_try_lock_object(uobj, exclusive); - if (ret) { - WARN(ucontext->cleanup_reason, - "ib_uverbs: Trying to lookup_get while cleanup context\n"); + /* + * If we have been disassociated block every command except for + * DESTROY based commands. 
+ */ + if (mode != UVERBS_LOOKUP_DESTROY && + !srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) { + ret = -EIO; goto free; } + ret = uverbs_try_lock_object(uobj, mode); + if (ret) + goto free; + return uobj; free: - uobj->type->type_class->lookup_put(uobj, exclusive); + obj->type_class->lookup_put(uobj, mode); uverbs_uobject_put(uobj); return ERR_PTR(ret); } -static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) +static struct ib_uobject * +alloc_begin_idr_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, obj); if (IS_ERR(uobj)) return uobj; @@ -290,7 +446,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * if (ret) goto uobj_put; - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, + ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) goto idr_remove; @@ -298,304 +454,305 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * return uobj; idr_remove: - uverbs_idr_remove_uobj(uobj); + spin_lock(&ufile->idr_lock); + idr_remove(&ufile->idr, uobj->id); + spin_unlock(&ufile->idr_lock); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); } -static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) +static struct ib_uobject * +alloc_begin_fd_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { - const struct uverbs_obj_fd_type *fd_type = - container_of(type, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; - struct ib_uobject_file *uobj_file; - struct file *filp; new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, obj); if 
(IS_ERR(uobj)) { put_unused_fd(new_fd); return uobj; } - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - filp = anon_inode_getfile(fd_type->name, - fd_type->fops, - uobj_file, - fd_type->flags); - if (IS_ERR(filp)) { - put_unused_fd(new_fd); - uverbs_uobject_put(uobj); - return (void *)filp; - } - - uobj_file->uobj.id = new_fd; - uobj_file->uobj.object = filp; - uobj_file->ufile = ucontext->ufile; - INIT_LIST_HEAD(&uobj->list); - kref_get(&uobj_file->ufile->ref); + uobj->id = new_fd; + uobj->ufile = ufile; return uobj; } -struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { - return type->type_class->alloc_begin(type, ucontext); -} + struct ib_uobject *ret; -static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) -{ - const struct uverbs_obj_idr_type *idr_type = - container_of(uobj->type, struct uverbs_obj_idr_type, - type); - int ret = idr_type->destroy_object(uobj, why); + if (!obj) + return ERR_PTR(-EINVAL); /* - * We can only fail gracefully if the user requested to destroy the - * object. In the rest of the cases, just remove whatever you can. 
+ * The hw_destroy_rwsem is held across the entire object creation and + * released during rdma_alloc_commit_uobject or + * rdma_alloc_abort_uobject */ - if (why == RDMA_REMOVE_DESTROY && ret) - return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, - RDMACG_RESOURCE_HCA_OBJECT); - uverbs_idr_remove_uobj(uobj); + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + return ERR_PTR(-EIO); + ret = obj->type_class->alloc_begin(obj, ufile); + if (IS_ERR(ret)) { + up_read(&ufile->hw_destroy_rwsem); + return ret; + } return ret; } -static void alloc_abort_fd_uobject(struct ib_uobject *uobj) +static void alloc_abort_idr_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - struct file *filp = uobj->object; - int id = uobj_file->uobj.id; + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); - /* Unsuccessful NEW */ - fput(filp); - put_unused_fd(id); + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); } -static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) { - const struct uverbs_obj_fd_type *fd_type = - container_of(uobj->type, struct uverbs_obj_fd_type, type); - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - int ret = fd_type->context_closed(uobj_file, why); + const struct uverbs_obj_idr_type *idr_type = + container_of(uobj->uapi_object->type_attrs, + struct uverbs_obj_idr_type, type); + int ret = idr_type->destroy_object(uobj, why); - if (why == RDMA_REMOVE_DESTROY && ret) + /* + * We can only fail gracefully if the user requested to destroy the + * object or when a retry may be called upon an error. + * In the rest of the cases, just remove whatever you can. 
+ */ + if (ib_is_destroy_retryable(ret, why, uobj)) return ret; - if (why == RDMA_REMOVE_DURING_CLEANUP) { - alloc_abort_fd_uobject(uobj); - return ret; - } + if (why == RDMA_REMOVE_ABORT) + return 0; - uobj_file->uobj.context = NULL; - return ret; + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + + return 0; } -static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) +static void remove_handle_idr_uobject(struct ib_uobject *uobj) { -#ifdef CONFIG_LOCKDEP - if (exclusive) - WARN_ON(atomic_read(&uobj->usecnt) != -1); - else - WARN_ON(atomic_read(&uobj->usecnt) <= 0); -#endif + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); + /* Matches the kref in alloc_commit_idr_uobject */ + uverbs_uobject_put(uobj); } -static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { - int ret; - struct ib_ucontext *ucontext = uobj->context; - - ret = uobj->type->type_class->remove_commit(uobj, why); - if (ret && why == RDMA_REMOVE_DESTROY) { - /* We couldn't remove the object, so just unlock the uobject */ - atomic_set(&uobj->usecnt, 0); - uobj->type->type_class->lookup_put(uobj, true); - } else { - mutex_lock(&ucontext->uobjects_lock); - list_del(&uobj->list); - mutex_unlock(&ucontext->uobjects_lock); - /* put the ref we took when we created the object */ - uverbs_uobject_put(uobj); - } - - return ret; + put_unused_fd(uobj->id); } -/* This is called only for user requested DESTROY reasons */ -int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) +static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) { - int ret; - struct ib_ucontext *ucontext = uobj->context; - - /* put the ref count we took at lookup_get */ - uverbs_uobject_put(uobj); - /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); - return 0; - } - assert_uverbs_usecnt(uobj, true); - ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); + const struct uverbs_obj_fd_type *fd_type = container_of( + uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); + int ret = fd_type->context_closed(uobj, why); - up_read(&ucontext->cleanup_rwsem); - return ret; -} + if (ib_is_destroy_retryable(ret, why, uobj)) + return ret; -static int null_obj_type_class_remove_commit(struct ib_uobject *uobj, - enum rdma_remove_reason why) -{ return 0; } -static const struct uverbs_obj_type null_obj_type = { - .type_class = &((const struct uverbs_obj_type_class){ - .remove_commit = null_obj_type_class_remove_commit, - /* be cautious */ - .needs_kfree_rcu = true}), -}; - -int rdma_explicit_destroy(struct ib_uobject *uobject) +static void remove_handle_fd_uobject(struct ib_uobject *uobj) { - int ret; - struct ib_ucontext *ucontext = uobject->context; - - /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); - return 0; - } - assert_uverbs_usecnt(uobject, true); - ret = uobject->type->type_class->remove_commit(uobject, - RDMA_REMOVE_DESTROY); - if (ret) - goto out; - - uobject->type = &null_obj_type; - -out: - up_read(&ucontext->cleanup_rwsem); - return ret; } -static void alloc_commit_idr_uobject(struct ib_uobject *uobj) +static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->context->ufile->idr_lock); + struct ib_uverbs_file *ufile = uobj->ufile; + + spin_lock(&ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. + * + * NOTE: Once we set the IDR we loose ownership of our kref on uobj. 
+ * It will be put by remove_handle_idr_uobject() */ - WARN_ON(idr_replace(&uobj->context->ufile->idr, - uobj, uobj->id)); - spin_unlock(&uobj->context->ufile->idr_lock); + WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id)); + spin_unlock(&ufile->idr_lock); + + return 0; } -static void alloc_commit_fd_uobject(struct ib_uobject *uobj) +static int alloc_commit_fd_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); + const struct uverbs_obj_fd_type *fd_type = container_of( + uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); + int fd = uobj->id; + struct file *filp; + + /* + * The kref for uobj is moved into filp->private_data and put in + * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd() + * must be guaranteed to be called from the provided fops release + * callback. + */ + filp = anon_inode_getfile(fd_type->name, + fd_type->fops, + uobj, + fd_type->flags); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + uobj->object = filp; + + /* Matching put will be done in uverbs_close_fd() */ + kref_get(&uobj->ufile->ref); - fd_install(uobj_file->uobj.id, uobj->object); /* This shouldn't be used anymore. Use the file object instead */ - uobj_file->uobj.id = 0; - /* Get another reference as we export this to the fops */ - uverbs_uobject_get(&uobj_file->uobj); + uobj->id = 0; + + /* + * NOTE: Once we install the file we lose ownership of our kref on + * uobj. It will be put by uverbs_close_fd() + */ + fd_install(fd, filp); + + return 0; } -int rdma_alloc_commit_uobject(struct ib_uobject *uobj) +/* + * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the + * caller can no longer assume uobj is valid. If this function fails it + * destroys the uobject, including the attached HW object. + */ +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) { - /* Cleanup is running. 
Calling this should have been impossible */ - if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { - int ret; + struct ib_uverbs_file *ufile = uobj->ufile; + int ret; - WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); - ret = uobj->type->type_class->remove_commit(uobj, - RDMA_REMOVE_DURING_CLEANUP); - if (ret) - pr_warn("ib_uverbs: cleanup of idr object %d failed\n", - uobj->id); + /* alloc_commit consumes the uobj kref */ + ret = uobj->uapi_object->type_class->alloc_commit(uobj); + if (ret) { + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + up_read(&ufile->hw_destroy_rwsem); return ret; } + /* kref is held so long as the uobj is on the uobj list. */ + uverbs_uobject_get(uobj); + spin_lock_irq(&ufile->uobjects_lock); + list_add(&uobj->list, &ufile->uobjects); + spin_unlock_irq(&ufile->uobjects_lock); + /* matches atomic_set(-1) in alloc_uobj */ - assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); - mutex_lock(&uobj->context->uobjects_lock); - list_add(&uobj->list, &uobj->context->uobjects); - mutex_unlock(&uobj->context->uobjects_lock); - - uobj->type->type_class->alloc_commit(uobj); - up_read(&uobj->context->cleanup_rwsem); + /* Matches the down_read in rdma_alloc_begin_uobject */ + up_read(&ufile->hw_destroy_rwsem); return 0; } -static void alloc_abort_idr_uobject(struct ib_uobject *uobj) -{ - uverbs_idr_remove_uobj(uobj); - ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, - RDMACG_RESOURCE_HCA_OBJECT); - uverbs_uobject_put(uobj); -} - +/* + * This consumes the kref for uobj. It is up to the caller to unwind the HW + * object and anything else connected to uobj before calling this. 
+ */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) { - uobj->type->type_class->alloc_abort(uobj); + struct ib_uverbs_file *ufile = uobj->ufile; + + uobj->object = NULL; + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + + /* Matches the down_read in rdma_alloc_begin_uobject */ + up_read(&ufile->hw_destroy_rwsem); } -static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive) +static void lookup_put_idr_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { } -static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) +static void lookup_put_fd_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { struct file *filp = uobj->object; - WARN_ON(exclusive); + WARN_ON(mode != UVERBS_LOOKUP_READ); /* This indirectly calls uverbs_close_fd and free the object */ fput(filp); } -void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) +void rdma_lookup_put_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { - assert_uverbs_usecnt(uobj, exclusive); - uobj->type->type_class->lookup_put(uobj, exclusive); + assert_uverbs_usecnt(uobj, mode); + uobj->uapi_object->type_class->lookup_put(uobj, mode); /* * In order to unlock an object, either decrease its usecnt for * read access or zero it in case of exclusive access. See * uverbs_try_lock_object for locking schema information. 
*/ - if (!exclusive) + switch (mode) { + case UVERBS_LOOKUP_READ: atomic_dec(&uobj->usecnt); - else + break; + case UVERBS_LOOKUP_WRITE: atomic_set(&uobj->usecnt, 0); + break; + case UVERBS_LOOKUP_DESTROY: + break; + } + /* Pairs with the kref obtained by type->lookup_get */ uverbs_uobject_put(uobj); } +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile) +{ + spin_lock_init(&ufile->idr_lock); + idr_init(&ufile->idr); +} + +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile) +{ + struct ib_uobject *entry; + int id; + + /* + * At this point uverbs_cleanup_ufile() is guaranteed to have run, and + * there are no HW objects left, however the IDR is still populated + * with anything that has not been cleaned up by userspace. Since the + * kref on ufile is 0, nothing is allowed to call lookup_get. + * + * This is an optimized equivalent to remove_handle_idr_uobject + */ + idr_for_each_entry(&ufile->idr, entry, id) { + WARN_ON(entry->object); + uverbs_uobject_put(entry); + } + + idr_destroy(&ufile->idr); +} + const struct uverbs_obj_type_class uverbs_idr_class = { .alloc_begin = alloc_begin_idr_uobject, .lookup_get = lookup_get_idr_uobject, .alloc_commit = alloc_commit_idr_uobject, .alloc_abort = alloc_abort_idr_uobject, .lookup_put = lookup_put_idr_uobject, - .remove_commit = remove_commit_idr_uobject, + .destroy_hw = destroy_hw_idr_uobject, + .remove_handle = remove_handle_idr_uobject, /* * When we destroy an object, we first just lock it for WRITE and * actually DESTROY it in the finalize stage. 
So, the problematic @@ -611,103 +768,180 @@ const struct uverbs_obj_type_class uverbs_idr_class = { */ .needs_kfree_rcu = true, }; +EXPORT_SYMBOL(uverbs_idr_class); -static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) +void uverbs_close_fd(struct file *f) { - struct ib_ucontext *ucontext; - struct ib_uverbs_file *ufile = uobj_file->ufile; - int ret; + struct ib_uobject *uobj = f->private_data; + struct ib_uverbs_file *ufile = uobj->ufile; - mutex_lock(&uobj_file->ufile->cleanup_mutex); + if (down_read_trylock(&ufile->hw_destroy_rwsem)) { + /* + * lookup_get_fd_uobject holds the kref on the struct file any + * time a FD uobj is locked, which prevents this release + * method from being invoked. Meaning we can always get the + * write lock here, or we have a kernel bug. + */ + WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE); + up_read(&ufile->hw_destroy_rwsem); + } - /* uobject was either already cleaned up or is cleaned up right now anyway */ - if (!uobj_file->uobj.context || - !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem)) - goto unlock; + /* Matches the get in alloc_commit_fd_uobject */ + kref_put(&ufile->ref, ib_uverbs_release_file); - ucontext = uobj_file->uobj.context; - ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); - up_read(&ucontext->cleanup_rwsem); - if (ret) - pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); -unlock: - mutex_unlock(&ufile->cleanup_mutex); + /* Pairs with filp->private_data in alloc_commit_fd_uobject */ + uverbs_uobject_put(uobj); } -void uverbs_close_fd(struct file *f) -{ - struct ib_uobject_file *uobj_file = f->private_data; - struct kref *uverbs_file_ref = &uobj_file->ufile->ref; +static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct ib_device *ib_dev = ibcontext->device; + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = 
get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } - _uverbs_close_fd(uobj_file); - uverbs_uobject_put(&uobj_file->uobj); - kref_put(uverbs_file_ref, ib_uverbs_release_file); + down_write(&owning_mm->mmap_sem); + ib_dev->disassociate_ucontext(ibcontext); + up_write(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); } -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +/* + * Drop the ucontext off the ufile and completely disconnect it from the + * ib_device + */ +static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; - unsigned int cur_order = 0; + struct ib_ucontext *ucontext = ufile->ucontext; + int ret; + + if (reason == RDMA_REMOVE_DRIVER_REMOVE) + ufile_disassociate_ucontext(ucontext); + + put_pid(ucontext->tgid); + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); - ucontext->cleanup_reason = reason; /* - * Waits for all remove_commit and alloc_commit to finish. Logically, We - * want to hold this forever as the context is going to be destroyed, - * but we'll release it since it causes a "held lock freed" BUG message. + * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove + * the error return. 
*/ - down_write(&ucontext->cleanup_rwsem); + ret = ucontext->device->dealloc_ucontext(ucontext); + WARN_ON(ret); - while (!list_empty(&ucontext->uobjects)) { - struct ib_uobject *obj, *next_obj; - unsigned int next_order = UINT_MAX; + ufile->ucontext = NULL; +} + +static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) +{ + struct ib_uobject *obj, *next_obj; + int ret = -EINVAL; + /* + * This shouldn't run while executing other commands on this + * context. Thus, the only thing we should take care of is + * releasing a FD while traversing this list. The FD could be + * closed and released from the _release fop of this FD. + * In order to mitigate this, we add a lock. + * We take and release the lock per traversal in order to let + * other threads (which might still use the FDs) chance to run. + */ + list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* - * This shouldn't run while executing other commands on this - * context. Thus, the only thing we should take care of is - * releasing a FD while traversing this list. The FD could be - * closed and released from the _release fop of this FD. - * In order to mitigate this, we add a lock. - * We take and release the lock per order traversal in order - * to let other threads (which might still use the FDs) chance - * to run. + * if we hit this WARN_ON, that means we are + * racing with a lookup_get. */ - mutex_lock(&ucontext->uobjects_lock); - list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, - list) { - if (obj->type->destroy_order == cur_order) { - int ret; - - /* - * if we hit this WARN_ON, that means we are - * racing with a lookup_get. 
- */ - WARN_ON(uverbs_try_lock_object(obj, true)); - ret = obj->type->type_class->remove_commit(obj, - reason); - list_del(&obj->list); - if (ret) - pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", - obj->id, cur_order); - /* put the ref we took when we created the object */ - uverbs_uobject_put(obj); - } else { - next_order = min(next_order, - obj->type->destroy_order); - } - } - mutex_unlock(&ucontext->uobjects_lock); - cur_order = next_order; + WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (!uverbs_destroy_uobject(obj, reason)) + ret = 0; } - up_write(&ucontext->cleanup_rwsem); + return ret; } -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) +/* + * Destroy the ucontext and every uobject associated with it. If called with + * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has + * been completed and ufile->ucontext is NULL. + * + * This is internally locked and can be called in parallel from multiple + * contexts. + */ +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - ucontext->cleanup_reason = 0; - mutex_init(&ucontext->uobjects_lock); - INIT_LIST_HEAD(&ucontext->uobjects); - init_rwsem(&ucontext->cleanup_rwsem); + if (reason == RDMA_REMOVE_CLOSE) { + /* + * During destruction we might trigger something that + * synchronously calls release on any file descriptor. For + * this reason all paths that come from file_operations + * release must use try_lock. They can progress knowing that + * there is an ongoing uverbs_destroy_ufile_hw that will clean + * up the driver resources. + */ + if (!mutex_trylock(&ufile->ucontext_lock)) + return; + + } else { + mutex_lock(&ufile->ucontext_lock); + } + + down_write(&ufile->hw_destroy_rwsem); + + /* + * If a ucontext was never created then we can't have any uobjects to + * cleanup, nothing to do. 
+ */ + if (!ufile->ucontext) + goto done; + + ufile->ucontext->closing = true; + ufile->ucontext->cleanup_retryable = true; + while (!list_empty(&ufile->uobjects)) + if (__uverbs_cleanup_ufile(ufile, reason)) { + /* + * No entry was cleaned-up successfully during this + * iteration + */ + break; + } + + ufile->ucontext->cleanup_retryable = false; + if (!list_empty(&ufile->uobjects)) + __uverbs_cleanup_ufile(ufile, reason); + + ufile_destroy_ucontext(ufile, reason); + +done: + up_write(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { @@ -716,23 +950,33 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .alloc_commit = alloc_commit_fd_uobject, .alloc_abort = alloc_abort_fd_uobject, .lookup_put = lookup_put_fd_uobject, - .remove_commit = remove_commit_fd_uobject, + .destroy_hw = destroy_hw_fd_uobject, + .remove_handle = remove_handle_fd_uobject, .needs_kfree_rcu = false, }; +EXPORT_SYMBOL(uverbs_fd_class); -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id) +struct ib_uobject * +uverbs_get_uobject_from_file(u16 object_id, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, s64 id) { + const struct uverbs_api_object *obj = + uapi_get_object(ufile->device->uapi, object_id); + switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, false); + return rdma_lookup_get_uobject(obj, ufile, id, + UVERBS_LOOKUP_READ); case UVERBS_ACCESS_DESTROY: + /* Actual destruction is done inside uverbs_handle_method */ + return rdma_lookup_get_uobject(obj, ufile, id, + UVERBS_LOOKUP_DESTROY); case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, true); + return rdma_lookup_get_uobject(obj, ufile, id, + UVERBS_LOOKUP_WRITE); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(type_attrs, ucontext); + 
return rdma_alloc_begin_uobject(obj, ufile); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); @@ -753,16 +997,14 @@ int uverbs_finalize_object(struct ib_uobject *uobj, switch (access) { case UVERBS_ACCESS_READ: - rdma_lookup_put_uobject(uobj, false); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); break; case UVERBS_ACCESS_WRITE: - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); break; case UVERBS_ACCESS_DESTROY: - if (commit) - ret = rdma_remove_commit_uobject(uobj); - else - rdma_lookup_put_uobject(uobj, true); + if (uobj) + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); break; case UVERBS_ACCESS_NEW: if (commit) @@ -777,43 +1019,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj, return ret; } - -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit) -{ - unsigned int i; - int ret = 0; - - for (i = 0; i < num; i++) { - struct uverbs_attr_bundle_hash *curr_bundle = - &attrs_bundle->hash[i]; - const struct uverbs_attr_spec_hash *curr_spec_bucket = - spec_hash[i]; - unsigned int j; - - for (j = 0; j < curr_bundle->num_attrs; j++) { - struct uverbs_attr *attr; - const struct uverbs_attr_spec *spec; - - if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) - continue; - - attr = &curr_bundle->attrs[j]; - spec = &curr_spec_bucket->attrs[j]; - - if (spec->type == UVERBS_ATTR_TYPE_IDR || - spec->type == UVERBS_ATTR_TYPE_FD) { - int current_ret; - - current_ret = uverbs_finalize_object(attr->obj_attr.uobject, - spec->obj.access, - commit); - if (!ret) - ret = current_ret; - } - } - } - return ret; -} diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 1efcf93238dd..f962f2a593ba 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -43,20 +43,12 @@ #include <rdma/ib_verbs.h> #include <linux/mutex.h> -int uverbs_ns_idx(u16 *id, unsigned int 
ns_count); -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, - uint16_t object); -const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, - uint16_t method); -/* - * These functions initialize the context and cleanups its uobjects. - * The context has a list of objects which is protected by a mutex - * on the context. initialize_ucontext should be called when we create - * a context. - * cleanup_ucontext removes all uobjects from the context and puts them. - */ -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); +struct ib_uverbs_device; + +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason); + +int uobj_destroy(struct ib_uobject *uobj); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -82,7 +74,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject); void uverbs_close_fd(struct file *f); /* - * Get an ib_uobject that corresponds to the given id from ucontext, assuming + * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable. * This function could create (access == NEW), destroy (access == DESTROY) @@ -90,13 +82,11 @@ void uverbs_close_fd(struct file *f); * The action will be finalized only when uverbs_finalize_object or * uverbs_finalize_objects are called. 
*/ -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id); -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit); +struct ib_uobject * +uverbs_get_uobject_from_file(u16 object_id, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, s64 id); + /* * Note that certain finalize stages could return a status: * (a) alloc_commit could return a failure if the object is committed at the @@ -112,9 +102,63 @@ int uverbs_finalize_object(struct ib_uobject *uobj, * function. For example, this could happen when we couldn't destroy an * object. */ -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit); +int uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, + bool commit); + +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); + +/* + * This is the runtime description of the uverbs API, used by the syscall + * machinery to validate and dispatch calls. + */ + +/* + * Depending on ID the slot pointer in the radix tree points at one of these + * structs. 
+ */ +struct uverbs_api_object { + const struct uverbs_obj_type *type_attrs; + const struct uverbs_obj_type_class *type_class; +}; + +struct uverbs_api_ioctl_method { + int (__rcu *handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); + u16 bundle_size; + u8 use_stack:1; + u8 driver_method:1; + u8 key_bitmap_len; + u8 destroy_bkey; +}; + +struct uverbs_api_attr { + struct uverbs_attr_spec spec; +}; + +struct uverbs_api_object; +struct uverbs_api { + /* radix tree contains struct uverbs_api_* pointers */ + struct radix_tree_root radix; + enum rdma_driver_id driver_id; +}; + +static inline const struct uverbs_api_object * +uapi_get_object(struct uverbs_api *uapi, u16 object_id) +{ + return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); +} + +char *uapi_key_format(char *S, unsigned int key); +struct uverbs_api *uverbs_alloc_api( + const struct uverbs_object_tree_def *const *driver_specs, + enum rdma_driver_id driver_id); +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); +void uverbs_disassociate_api(struct uverbs_api *uapi); +void uverbs_destroy_api(struct uverbs_api *uapi); +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs); #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index a4fbdc5d28fa..ee366199b169 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -143,14 +143,15 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de #define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ BONDING_SLAVE_STATE_NA) -static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool +is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { struct 
net_device *real_dev; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); real_dev = rdma_vlan_dev_real_dev(cookie); @@ -166,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, return res; } -static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool +is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { struct net_device *master_dev; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); @@ -184,22 +186,59 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, return res; } -static int pass_all_filter(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +/** is_ndev_for_default_gid_filter - Check if a given netdevice + * can be considered for default GIDs or not. + * @ib_dev: IB device to check + * @port: Port to consider for adding default GID + * @rdma_ndev: rdma netdevice pointer + * @cookie_ndev: Netdevice to consider to form a default GID + * + * is_ndev_for_default_gid_filter() returns true if a given netdevice can be + * considered for deriving default RoCE GID, returns false otherwise. + */ +static bool +is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *cookie_ndev = cookie; + bool res; + + if (!rdma_ndev) + return false; + + rcu_read_lock(); + + /* + * When rdma netdevice is used in bonding, bonding master netdevice + * should be considered for default GIDs. Therefore, ignore slave rdma + * netdevices when bonding is considered. + * Additionally when event(cookie) netdevice is bond master device, + * make sure that it the upper netdevice of rdma netdevice. 
+ */ + res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) || + (netif_is_bond_master(cookie_ndev) && + rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))); + + rcu_read_unlock(); + return res; +} + +static bool pass_all_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { - return 1; + return true; } -static int upper_device_filter(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool upper_device_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { - int res; + bool res; if (!rdma_ndev) - return 0; + return false; if (rdma_ndev == cookie) - return 1; + return true; rcu_read_lock(); res = rdma_is_upper_dev_rcu(rdma_ndev, cookie); @@ -208,6 +247,34 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port, return res; } +/** + * is_upper_ndev_bond_master_filter - Check if a given netdevice + * is bond master device of netdevice of the the RDMA device of port. + * @ib_dev: IB device to check + * @port: Port to consider for adding default GID + * @rdma_ndev: Pointer to rdma netdevice + * @cookie: Netdevice to consider to form a default GID + * + * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev + * is bond master device and rdma_ndev is its lower netdevice. It might + * not have been established as slave device yet. 
+ */ +static bool +is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, + void *cookie) +{ + struct net_device *cookie_ndev = cookie; + bool match = false; + + rcu_read_lock(); + if (netif_is_bond_master(cookie_ndev) && + rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)) + match = true; + rcu_read_unlock(); + return match; +} + static void update_gid_ip(enum gid_op_type gid_op, struct ib_device *ib_dev, u8 port, struct net_device *ndev, @@ -223,34 +290,10 @@ static void update_gid_ip(enum gid_op_type gid_op, update_gid(gid_op, ib_dev, port, &gid, &gid_attr); } -static void enum_netdev_default_gids(struct ib_device *ib_dev, - u8 port, struct net_device *event_ndev, - struct net_device *rdma_ndev) -{ - unsigned long gid_type_mask; - - rcu_read_lock(); - if (!rdma_ndev || - ((rdma_ndev != event_ndev && - !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) || - is_eth_active_slave_of_bonding_rcu(rdma_ndev, - netdev_master_upper_dev_get_rcu(rdma_ndev)) == - BONDING_SLAVE_STATE_INACTIVE)) { - rcu_read_unlock(); - return; - } - rcu_read_unlock(); - - gid_type_mask = roce_gid_type_mask_support(ib_dev, port); - - ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask, - IB_CACHE_GID_DEFAULT_MODE_SET); -} - static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, u8 port, - struct net_device *event_ndev, - struct net_device *rdma_ndev) + struct net_device *rdma_ndev, + struct net_device *event_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); unsigned long gid_type_mask; @@ -381,7 +424,6 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port, static void add_netdev_ips(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); _add_netdev_ips(ib_dev, port, cookie); } @@ -391,6 +433,38 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port, ib_cache_gid_del_all_netdev_gids(ib_dev, port, 
cookie); } +/** + * del_default_gids - Delete default GIDs of the event/cookie netdevice + * @ib_dev: RDMA device pointer + * @port: Port of the RDMA device whose GID table to consider + * @rdma_ndev: Unused rdma netdevice + * @cookie: Pointer to event netdevice + * + * del_default_gids() deletes the default GIDs of the event/cookie netdevice. + */ +static void del_default_gids(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *cookie_ndev = cookie; + unsigned long gid_type_mask; + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + + ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_DELETE); +} + +static void add_default_gids(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = cookie; + unsigned long gid_type_mask; + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_SET); +} + static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, @@ -405,9 +479,20 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, rtnl_lock(); down_read(&net_rwsem); for_each_net(net) - for_each_netdev(net, ndev) - if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) - add_netdev_ips(ib_dev, port, rdma_ndev, ndev); + for_each_netdev(net, ndev) { + /* + * Filter and add default GIDs of the primary netdevice + * when not in bonding mode, or add default GIDs + * of bond master device, when in bonding mode. 
+ */ + if (is_ndev_for_default_gid_filter(ib_dev, port, + rdma_ndev, ndev)) + add_default_gids(ib_dev, port, rdma_ndev, ndev); + + if (is_eth_port_of_netdev_filter(ib_dev, port, + rdma_ndev, ndev)) + _add_netdev_ips(ib_dev, port, ndev); + } up_read(&net_rwsem); rtnl_unlock(); } @@ -513,18 +598,12 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port, rcu_read_unlock(); if (master_ndev) { - bond_delete_netdev_default_gids(ib_dev, port, master_ndev, - rdma_ndev); + bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev, + master_ndev); dev_put(master_ndev); } } -static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) -{ - bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); -} - /* The following functions operate on all IB devices. netdevice_event and * addr_event execute ib_enum_all_roce_netdevs through a work. * ib_enum_all_roce_netdevs iterates through all IB devices. @@ -575,40 +654,94 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, } static const struct netdev_event_work_cmd add_cmd = { - .cb = add_netdev_ips, .filter = is_eth_port_of_netdev}; + .cb = add_netdev_ips, + .filter = is_eth_port_of_netdev_filter +}; + static const struct netdev_event_work_cmd add_cmd_upper_ips = { - .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev}; + .cb = add_netdev_upper_ips, + .filter = is_eth_port_of_netdev_filter +}; -static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info, - struct netdev_event_work_cmd *cmds) +static void +ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) { - static const struct netdev_event_work_cmd upper_ips_del_cmd = { - .cb = del_netdev_upper_ips, .filter = upper_device_filter}; - static const struct netdev_event_work_cmd bonding_default_del_cmd = { - .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave}; - 
- if (changeupper_info->linking == false) { - cmds[0] = upper_ips_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd; - } else { - cmds[0] = bonding_default_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd_upper_ips; - cmds[1].ndev = changeupper_info->upper_dev; - cmds[1].filter_ndev = changeupper_info->upper_dev; - } + static const struct netdev_event_work_cmd + upper_ips_del_cmd = { + .cb = del_netdev_upper_ips, + .filter = upper_device_filter + }; + + cmds[0] = upper_ips_del_cmd; + cmds[0].ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd; } +static const struct netdev_event_work_cmd bonding_default_add_cmd = { + .cb = add_default_gids, + .filter = is_upper_ndev_bond_master_filter +}; + +static void +ndev_event_link(struct net_device *event_ndev, + struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + static const struct netdev_event_work_cmd + bonding_default_del_cmd = { + .cb = del_default_gids, + .filter = is_upper_ndev_bond_master_filter + }; + /* + * When a lower netdev is linked to its upper bonding + * netdev, delete lower slave netdev's default GIDs. 
+ */ + cmds[0] = bonding_default_del_cmd; + cmds[0].ndev = event_ndev; + cmds[0].filter_ndev = changeupper_info->upper_dev; + + /* Now add bonding upper device default GIDs */ + cmds[1] = bonding_default_add_cmd; + cmds[1].ndev = changeupper_info->upper_dev; + cmds[1].filter_ndev = changeupper_info->upper_dev; + + /* Now add bonding upper device IP based GIDs */ + cmds[2] = add_cmd_upper_ips; + cmds[2].ndev = changeupper_info->upper_dev; + cmds[2].filter_ndev = changeupper_info->upper_dev; +} + +static void netdevice_event_changeupper(struct net_device *event_ndev, + struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + if (changeupper_info->linking) + ndev_event_link(event_ndev, changeupper_info, cmds); + else + ndev_event_unlink(changeupper_info, cmds); +} + +static const struct netdev_event_work_cmd add_default_gid_cmd = { + .cb = add_default_gids, + .filter = is_ndev_for_default_gid_filter, +}; + static int netdevice_event(struct notifier_block *this, unsigned long event, void *ptr) { static const struct netdev_event_work_cmd del_cmd = { .cb = del_netdev_ips, .filter = pass_all_filter}; - static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { - .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; - static const struct netdev_event_work_cmd default_del_cmd = { - .cb = del_netdev_default_ips, .filter = pass_all_filter}; + static const struct netdev_event_work_cmd + bonding_default_del_cmd_join = { + .cb = del_netdev_default_ips_join, + .filter = is_eth_port_inactive_slave_filter + }; + static const struct netdev_event_work_cmd + netdev_del_cmd = { + .cb = del_netdev_ips, + .filter = is_eth_port_of_netdev_filter + }; static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter}; struct net_device *ndev = netdev_notifier_info_to_dev(ptr); @@ -621,7 +754,8 @@ static int netdevice_event(struct notifier_block 
*this, unsigned long event, case NETDEV_REGISTER: case NETDEV_UP: cmds[0] = bonding_default_del_cmd_join; - cmds[1] = add_cmd; + cmds[1] = add_default_gid_cmd; + cmds[2] = add_cmd; break; case NETDEV_UNREGISTER: @@ -632,19 +766,22 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGEADDR: - cmds[0] = default_del_cmd; - cmds[1] = add_cmd; + cmds[0] = netdev_del_cmd; + cmds[1] = add_default_gid_cmd; + cmds[2] = add_cmd; break; case NETDEV_CHANGEUPPER: - netdevice_event_changeupper( + netdevice_event_changeupper(ndev, container_of(ptr, struct netdev_notifier_changeupper_info, info), cmds); break; case NETDEV_BONDING_FAILOVER: cmds[0] = bonding_event_ips_del_cmd; - cmds[1] = bonding_default_del_cmd_join; + /* Add default GIDs of the bond device */ + cmds[1] = bonding_default_add_cmd; + /* Add IP based GIDs of the bond device */ cmds[2] = add_cmd_upper_ips; break; @@ -660,7 +797,8 @@ static void update_gid_event_work_handler(struct work_struct *_work) struct update_gid_event_work *work = container_of(_work, struct update_gid_event_work, work); - ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev, + ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter, + work->gid_attr.ndev, callback_for_addr_gid_device_scan, work); dev_put(work->gid_attr.ndev); diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index c8963e91f92a..683e6d11a564 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, } ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); - if (ret < nents) { + if (ret < 0 || ret < nents) { ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); return -EINVAL; } @@ -325,7 +325,7 @@ out_unmap_sg: EXPORT_SYMBOL(rdma_rw_ctx_init); /** - * rdma_rw_ctx_signature init - initialize a RW context with signature offload + * rdma_rw_ctx_signature_init - initialize a RW context with signature 
offload * @ctx: context to initialize * @qp: queue pair to operate on * @port_num: port num to which the connection is bound @@ -564,10 +564,10 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs); int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr) { - struct ib_send_wr *first_wr, *bad_wr; + struct ib_send_wr *first_wr; first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr); - return ib_post_send(qp, first_wr, &bad_wr); + return ib_post_send(qp, first_wr, NULL); } EXPORT_SYMBOL(rdma_rw_ctx_post); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a61ec7e33613..7b794a14d6e8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) return src_path_mask; } -static int -roce_resolve_route_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec) +static int roce_resolve_route_from_path(struct sa_path_rec *rec, + const struct ib_gid_attr *attr) { - struct net_device *resolved_dev; - struct net_device *ndev; - struct net_device *idev; - struct rdma_dev_addr dev_addr = { - .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? - sa_path_get_ifindex(rec) : 0), - .net = sa_path_get_ndev(rec) ? - sa_path_get_ndev(rec) : - &init_net - }; + struct rdma_dev_addr dev_addr = {}; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; @@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, if (rec->roce.route_resolved) return 0; + if (!attr || !attr->ndev) + return -EINVAL; - if (!device->get_netdev) - return -EOPNOTSUPP; + dev_addr.bound_dev_if = attr->ndev->ifindex; + /* TODO: Use net from the ib_gid_attr once it is added to it, + * until than, limit itself to init_net. 
+ */ + dev_addr.net = &init_net; rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); @@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) return -EINVAL; - idev = device->get_netdev(device, port_num); - if (!idev) - return -ENODEV; - - resolved_dev = dev_get_by_index(dev_addr.net, - dev_addr.bound_dev_if); - if (!resolved_dev) { - ret = -ENODEV; - goto done; - } - ndev = ib_get_ndev_from_path(rec); - rcu_read_lock(); - if ((ndev && ndev != resolved_dev) || - (resolved_dev != idev && - !rdma_is_upper_dev_rcu(idev, resolved_dev))) - ret = -EHOSTUNREACH; - rcu_read_unlock(); - dev_put(resolved_dev); - if (ndev) - dev_put(ndev); -done: - dev_put(idev); - if (!ret) - rec->roce.route_resolved = true; - return ret; + rec->roce.route_resolved = true; + return 0; } static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); - struct net_device *ndev; - u16 gid_index; - int ret; - ndev = ib_get_ndev_from_path(rec); - ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, - port_num, ndev, &gid_index); - if (ndev) - dev_put(ndev); - if (ret) - return ret; + if (!gid_attr) { + gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type, + port_num, NULL); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + } else + rdma_hold_gid_attr(gid_attr); - rdma_ah_set_grh(ah_attr, &rec->dgid, - be32_to_cpu(rec->flow_label), - gid_index, rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->dgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + gid_attr); return 0; } +/** + * ib_init_ah_attr_from_path - Initialize address handle attributes based on + * an SA path record. 
+ * @device: Device associated ah attributes initialization. + * @port_num: Port on the specified device. + * @rec: path record entry to use for ah attributes initialization. + * @ah_attr: address handle attributes to initialization from path record. + * @sgid_attr: SGID attribute to consider during initialization. + * + * When ib_init_ah_attr_from_path() returns success, + * (a) for IB link layer it optionally contains a reference to SGID attribute + * when GRH is present for IB link layer. + * (b) for RoCE link layer it contains a reference to SGID attribute. + * User must invoke rdma_destroy_ah_attr() to release reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_path(). + */ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { int ret = 0; @@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, rdma_ah_set_static_rate(ah_attr, rec->rate); if (sa_path_is_roce(rec)) { - ret = roce_resolve_route_from_path(device, port_num, rec); + ret = roce_resolve_route_from_path(rec, gid_attr); if (ret) return ret; @@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, } if (rec->hop_limit > 0 || sa_path_is_roce(rec)) - ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr); + ret = init_ah_attr_grh_fields(device, port_num, + rec, ah_attr, gid_attr); return ret; } EXPORT_SYMBOL(ib_init_ah_attr_from_path); @@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_ndev(&rec, NULL); - sa_path_set_ifindex(&rec, 0); sa_path_set_dmac_zero(&rec); if (query->conv_pr) { @@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work) struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct 
rdma_ah_attr ah_attr; + bool grh_required; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); @@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); - if (port_attr.grh_required) { - if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { - rdma_ah_set_make_grd(&ah_attr, true); - } else { - rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); - rdma_ah_set_subnet_prefix(&ah_attr, - cpu_to_be64(port_attr.subnet_prefix)); - rdma_ah_set_interface_id(&ah_attr, - cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); - } + + grh_required = rdma_is_grh_required(port->agent->device, + port->port_num); + + /* + * The OPA sm_lid of 0xFFFF needs special handling so that it can be + * differentiated from a permissive LID of 0xFFFF. We set the + * grh_required flag here so the SA can program the DGID in the + * address handle appropriately + */ + if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && + (grh_required || + port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) + rdma_ah_set_make_grd(&ah_attr, true); + + if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 31c7efaf8e7a..7fd14ead7b37 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -42,6 +42,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> +#include <rdma/ib_cache.h> struct ib_port; @@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_ndev(const struct 
ib_gid_attr *gid_attr, char *buf) { if (!gid_attr->ndev) return -EINVAL; @@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) return sprintf(buf, "%s\n", gid_attr->ndev->name); } -static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); } -static ssize_t _show_port_gid_attr(struct ib_port *p, - struct port_attribute *attr, - char *buf, - size_t (*print)(struct ib_gid_attr *gid_attr, - char *buf)) +static ssize_t _show_port_gid_attr( + struct ib_port *p, struct port_attribute *attr, char *buf, + size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid gid; - struct ib_gid_attr gid_attr = {}; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, - &gid_attr); - if (ret) - goto err; + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); - ret = print(&gid_attr, buf); - -err: - if (gid_attr.ndev) - dev_put(gid_attr.ndev); + ret = print(gid_attr, buf); + rdma_put_gid_attr(gid_attr); return ret; } @@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid *pgid; - union ib_gid gid; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) { + const union ib_gid zgid = {}; + + /* If reading GID fails, it is likely due to GID entry being + * empty (invalid) or reserved GID in the table. 
User space + * expects to read GID table entries as long as it given index + * is within GID table size. Administrative/debugging tool + * fails to query rest of the GID entries if it hits error + * while querying a GID of the given index. To avoid user + * space throwing such error on fail to read gid, return zero + * GID as before. This maintains backward compatibility. + */ + return sprintf(buf, "%pI6\n", zgid.raw); + } - /* If reading GID fails, it is likely due to GID entry being empty - * (invalid) or reserved GID in the table. - * User space expects to read GID table entries as long as it given - * index is within GID table size. - * Administrative/debugging tool fails to query rest of the GID entries - * if it hits error while querying a GID of the given index. - * To avoid user space throwing such error on fail to read gid, return - * zero GID as before. This maintains backward compatibility. - */ - if (ret) - pgid = &zgid; - else - pgid = &gid; - return sprintf(buf, "%pI6\n", pgid->raw); + ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + rdma_put_gid_attr(gid_attr); + return ret; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9eef96dacbd7..faa9e6116b2f 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -207,7 +207,7 @@ error: } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, - struct ib_cm_req_event_param *kreq) + const struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; @@ -231,7 +231,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, - struct ib_cm_rep_event_param *krep) + const struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; @@ -247,14 +247,14 @@ static void 
ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, - struct ib_cm_sidr_rep_event_param *krep) + const struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; -static int ib_ucm_event_process(struct ib_cm_event *evt, +static int ib_ucm_event_process(const struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; @@ -351,7 +351,7 @@ err1: } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; @@ -1000,14 +1000,11 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { - struct ib_cm_sidr_req_param param; + struct ib_cm_sidr_req_param param = {}; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; - param.private_data = NULL; - param.path = NULL; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 54ab6335c48d..a41792dbae1f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; - unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; @@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, int i; unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; - int need_release = 0; unsigned int gup_flags = FOLL_WRITE; if (dmasync) @@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem, access); - if (ret) { - 
kfree(umem); - return ERR_PTR(ret); - } + if (ret) + goto umem_kfree; return umem; } @@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - kfree(umem); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto umem_kfree; } /* @@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, npages = ib_umem_num_pages(umem); - down_write(&current->mm->mmap_sem); - - locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + down_write(&current->mm->mmap_sem); + current->mm->pinned_vm += npages; + if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { + up_write(&current->mm->mmap_sem); ret = -ENOMEM; - goto out; + goto vma; } + up_write(&current->mm->mmap_sem); cur_base = addr & PAGE_MASK; if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; - goto out; + goto vma; } ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); if (ret) - goto out; + goto vma; if (!umem->writable) gup_flags |= FOLL_FORCE; - need_release = 1; sg_list_start = umem->sg_head.sgl; + down_read(&current->mm->mmap_sem); while (npages) { ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); - - if (ret < 0) - goto out; + if (ret < 0) { + up_read(&current->mm->mmap_sem); + goto umem_release; + } umem->npages += ret; cur_base += ret * PAGE_SIZE; @@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* preparing for next loop */ sg_list_start = sg; } + up_read(&current->mm->mmap_sem); umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, @@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DMA_BIDIRECTIONAL, dma_attrs); - if (umem->nmap <= 0) { + if (!umem->nmap) { ret = -ENOMEM; - goto out; + goto
umem_release; } ret = 0; + goto out; -out: - if (ret < 0) { - if (need_release) - __ib_umem_release(context->device, umem, 0); - kfree(umem); - } else - current->mm->pinned_vm = locked; - +umem_release: + __ib_umem_release(context->device, umem, 0); +vma: + down_write(&current->mm->mmap_sem); + current->mm->pinned_vm -= ib_umem_num_pages(umem); up_write(&current->mm->mmap_sem); +out: if (vma_list) free_page((unsigned long) vma_list); free_page((unsigned long) page_list); - - return ret < 0 ? ERR_PTR(ret) : umem; +umem_kfree: + if (ret) + kfree(umem); + return ret ? ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 182436b92ba9..6ec748eccff7 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -186,6 +186,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn, rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, ULLONG_MAX, ib_umem_notifier_release_trampoline, + true, NULL); up_read(&context->umem_rwsem); } @@ -207,22 +208,31 @@ static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, return 0; } -static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, +static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, - unsigned long end) + unsigned long end, + bool blockable) { struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); + int ret; if (!context->invalidate_range) - return; + return 0; + + if (blockable) + down_read(&context->umem_rwsem); + else if (!down_read_trylock(&context->umem_rwsem)) + return -EAGAIN; ib_ucontext_notifier_start_account(context); - down_read(&context->umem_rwsem); - rbt_ib_umem_for_each_in_range(&context->umem_tree, start, + ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end, - invalidate_range_start_trampoline, NULL); + invalidate_range_start_trampoline, + blockable, NULL); 
up_read(&context->umem_rwsem); + + return ret; } static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, @@ -242,10 +252,15 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, if (!context->invalidate_range) return; + /* + * TODO: we currently bail out if there is any sleepable work to be done + * in ib_umem_notifier_invalidate_range_start so we shouldn't really block + * here. But this is ugly and fragile. + */ down_read(&context->umem_rwsem); rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end, - invalidate_range_end_trampoline, NULL); + invalidate_range_end_trampoline, true, NULL); up_read(&context->umem_rwsem); ib_ucontext_notifier_end_account(context); } @@ -798,6 +813,7 @@ EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, u64 start, u64 last, umem_call_back cb, + bool blockable, void *cookie) { int ret_val = 0; @@ -809,6 +825,9 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, for (node = rbt_ib_umem_iter_first(root, start, last - 1); node; node = next) { + /* TODO move the blockable decision up to the callback */ + if (!blockable) + return -EAGAIN; next = rbt_ib_umem_iter_next(node, start, last - 1); umem = container_of(node, struct ib_umem_odp, interval_tree); ret_val = cb(umem->umem, start, last, cookie) || ret_val; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index bb98c9e4a7fd..c34a6852d691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.traffic_class = grh->traffic_class; memcpy(packet->mad.hdr.gid, &grh->dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label); + rdma_destroy_ah_attr(&ah_attr); } if (queue_packet(file, agent, packet)) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c0d40fc3a53a..5df8e548cc14 
100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -111,7 +111,7 @@ struct ib_uverbs_device { struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; - struct uverbs_root_spec *specs_root; + struct uverbs_api *uapi; }; struct ib_uverbs_event_queue { @@ -130,21 +130,37 @@ struct ib_uverbs_async_event_file { }; struct ib_uverbs_completion_event_file { - struct ib_uobject_file uobj_file; + struct ib_uobject uobj; struct ib_uverbs_event_queue ev_queue; }; struct ib_uverbs_file { struct kref ref; - struct mutex mutex; - struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; + struct mutex ucontext_lock; + /* + * ucontext must be accessed via ib_uverbs_get_ucontext() or with + * ucontext_lock held + */ struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; struct list_head list; int is_closed; + /* + * To access the uobjects list hw_destroy_rwsem must be held for write + * OR hw_destroy_rwsem held for read AND uobjects_lock held. + * hw_destroy_rwsem should be called across any destruction of the HW + * object of an associated uobject. 
+ */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; + struct list_head uobjects; + + u64 uverbs_cmd_mask; + u64 uverbs_ex_cmd_mask; + struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; @@ -196,7 +212,6 @@ struct ib_uwq_object { struct ib_ucq_object { struct ib_uobject uobject; - struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; @@ -230,7 +245,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why); int uverbs_dealloc_mw(struct ib_mw *mw); @@ -238,12 +253,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); -extern const struct uverbs_attr_def uverbs_uhw_compat_in; -extern const struct uverbs_attr_def uverbs_uhw_compat_out; long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs); struct ib_uverbs_flow_spec { union { @@ -292,7 +302,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ const char __user *buf, int in_len, \ int out_len) @@ -334,7 +343,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ struct ib_udata *ucore, \ struct ib_udata *uhw) diff --git 
a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3e90b6a1d9d2..a21d5214afc3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -48,11 +48,10 @@ #include "core_priv.h" static struct ib_uverbs_completion_event_file * -ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) +_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) { - struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, context); - struct ib_uobject_file *uobj_file; + struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, + fd, ufile); if (IS_ERR(uobj)) return (void *)uobj; @@ -60,13 +59,13 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) uverbs_uobject_get(uobj); uobj_put_read(uobj); - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - return container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + return container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); } +#define ib_uverbs_lookup_comp_file(_fd, _ufile) \ + _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_ucontext *ucontext; struct file *filp; struct ib_rdmacg_object cg_obj; + struct ib_device *ib_dev; int ret; if (out_len < sizeof resp) @@ -84,7 +84,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->mutex); + mutex_lock(&file->ucontext_lock); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto err; + } if (file->ucontext) { ret = -EINVAL; @@ -110,12 +116,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->cg_obj = cg_obj; /* ufile 
is required when some objects are released */ ucontext->ufile = file; - uverbs_initialize_ucontext(ucontext); rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); ucontext->closing = 0; + ucontext->cleanup_retryable = false; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING ucontext->umem_tree = RB_ROOT_CACHED; @@ -146,11 +152,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_file; } - file->ucontext = ucontext; - fd_install(resp.async_fd, filp); - mutex_unlock(&file->mutex); + /* + * Make sure that ib_uverbs_get_ucontext() sees the pointer update + * only after all writes to setup the ucontext have completed + */ + smp_store_release(&file->ucontext, ucontext); + + mutex_unlock(&file->ucontext_lock); return in_len; @@ -169,15 +179,16 @@ err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); err: - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return ret; } -static void copy_query_dev_fields(struct ib_uverbs_file *file, - struct ib_device *ib_dev, +static void copy_query_dev_fields(struct ib_ucontext *ucontext, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { + struct ib_device *ib_dev = ucontext->device; + resp->fw_ver = attr->fw_ver; resp->node_guid = ib_dev->node_guid; resp->sys_image_guid = attr->sys_image_guid; @@ -189,7 +200,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); - resp->max_sge = attr->max_sge; + resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; resp->max_cqe = attr->max_cqe; @@ -221,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { 
struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; + struct ib_ucontext *ucontext; + + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); if (out_len < sizeof resp) return -ENOSPC; @@ -235,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return -EFAULT; memset(&resp, 0, sizeof resp); - copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); + copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -243,8 +258,28 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return in_len; } +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. + */ +static u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. 
+ */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -252,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_uverbs_query_port_resp resp; struct ib_port_attr attr; int ret; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; if (out_len < sizeof resp) return -ENOSPC; @@ -269,12 +311,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.max_mtu = attr.max_mtu; resp.active_mtu = attr.active_mtu; resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = attr.port_cap_flags; + resp.port_cap_flags = make_port_cap_flags(&attr); resp.max_msg_sz = attr.max_msg_sz; resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; + if (rdma_is_grh_required(ib_dev, cmd.port_num)) + resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); @@ -300,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, } ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -310,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -322,11 +367,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext); + uobj = 
uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); + pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; @@ -348,9 +393,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, goto err_copy; } - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: ib_dealloc_pd(pd); @@ -361,25 +404,16 @@ err: } ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file, + in_len); } struct xrcd_table_entry { @@ -468,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, } ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -481,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct inode *inode = NULL; int ret = 0; int new_xrcd = 0; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -517,15 +551,15 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, - file->ucontext); + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file, + &ib_dev); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_tree_mutex_unlock; } if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); + xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -564,9 +598,7 @@ 
ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, mutex_unlock(&file->device->xrcd_tree_mutex); - uobj_alloc_commit(&obj->uobject); - - return in_len; + return uobj_alloc_commit(&obj->uobject, in_len); err_copy: if (inode) { @@ -591,32 +623,25 @@ err_tree_mutex_unlock: } ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_close_xrcd cmd; - struct ib_uobject *uobj; - int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file, + in_len); } -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why) { struct inode *inode; int ret; + struct ib_uverbs_device *dev = uobject->context->ufile->device; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) @@ -624,16 +649,18 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, ret = ib_dealloc_xrcd(xrcd); - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); - else if (inode) + return ret; + } + + if (inode) xrcd_table_delete(dev, inode); return ret; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -644,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, struct ib_pd *pd; struct ib_mr *mr; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -663,11 +691,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MR, file, 
&ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -711,9 +739,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: ib_dereg_mr(mr); @@ -727,7 +753,6 @@ err_free: } ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -759,8 +784,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -778,7 +802,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + file); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -819,29 +844,19 @@ put_uobjs: } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dereg_mr cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file, + in_len); } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -852,6 +867,7 @@ ssize_t 
ib_uverbs_alloc_mw(struct ib_uverbs_file *file, struct ib_mw *mw; struct ib_udata udata; int ret; + struct ib_device *ib_dev; if (out_len < sizeof(resp)) return -ENOSPC; @@ -859,11 +875,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -897,9 +913,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, } uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: uverbs_dealloc_mw(mw); @@ -911,28 +925,19 @@ err_free: } ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_mw cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file, + in_len); } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -940,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, struct ib_uverbs_create_comp_channel_resp resp; struct ib_uobject *uobj; struct ib_uverbs_completion_event_file *ev_file; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -947,14 +953,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) 
return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); resp.fd = uobj->id; ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { @@ -962,12 +968,10 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return -EFAULT; } - uobj_alloc_commit(uobj); - return in_len; + return uobj_alloc_commit(uobj, in_len); } static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw, struct ib_uverbs_ex_create_cq *cmd, @@ -985,21 +989,23 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, int ret; struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; - - if (!ib_dev->create_cq) - return ERR_PTR(-EOPNOTSUPP); + struct ib_device *ib_dev; if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, - file->ucontext); + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file, + &ib_dev); if (IS_ERR(obj)) return obj; + if (!ib_dev->create_cq) { + ret = -EOPNOTSUPP; + goto err; + } + if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, - file->ucontext); + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); if (IS_ERR(ev_file)) { ret = PTR_ERR(ev_file); goto err; @@ -1007,7 +1013,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, } obj->uobject.user_handle = cmd->user_handle; - obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -1019,7 +1024,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > 
offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; - cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); + cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1047,7 +1052,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (ret) goto err_cb; - uobj_alloc_commit(&obj->uobject); + ret = uobj_alloc_commit(&obj->uobject, 0); + if (ret) + return ERR_PTR(ret); return obj; err_cb: @@ -1075,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1106,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, + obj = create_cq(file, &ucore, &uhw, &cmd_ex, offsetof(typeof(cmd_ex), comp_channel) + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, NULL); @@ -1129,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -1155,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, sizeof(resp.response_length))) return -ENOSPC; - obj = create_cq(file, ib_dev, ucore, uhw, &cmd, + obj = create_cq(file, ucore, uhw, &cmd, min(ucore->inlen, sizeof(cmd)), ib_uverbs_ex_create_cq_cb, NULL); @@ -1163,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1181,7 +1185,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - cq = uobj_get_obj_read(cq, 
UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1231,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, } ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1246,7 +1249,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1262,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (!ret) break; - ret = copy_wc_to_user(ib_dev, data_ptr, &wc); + ret = copy_wc_to_user(cq->device, data_ptr, &wc); if (ret) goto out_put; @@ -1283,7 +1286,6 @@ out_put: } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1293,7 +1295,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1306,45 +1308,28 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; - struct ib_cq *cq; struct ib_ucq_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (IS_ERR(uobj)) 
return PTR_ERR(uobj); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - cq = uobj->object; - obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - + obj = container_of(uobj, struct ib_ucq_object, uobject); memset(&resp, 0, sizeof(resp)); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - uverbs_uobject_put(uobj); + uobj_put_destroy(uobj); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -1375,12 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file, int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; + struct ib_device *ib_dev; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); obj->uxrcd = NULL; @@ -1390,9 +1376,9 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, - cmd->rwq_ind_tbl_handle, - file->ucontext); + ind_tbl = uobj_get_obj_read(rwq_ind_table, + UVERBS_OBJECT_RWQ_IND_TBL, + cmd->rwq_ind_tbl_handle, file); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1418,7 +1404,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1437,8 +1423,8 @@ static int create_qp(struct ib_uverbs_file *file, 
cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, - file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, + cmd->srq_handle, file); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; @@ -1447,8 +1433,9 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, - file->ucontext); + rcq = uobj_get_obj_read( + cq, UVERBS_OBJECT_CQ, + cmd->recv_cq_handle, file); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1458,11 +1445,12 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, - file->ucontext); + scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->send_cq_handle, file); if (!ind_tbl) rcq = rcq ?: scq; - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, + file); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1602,9 +1590,7 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) uobj_put_obj_read(ind_tbl); - uobj_alloc_commit(&obj->uevent.uobject); - - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_cb: ib_destroy_qp(qp); @@ -1637,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1698,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -1735,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const 
char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_qp cmd; @@ -1747,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_qp_open_attr attr; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -1759,13 +1743,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, - file->ucontext); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err_put; @@ -1809,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - - uobj_alloc_commit(&obj->uevent.uobject); - - return in_len; + return uobj_alloc_commit(&obj->uevent.uobject, in_len); err_destroy: ib_destroy_qp(qp); @@ -1846,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, } ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1867,7 +1846,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -1968,11 +1947,11 @@ static int modify_qp(struct ib_uverbs_file *file, struct ib_qp *qp; int ret; - attr = kmalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, 
cmd->base.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -1984,15 +1963,64 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } - if ((cmd->base.attr_mask & IB_QP_AV) && - !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { - ret = -EINVAL; - goto release_qp; + if ((cmd->base.attr_mask & IB_QP_AV)) { + if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + + if (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state == IB_QPS_RTR) { + /* We are in INIT->RTR TRANSITION (if we are not, + * this transition will be rejected in subsequent checks). + * In the INIT->RTR transition, we cannot have IB_QP_PORT set, + * but the IB_QP_STATE flag is required. + * + * Since kernel 3.14 (commit dbf727de7440), the uverbs driver, + * when IB_QP_AV is set, has required inclusion of a valid + * port number in the primary AV. (AVs are created and handled + * differently for infiniband and ethernet (RoCE) ports). + * + * Check the port number included in the primary AV against + * the port number in the qp struct, which was set (and saved) + * in the RST->INIT transition. + */ + if (cmd->base.dest.port_num != qp->real_qp->port) { + ret = -EINVAL; + goto release_qp; + } + } else { + /* We are in SQD->SQD. (If we are not, this transition will + * be rejected later in the verbs layer checks). + * Check for both IB_QP_PORT and IB_QP_AV, these can be set + * together in the SQD->SQD transition. + * + * If only IB_QP_AV was set, add in IB_QP_PORT as well (the + * verbs layer driver does not track primary port changes + * resulting from path migration. Thus, in SQD, if the primary + * AV is modified, the primary port should also be modified). + * + * Note that in this transition, the IB_QP_STATE flag + * is not allowed. 
+ */ + if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == (IB_QP_AV | IB_QP_PORT)) && + cmd->base.port_num != cmd->base.dest.port_num) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == IB_QP_AV) { + cmd->base.attr_mask |= IB_QP_PORT; + cmd->base.port_num = cmd->base.dest.port_num; + } + } } if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || - !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) || + cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) { ret = -EINVAL; goto release_qp; } @@ -2049,7 +2077,6 @@ out: } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2076,7 +2103,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, } int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -2112,7 +2138,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2120,33 +2145,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_uqp_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - memset(&resp, 0, sizeof resp); - - uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. 
- */ - uverbs_uobject_get(uobj); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - + memset(&resp, 0, sizeof(resp)); resp.events_reported = obj->uevent.events_reported; - uverbs_uobject_put(uobj); + + uobj_put_destroy(uobj); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -2165,14 +2176,14 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge) } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; struct ib_uverbs_send_wr *user_wr; - struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_send_wr *wr = NULL, *last, *next; + const struct ib_send_wr *bad_wr; struct ib_qp *qp; int i, sg_ind; int is_ud; @@ -2193,7 +2204,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2229,8 +2240,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, - file->ucontext); + ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, + user_wr->wr.ud.ah, file); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2445,13 +2456,13 @@ err: } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_recv_wr *wr, *next; + const struct ib_recv_wr *bad_wr; struct ib_qp *qp; ssize_t ret = -EINVAL; @@ -2464,7 +2475,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, 
file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2494,13 +2505,13 @@ out: } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_recv_wr *wr, *next; + const struct ib_recv_wr *bad_wr; struct ib_srq *srq; ssize_t ret = -EINVAL; @@ -2513,12 +2524,13 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) goto out; resp.bad_wr = 0; - ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + ret = srq->device->post_srq_recv ? + srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP; uobj_put_obj_read(srq); @@ -2543,7 +2555,6 @@ out: } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2552,9 +2563,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; - struct rdma_ah_attr attr; + struct rdma_ah_attr attr = {}; int ret; struct ib_udata udata; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -2562,19 +2574,21 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) - return -EINVAL; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev); if (IS_ERR(uobj)) return 
PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) { + ret = -EINVAL; + goto err; + } + + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err; @@ -2616,9 +2630,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: rdma_destroy_ah(ah); @@ -2632,27 +2644,18 @@ err: } ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file, + in_len); } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2665,7 +2668,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2702,7 +2705,6 @@ out_put: } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2716,7 +2718,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) 
return -EINVAL; @@ -2761,29 +2763,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) resources = kzalloc(sizeof(*resources), GFP_KERNEL); if (!resources) - goto err_res; + return NULL; + + if (!num_specs) + goto out; resources->counters = kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); - - if (!resources->counters) - goto err_cnt; - resources->collection = kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); - if (!resources->collection) - goto err_collection; + if (!resources->counters || !resources->collection) + goto err; +out: resources->max = num_specs; - return resources; -err_collection: +err: kfree(resources->counters); -err_cnt: kfree(resources); -err_res: + return NULL; } @@ -2791,6 +2791,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) { unsigned int i; + if (!uflow_res) + return; + for (i = 0; i < uflow_res->collection_num; i++) atomic_dec(&uflow_res->collection[i]->usecnt); @@ -2826,7 +2829,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res, uflow_res->num++; } -static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -2855,7 +2858,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, ib_spec->action.act = uobj_get_obj_read(flow_action, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, - ucontext); + ufile); if (!ib_spec->action.act) return -EINVAL; ib_spec->action.size = @@ -2873,7 +2876,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, - ucontext); + ufile); if (!ib_spec->flow_count.counters) return -EINVAL; ib_spec->flow_count.size = @@ -3042,9 +3045,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, 
void *kern_spec_mask; void *kern_spec_val; - if (kern_spec->reserved) - return -EINVAL; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); kern_spec_val = (void *)kern_spec + @@ -3057,7 +3057,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, kern_filter_sz, ib_spec); } -static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -3066,14 +3066,13 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec, + return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec, uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); } int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3087,6 +3086,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_wq_init_attr wq_init_attr = {}; size_t required_cmd_sz; size_t required_resp_len; + struct ib_device *ib_dev; required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); @@ -3109,18 +3109,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, - file->ucontext); + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, 
cmd.cq_handle, file); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3174,8 +3174,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, uobj_put_obj_read(pd); uobj_put_obj_read(cq); - uobj_alloc_commit(&obj->uevent.uobject); - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_copy: ib_destroy_wq(wq); @@ -3190,7 +3189,6 @@ err_uobj: } int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3224,29 +3222,19 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. 
- */ - uverbs_uobject_get(uobj); - - ret = uobj_remove_commit(uobj); resp.events_reported = obj->uevent.events_reported; - uverbs_uobject_put(uobj); - if (ret) - return ret; + + uobj_put_destroy(uobj); return ib_copy_to_udata(ucore, &resp, resp.response_length); } int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3275,7 +3263,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (!wq) return -EINVAL; @@ -3296,7 +3284,6 @@ out: } int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3314,6 +3301,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, u32 expected_in_size; size_t required_cmd_sz_header; size_t required_resp_len; + struct ib_device *ib_dev; required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); @@ -3369,8 +3357,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], - file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, + wqs_handles[num_read_wqs], file); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3379,7 +3367,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3423,8 +3411,7 @@ int 
ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - uobj_alloc_commit(uobj); - return 0; + return uobj_alloc_commit(uobj, 0); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3440,12 +3427,10 @@ err_free: } int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_uobject *uobj; int ret; size_t required_cmd_sz; @@ -3466,16 +3451,11 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - return uobj_remove_commit(uobj); + return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, + cmd.ind_tbl_handle, file, 0); } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3488,10 +3468,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_flow_attr *flow_attr; struct ib_qp *qp; struct ib_uflow_resources *uflow_res; + struct ib_uverbs_flow_spec_hdr *kern_spec; int err = 0; - void *kern_spec; void *ib_spec; int i; + struct ib_device *ib_dev; if (ucore->inlen < sizeof(cmd)) return -EINVAL; @@ -3538,8 +3519,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (!kern_flow_attr) return -ENOMEM; - memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); - err = ib_copy_from_udata(kern_flow_attr + 1, ucore, + *kern_flow_attr = cmd.flow_attr; + err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore, cmd.flow_attr.size); if (err) goto err_free_attr; @@ -3547,18 +3528,28 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext); + uobj = 
uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { err = -EINVAL; goto err_uobj; } + if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) { + err = -EINVAL; + goto err_put; + } + + if (!qp->device->create_flow) { + err = -EOPNOTSUPP; + goto err_put; + } + flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { @@ -3578,21 +3569,22 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, flow_attr->flags = kern_flow_attr->flags; flow_attr->size = sizeof(*flow_attr); - kern_spec = kern_flow_attr + 1; + kern_spec = kern_flow_attr->flow_specs; ib_spec = flow_attr + 1; for (i = 0; i < flow_attr->num_of_specs && - cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && - cmd.flow_attr.size >= - ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec, - uflow_res); + cmd.flow_attr.size >= sizeof(*kern_spec) && + cmd.flow_attr.size >= kern_spec->size; + i++) { + err = kern_spec_to_ib_spec( + file, (struct ib_uverbs_flow_spec *)kern_spec, + ib_spec, uflow_res); if (err) goto err_free; flow_attr->size += ((union ib_flow_spec *) ib_spec)->size; - cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; - kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size; + cmd.flow_attr.size -= kern_spec->size; + kern_spec = ((void *)kern_spec) + kern_spec->size; ib_spec += ((union ib_flow_spec *) ib_spec)->size; } if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { @@ -3611,6 +3603,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, } atomic_inc(&qp->usecnt); flow_id->qp = qp; + flow_id->device = qp->device; flow_id->uobject = uobj; uobj->object = flow_id; uflow = container_of(uobj, 
typeof(*uflow), uobject); @@ -3625,13 +3618,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_copy; uobj_put_obj_read(qp); - uobj_alloc_commit(uobj); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return 0; + return uobj_alloc_commit(uobj, 0); err_copy: - ib_destroy_flow(flow_id); + if (!qp->device->destroy_flow(flow_id)) + atomic_dec(&qp->usecnt); err_free: ib_uverbs_flow_resources_free(uflow_res); err_free_flow_attr: @@ -3647,12 +3640,10 @@ err_free_attr: } int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; - struct ib_uobject *uobj; int ret; if (ucore->inlen < sizeof(cmd)) @@ -3665,17 +3656,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file, + 0); } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3686,9 +3671,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; + struct ib_device *ib_dev; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, - file->ucontext); + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3697,7 +3683,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; @@ -3714,15 +3700,15 @@ static int 
__uverbs_create_xsrq(struct ib_uverbs_file *file, } if (ib_srq_has_cq(cmd->srq_type)) { - attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, - file->ucontext); + attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->cq_handle, file); if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -3787,9 +3773,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - uobj_alloc_commit(&obj->uevent.uobject); - - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_copy: ib_destroy_srq(srq); @@ -3813,7 +3797,6 @@ err: } ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3843,7 +3826,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); + ret = __uverbs_create_xsrq(file, &xcmd, &udata); if (ret) return ret; @@ -3851,7 +3834,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xsrq cmd; @@ -3870,7 +3852,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); + ret = __uverbs_create_xsrq(file, &cmd, &udata); if (ret) return ret; @@ -3878,7 +3860,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, 
int out_len) { @@ -3894,7 +3875,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3909,7 +3890,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3925,7 +3905,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3949,7 +3929,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3957,32 +3936,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_uevent_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uevent_object, uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. 
- */ - uverbs_uobject_get(uobj); - memset(&resp, 0, sizeof(resp)); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } resp.events_reported = obj->events_reported; - uverbs_uobject_put(uobj); + + uobj_put_destroy(uobj); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) return -EFAULT; @@ -3990,15 +3957,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_query_device_resp resp = { {0} }; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; int err; + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + if (!ib_dev->query_device) return -EOPNOTSUPP; @@ -4024,7 +3997,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (err) return err; - copy_query_dev_fields(file, ib_dev, &resp.base, &attr); + copy_query_dev_fields(ucontext, &resp.base, &attr); if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) goto end; @@ -4111,7 +4084,6 @@ end: } int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -4141,7 +4113,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8d32c4ae368c..1a6b229e3db3 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,6 +35,103 @@ #include "rdma_core.h" #include "uverbs.h" +struct 
bundle_alloc_head { + struct bundle_alloc_head *next; + u8 data[]; +}; + +struct bundle_priv { + /* Must be first */ + struct bundle_alloc_head alloc_head; + struct bundle_alloc_head *allocated_mem; + size_t internal_avail; + size_t internal_used; + + struct radix_tree_root *radix; + const struct uverbs_api_ioctl_method *method_elm; + void __rcu **radix_slots; + unsigned long radix_slots_len; + u32 method_key; + + struct ib_uverbs_attr __user *user_attrs; + struct ib_uverbs_attr *uattrs; + + DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); + + /* + * Must be last. bundle ends in a flex array which overlaps + * internal_buffer. + */ + struct uverbs_attr_bundle bundle; + u64 internal_buffer[32]; +}; + +/* + * Each method has an absolute minimum amount of memory it needs to allocate, + * precompute that amount and determine if the onstack memory can be used or + * if allocation is needed. + */ +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs) +{ + struct bundle_priv *pbundle; + size_t bundle_size = + offsetof(struct bundle_priv, internal_buffer) + + sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len + + sizeof(*pbundle->uattrs) * num_attrs; + + method_elm->use_stack = bundle_size <= sizeof(*pbundle); + method_elm->bundle_size = + ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer)); + + /* Do not want order-2 allocations for this. */ + WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE); +} + +/** + * uverbs_alloc() - Quickly allocate memory for use with a bundle + * @bundle: The bundle + * @size: Number of bytes to allocate + * @flags: Allocator flags + * + * The bundle allocator is intended for allocations that are connected with + * processing the system call related to the bundle. The allocated memory is + * always freed once the system call completes, and cannot be freed any other + * way. + * + * This tries to use a small pool of pre-allocated memory for performance.
+ */ +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + size_t new_used; + void *res; + + if (check_add_overflow(size, pbundle->internal_used, &new_used)) + return ERR_PTR(-EOVERFLOW); + + if (new_used > pbundle->internal_avail) { + struct bundle_alloc_head *buf; + + buf = kvmalloc(struct_size(buf, data, size), flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->next = pbundle->allocated_mem; + pbundle->allocated_mem = buf; + return buf->data; + } + + res = (void *)pbundle->internal_buffer + pbundle->internal_used; + pbundle->internal_used = + ALIGN(new_used, sizeof(*pbundle->internal_buffer)); + if (flags & __GFP_ZERO) + memset(res, 0, size); + return res; +} +EXPORT_SYMBOL(_uverbs_alloc); + static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { @@ -46,45 +143,24 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } -static int uverbs_process_attr(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - const struct ib_uverbs_attr *uattr, - u16 attr_id, - const struct uverbs_attr_spec_hash *attr_spec_bucket, - struct uverbs_attr_bundle_hash *attr_bundle_h, - struct ib_uverbs_attr __user *uattr_ptr) +static int uverbs_process_attr(struct bundle_priv *pbundle, + const struct uverbs_api_attr *attr_uapi, + struct ib_uverbs_attr *uattr, u32 attr_bkey) { - const struct uverbs_attr_spec *spec; - const struct uverbs_attr_spec *val_spec; - struct uverbs_attr *e; - const struct uverbs_object_spec *object; + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey]; + const struct uverbs_attr_spec *val_spec = spec; struct uverbs_obj_attr *o_attr; - struct uverbs_attr *elements = attr_bundle_h->attrs; - - if (attr_id >= attr_spec_bucket->num_attrs) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) - return -EINVAL; - 
else - return 0; - } - - if (test_bit(attr_id, attr_bundle_h->valid_bitmap)) - return -EINVAL; - - spec = &attr_spec_bucket->attrs[attr_id]; - val_spec = spec; - e = &elements[attr_id]; - e->uattr = uattr_ptr; switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: - if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) + if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems) return -EOPNOTSUPP; if (uattr->attr_data.enum_data.reserved) return -EINVAL; - val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; + val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id]; /* Currently we only support PTR_IN based enums */ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) @@ -98,64 +174,75 @@ static int uverbs_process_attr(struct ib_device *ibdev, * longer struct will fail here if used with an old kernel and * non-zero content, making ABI compat/discovery simpler. */ - if (uattr->len > val_spec->ptr.len && - val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && - !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) + if (uattr->len > val_spec->u.ptr.len && + val_spec->zero_trailing && + !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: - if (uattr->len < val_spec->ptr.min_len || - (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && - uattr->len > val_spec->ptr.len)) + if (uattr->len < val_spec->u.ptr.min_len || + (!val_spec->zero_trailing && + uattr->len > val_spec->u.ptr.len)) return -EINVAL; if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && uattr->attr_data.reserved) return -EINVAL; - e->ptr_attr.data = uattr->data; + e->ptr_attr.uattr_idx = uattr - pbundle->uattrs; e->ptr_attr.len = uattr->len; - e->ptr_attr.flags = uattr->flags; + + if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { + void *p; + + p = uverbs_alloc(&pbundle->bundle, uattr->len); + if (IS_ERR(p)) + return PTR_ERR(p); + + e->ptr_attr.ptr = p; + + if (copy_from_user(p, 
u64_to_user_ptr(uattr->data), + uattr->len)) + return -EFAULT; + } else { + e->ptr_attr.data = uattr->data; + } break; case UVERBS_ATTR_TYPE_IDR: - if (uattr->data >> 32) - return -EINVAL; - /* fall through */ case UVERBS_ATTR_TYPE_FD: if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) + if (uattr->len != 0) return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ibdev, spec->obj.obj_type); - if (!object) - return -EINVAL; - o_attr->type = object->type_attrs; - - o_attr->id = (int)uattr->data; - o_attr->uobject = uverbs_get_uobject_from_context( - o_attr->type, - ucontext, - spec->obj.access, - o_attr->id); + o_attr->attr_elm = attr_uapi; + /* + * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and + * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64 + * here without caring about truncation as we know that the + * IDR implementation today rejects negative IDs + */ + o_attr->uobject = uverbs_get_uobject_from_file( + spec->u.obj.obj_type, + pbundle->bundle.ufile, + spec->u.obj.access, + uattr->data_s64); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); + __set_bit(attr_bkey, pbundle->uobj_finalize); - if (spec->obj.access == UVERBS_ACCESS_NEW) { - u64 id = o_attr->uobject->id; + if (spec->u.obj.access == UVERBS_ACCESS_NEW) { + unsigned int uattr_idx = uattr - pbundle->uattrs; + s64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ - if (put_user(id, &e->uattr->data)) { - uverbs_finalize_object(o_attr->uobject, - UVERBS_ACCESS_NEW, - false); + if (put_user(id, &pbundle->user_attrs[uattr_idx].data)) return -EFAULT; - } } break; @@ -163,220 +250,225 @@ static int uverbs_process_attr(struct ib_device *ibdev, return -EOPNOTSUPP; } - set_bit(attr_id, attr_bundle_h->valid_bitmap); return 0; } -static int uverbs_uattrs_process(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - const struct 
uverbs_method_spec *method, - struct uverbs_attr_bundle *attr_bundle, - struct ib_uverbs_attr __user *uattr_ptr) +/* + * We search the radix tree with the method prefix and now we want to fast + * search the suffix bits to get a particular attribute pointer. It is not + * totally clear to me if this breaks the radix tree encapsulation or not, but + * it uses the iter data to determine if the method iter points at the same + * chunk that will store the attribute, if so it just derefs it directly. By + * construction in most kernel configs the method and attrs will all fit in a + * single radix chunk, so in most cases this will have no search. Other cases + * this falls back to a full search. + */ +static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle, + u32 attr_key) { - size_t i; - int ret = 0; - int num_given_buckets = 0; - - for (i = 0; i < num_uattrs; i++) { - const struct ib_uverbs_attr *uattr = &uattrs[i]; - u16 attr_id = uattr->attr_id; - struct uverbs_attr_spec_hash *attr_spec_bucket; - - ret = uverbs_ns_idx(&attr_id, method->num_buckets); - if (ret < 0) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } - continue; - } + void __rcu **slot; - /* - * ret is the found ns, so increase num_given_buckets if - * necessary.
- */ - if (ret >= num_given_buckets) - num_given_buckets = ret + 1; - - attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, - attr_spec_bucket, &attr_bundle->hash[ret], - uattr_ptr++); - if (ret) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } + if (likely(attr_key < pbundle->radix_slots_len)) { + void *entry; + + slot = pbundle->radix_slots + attr_key; + entry = rcu_dereference_raw(*slot); + if (likely(!radix_tree_is_internal_node(entry) && entry)) + return slot; } - return num_given_buckets; + return radix_tree_lookup_slot(pbundle->radix, + pbundle->method_key | attr_key); } -static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) +static int uverbs_set_attr(struct bundle_priv *pbundle, + struct ib_uverbs_attr *uattr) { - unsigned int i; - - for (i = 0; i < attr_bundle->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + u32 attr_key = uapi_key_attr(uattr->attr_id); + u32 attr_bkey = uapi_bkey_attr(attr_key); + const struct uverbs_api_attr *attr; + void __rcu **slot; + int ret; - if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, - attr_bundle->hash[i].valid_bitmap, - attr_spec_bucket->num_attrs)) - return -EINVAL; + slot = uapi_get_attr_for_method(pbundle, attr_key); + if (!slot) { + /* + * Kernel does not support the attribute but user-space says it + * is mandatory + */ + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EPROTONOSUPPORT; + return 0; } + attr = srcu_dereference( + *slot, &pbundle->bundle.ufile->device->disassociate_srcu); - for (; i < method_spec->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + /* Reject duplicate attributes from user-space */ + if (test_bit(attr_bkey, pbundle->bundle.attr_present)) + return -EINVAL; - if 
(!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, - attr_spec_bucket->num_attrs)) - return -EINVAL; - } + ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey); + if (ret) + return ret; + + __set_bit(attr_bkey, pbundle->bundle.attr_present); return 0; } -static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - struct ib_device *ibdev, - struct ib_uverbs_file *ufile, - const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) +static int ib_uverbs_run_method(struct bundle_priv *pbundle, + unsigned int num_attrs) { + int (*handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); + unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; + unsigned int i; int ret; - int finalize_ret; - int num_given_buckets; - num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, - num_uattrs, method_spec, - attr_bundle, uattr_ptr); - if (num_given_buckets <= 0) + /* See uverbs_disassociate_api() */ + handler = srcu_dereference( + pbundle->method_elm->handler, + &pbundle->bundle.ufile->device->disassociate_srcu); + if (!handler) + return -EIO; + + pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size); + if (IS_ERR(pbundle->uattrs)) + return PTR_ERR(pbundle->uattrs); + if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size)) + return -EFAULT; + + for (i = 0; i != num_attrs; i++) { + ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]); + if (unlikely(ret)) + return ret; + } + + /* User space did not provide all the mandatory attributes */ + if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory, + pbundle->bundle.attr_present, + pbundle->method_elm->key_bitmap_len))) return -EINVAL; - attr_bundle->num_buckets = num_given_buckets; - ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); - if (ret) - goto cleanup; + 
if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { + struct uverbs_obj_attr *destroy_attr = + &pbundle->bundle.attrs[destroy_bkey].obj_attr; - ret = method_spec->handler(ibdev, ufile, attr_bundle); -cleanup: - finalize_ret = uverbs_finalize_objects(attr_bundle, - method_spec->attr_buckets, - attr_bundle->num_buckets, - !ret); + ret = uobj_destroy(destroy_attr->uobject); + if (ret) + return ret; + __clear_bit(destroy_bkey, pbundle->uobj_finalize); - return ret ? ret : finalize_ret; -} + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + uobj_put_destroy(destroy_attr->uobject); + } else { + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + } -#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 -static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct ib_uverbs_ioctl_hdr *hdr, - void __user *buf) -{ - const struct uverbs_object_spec *object_spec; - const struct uverbs_method_spec *method_spec; - long err = 0; - unsigned int i; - struct { - struct ib_uverbs_attr *uattrs; - struct uverbs_attr_bundle *uverbs_attr_bundle; - } *ctx = NULL; - struct uverbs_attr *curr_attr; - unsigned long *curr_bitmap; - size_t ctx_size; - uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; - - if (hdr->driver_id != ib_dev->driver_id) + /* + * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can + * not invoke the method because the request is not supported. No + * other cases should return this code. 
+ */ + if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT)) return -EINVAL; - object_spec = uverbs_get_object(ib_dev, hdr->object_id); - if (!object_spec) - return -EPROTONOSUPPORT; + return ret; +} - method_spec = uverbs_get_method(object_spec, hdr->method_id); - if (!method_spec) - return -EPROTONOSUPPORT; +static int bundle_destroy(struct bundle_priv *pbundle, bool commit) +{ + unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; + struct bundle_alloc_head *memblock; + unsigned int i; + int ret = 0; - if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) - return -EINVAL; + i = -1; + while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + int current_ret; + + current_ret = uverbs_finalize_object( + attr->obj_attr.uobject, + attr->obj_attr.attr_elm->spec.u.obj.access, commit); + if (!ret) + ret = current_ret; + } - ctx_size = sizeof(*ctx) + - sizeof(struct uverbs_attr_bundle) + - sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + - sizeof(*ctx->uattrs) * hdr->num_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) * - method_spec->num_child_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) * - (method_spec->num_child_attrs / BITS_PER_LONG + - method_spec->num_buckets); - - if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ) - ctx = (void *)data; - if (!ctx) - ctx = kmalloc(ctx_size, GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx); - ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) + - (sizeof(ctx->uverbs_attr_bundle->hash[0]) * - method_spec->num_buckets); - curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); - curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); + for (memblock = pbundle->allocated_mem; memblock;) { + struct bundle_alloc_head *tmp = memblock; - /* - * We just fill the pointers and num_attrs here. 
The data itself will be - * filled at a later stage (uverbs_process_attr) - */ - for (i = 0; i < method_spec->num_buckets; i++) { - unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; - - ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; - curr_attr += curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap; - bitmap_zero(curr_bitmap, curr_num_attrs); - curr_bitmap += BITS_TO_LONGS(curr_num_attrs); + memblock = memblock->next; + kvfree(tmp); } - err = copy_from_user(ctx->uattrs, buf, - sizeof(*ctx->uattrs) * hdr->num_attrs); - if (err) { - err = -EFAULT; - goto out; - } + return ret; +} - err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, - file, method_spec, ctx->uverbs_attr_bundle); +static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, + struct ib_uverbs_ioctl_hdr *hdr, + struct ib_uverbs_attr __user *user_attrs) +{ + const struct uverbs_api_ioctl_method *method_elm; + struct uverbs_api *uapi = ufile->device->uapi; + struct radix_tree_iter attrs_iter; + struct bundle_priv *pbundle; + struct bundle_priv onstack; + void __rcu **slot; + int destroy_ret; + int ret; - /* - * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can - * not invoke the method because the request is not supported. No - * other cases should return this code. 
- */ - if (unlikely(err == -EPROTONOSUPPORT)) { - WARN_ON_ONCE(err == -EPROTONOSUPPORT); - err = -EINVAL; + if (unlikely(hdr->driver_id != uapi->driver_id)) + return -EINVAL; + + slot = radix_tree_iter_lookup( + &uapi->radix, &attrs_iter, + uapi_key_obj(hdr->object_id) | + uapi_key_ioctl_method(hdr->method_id)); + if (unlikely(!slot)) + return -EPROTONOSUPPORT; + method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); + + if (!method_elm->use_stack) { + pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); + if (!pbundle) + return -ENOMEM; + pbundle->internal_avail = + method_elm->bundle_size - + offsetof(struct bundle_priv, internal_buffer); + pbundle->alloc_head.next = NULL; + pbundle->allocated_mem = &pbundle->alloc_head; + } else { + pbundle = &onstack; + pbundle->internal_avail = sizeof(pbundle->internal_buffer); + pbundle->allocated_mem = NULL; } -out: - if (ctx != (void *)data) - kfree(ctx); - return err; -} -#define IB_UVERBS_MAX_CMD_SZ 4096 + /* Space for the pbundle->bundle.attrs flex array */ + pbundle->method_elm = method_elm; + pbundle->method_key = attrs_iter.index; + pbundle->bundle.ufile = ufile; + pbundle->radix = &uapi->radix; + pbundle->radix_slots = slot; + pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter); + pbundle->user_attrs = user_attrs; + + pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * + sizeof(*pbundle->bundle.attrs), + sizeof(*pbundle->internal_buffer)); + memset(pbundle->bundle.attr_present, 0, + sizeof(pbundle->bundle.attr_present)); + memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); + + ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); + destroy_ret = bundle_destroy(pbundle, ret == 0); + if (unlikely(destroy_ret && !ret)) + return destroy_ret; + + return ret; +} long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -384,39 +476,138 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct 
ib_uverbs_ioctl_hdr __user *user_hdr = (struct ib_uverbs_ioctl_hdr __user *)arg; struct ib_uverbs_ioctl_hdr hdr; - struct ib_device *ib_dev; int srcu_key; - long err; + int err; + + if (unlikely(cmd != RDMA_VERBS_IOCTL)) + return -ENOIOCTLCMD; + + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + if (err) + return -EFAULT; + + if (hdr.length > PAGE_SIZE || + hdr.length != struct_size(&hdr, attrs, hdr.num_attrs)) + return -EINVAL; + + if (hdr.reserved1 || hdr.reserved2) + return -EPROTONOSUPPORT; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - err = -EIO; - goto out; + err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return err; +} + +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + const struct uverbs_attr *attr; + u64 flags; + + attr = uverbs_attr_get(attrs_bundle, idx); + /* Missing attribute means 0 flags */ + if (IS_ERR(attr)) { + *to = 0; + return 0; } - if (cmd == RDMA_VERBS_IOCTL) { - err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + /* + * New userspace code should use 8 bytes to pass flags, but we + * transparently support old userspaces that were using 4 bytes as + * well. 
+ */ + if (attr->ptr_attr.len == 8) + flags = attr->ptr_attr.data; + else if (attr->ptr_attr.len == 4) + flags = *(u32 *)&attr->ptr_attr.data; + else + return -EINVAL; - if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || - hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { - err = -EINVAL; - goto out; - } + if (flags & ~allowed_bits) + return -EINVAL; - if (hdr.reserved1 || hdr.reserved2) { - err = -EPROTONOSUPPORT; - goto out; - } + *to = flags; + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags64); - err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, - (__user void *)arg + sizeof(hdr)); +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + u64 flags; + int ret; + + ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits); + if (ret) + return ret; + + if (flags > U32_MAX) + return -EINVAL; + *to = flags; + + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags32); + +/* + * This is for ease of conversion. The purpose is to convert all drivers to + * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and + * attr == 1 is output. 
+ */ +void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *uhw_in = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN); + const struct uverbs_attr *uhw_out = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(uhw_in)) { + udata->inlen = uhw_in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(uhw_in)) + udata->inbuf = + &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx] + .data; + else + udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); } else { - err = -ENOIOCTLCMD; + udata->inbuf = NULL; + udata->inlen = 0; } -out: - srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return err; + if (!IS_ERR(uhw_out)) { + udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); + udata->outlen = uhw_out->ptr_attr.len; + } else { + udata->outbuf = NULL; + udata->outlen = 0; + } +} + +int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, + const void *from, size_t size) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + u16 flags; + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) + return -EFAULT; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return -EFAULT; + + return 0; } +EXPORT_SYMBOL(uverbs_copy_to); diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c deleted file mode 100644 index 6ceb672c4d46..000000000000 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ /dev/null @@ -1,664 +0,0 @@ -/* - * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <rdma/uverbs_ioctl.h> -#include <rdma/rdma_user_ioctl.h> -#include <linux/bitops.h> -#include "uverbs.h" - -#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT) -#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT) -#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK) - -#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \ - buckets_offset) \ - for (tmpj = 0, \ - elem = (*(const void ***)((hashes)[tmpi] + \ - (buckets_offset)))[0]; \ - tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \ - tmpj++) \ - if ((elem = ((*(const void ***)(hashes[tmpi] + \ - (buckets_offset)))[tmpj]))) - -/* - * Iterate all elements of a few @hashes. The number of given hashes is - * indicated by @num_hashes. The offset of the number of buckets in the hash is - * represented by @num_buckets_offset, while the offset of the buckets array in - * the hash structure is represented by @buckets_offset. tmpi and tmpj are two - * short (or int) based indices that are given by the user. tmpi iterates over - * the different hashes. @elem points the current element in the hashes[tmpi] - * bucket we are looping on. To be honest, @hashes representation isn't exactly - * a hash, but more a collection of elements. These elements' ids are treated - * in a hash like manner, where the first upper bits are the bucket number. - * These elements are later mapped into a perfect-hash. 
- */ -#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \ - num_buckets_offset, buckets_offset) \ - for (tmpi = 0; tmpi < (num_hashes); tmpi++) \ - _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\ - buckets_offset) - -#define get_elements_iterators_entry_above(iters, num_elements, elements, \ - num_objects_fld, objects_fld, bucket,\ - min_id) \ - get_elements_above_id((const void **)iters, num_elements, \ - (const void **)(elements), \ - offsetof(typeof(**elements), \ - num_objects_fld), \ - offsetof(typeof(**elements), objects_fld),\ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket, min_id) - -#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \ - get_elements_iterators_entry_above(iters, num_trees, trees, \ - num_objects, objects, bucket, min_id) - -#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\ - get_elements_iterators_entry_above(method_iters, num_iters, iters, \ - num_methods, methods, bucket, min_id) - -#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\ - get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \ - num_attrs, attrs, bucket, min_id) - -/* - * get_elements_above_id get a few hashes represented by @elements and - * @num_elements. The hashes fields are described by @num_offset, @data_offset - * and @id_offset in the same way as required by for_each_element. The function - * returns an array of @iters, represents an array of elements in the hashes - * buckets, which their ids are the smallest ids in all hashes but are all - * larger than the id given by min_id. Elements are only added to the iters - * array if their id belongs to the bucket @bucket. The number of elements in - * the returned array is returned by the function. @min_id is also updated to - * reflect the new min_id of all elements in iters. 
- */ -static size_t get_elements_above_id(const void **iters, - unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset, - u16 bucket, - short *min_id) -{ - size_t num_iters = 0; - short min = SHRT_MAX; - const void *elem; - int i, j, last_stored = -1; - unsigned int equal_min = 0; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) != bucket) - continue; - - if (GET_ID(id) < *min_id || - (min != SHRT_MAX && GET_ID(id) > min)) - continue; - - /* - * We first iterate all hashes represented by @elements. When - * we do, we try to find an element @elem in the bucket @bucket - * which its id is min. Since we can't ensure the user sorted - * the elements in increasing order, we override this hash's - * minimal id element we found, if a new element with a smaller - * id was just found. - */ - iters[last_stored == i ? num_iters - 1 : num_iters++] = elem; - last_stored = i; - if (min == GET_ID(id)) - equal_min++; - else - equal_min = 1; - min = GET_ID(id); - } - - /* - * We only insert to our iters array an element, if its id is smaller - * than all previous ids. Therefore, the final iters array is sorted so - * that smaller ids are in the end of the array. - * Therefore, we need to clean the beginning of the array to make sure - * all ids of final elements are equal to min. 
- */ - memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min); - - *min_id = min; - return equal_min; -} - -#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ - objects_fld, bucket) \ - find_max_element_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld), \ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket) - -static short find_max_element_ns_id(unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset) -{ - short max_ns = SHRT_MIN; - const void *elem; - int i, j; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) > max_ns) - max_ns = GET_NS_ID(id); - } - - return max_ns; -} - -static short find_max_element_id(unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset, - u16 bucket) -{ - short max_id = SHRT_MIN; - const void *elem; - int i, j; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) == bucket && - GET_ID(id) > max_id) - max_id = GET_ID(id); - } - return max_id; -} - -#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ - objects_fld, bucket) \ - find_max_element_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld), \ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket) - -#define find_max_element_ns_entry_id(num_elements, elements, \ - num_objects_fld, objects_fld) \ - find_max_element_ns_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld),\ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id)) - -/* - * 
find_max_xxxx_ns_id gets a few elements. Each element is described by an id - * which its upper bits represents a namespace. It finds the max namespace. This - * could be used in order to know how many buckets do we need to allocate. If no - * elements exist, SHRT_MIN is returned. Namespace represents here different - * buckets. The common example is "common bucket" and "driver bucket". - * - * find_max_xxxx_id gets a few elements and a bucket. Each element is described - * by an id which its upper bits represent a namespace. It returns the max id - * which is contained in the same namespace defined in @bucket. This could be - * used in order to know how many elements do we need to allocate in the bucket. - * If no elements exist, SHRT_MIN is returned. - */ - -#define find_max_object_id(num_trees, trees, bucket) \ - find_max_element_entry_id(num_trees, trees, num_objects,\ - objects, bucket) -#define find_max_object_ns_id(num_trees, trees) \ - find_max_element_ns_entry_id(num_trees, trees, \ - num_objects, objects) - -#define find_max_method_id(num_iters, iters, bucket) \ - find_max_element_entry_id(num_iters, iters, num_methods,\ - methods, bucket) -#define find_max_method_ns_id(num_iters, iters) \ - find_max_element_ns_entry_id(num_iters, iters, \ - num_methods, methods) - -#define find_max_attr_id(num_iters, iters, bucket) \ - find_max_element_entry_id(num_iters, iters, num_attrs, \ - attrs, bucket) -#define find_max_attr_ns_id(num_iters, iters) \ - find_max_element_ns_entry_id(num_iters, iters, \ - num_attrs, attrs) - -static void free_method(struct uverbs_method_spec *method) -{ - unsigned int i; - - if (!method) - return; - - for (i = 0; i < method->num_buckets; i++) - kfree(method->attr_buckets[i]); - - kfree(method); -} - -#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \ - (attr)->type == UVERBS_ATTR_TYPE_FD) - -/* - * This function gets array of size @num_method_defs which contains pointers to - * method definitions @method_defs. 
The function allocates an - * uverbs_method_spec structure and initializes its number of buckets and the - * elements in buckets to the correct attributes. While doing that, it - * validates that there aren't conflicts between attributes of different - * method_defs. - */ -static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs, - size_t num_method_defs) -{ - int bucket_idx; - int max_attr_buckets = 0; - size_t num_attr_buckets = 0; - int res = 0; - struct uverbs_method_spec *method = NULL; - const struct uverbs_attr_def **attr_defs; - unsigned int num_of_singularities = 0; - - max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs); - if (max_attr_buckets >= 0) - num_attr_buckets = max_attr_buckets + 1; - - method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets), - GFP_KERNEL); - if (!method) - return ERR_PTR(-ENOMEM); - - method->num_buckets = num_attr_buckets; - attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL); - if (!attr_defs) { - res = -ENOMEM; - goto free_method; - } - for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - int attr_max_bucket = 0; - struct uverbs_attr_spec_hash *hash = NULL; - - attr_max_bucket = find_max_attr_id(num_method_defs, method_defs, - bucket_idx); - if (attr_max_bucket < 0) - continue; - - hash = kzalloc(sizeof(*hash) + - ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1), - sizeof(long)) + - BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - hash->num_attrs = attr_max_bucket + 1; - method->num_child_attrs += hash->num_attrs; - hash->mandatory_attrs_bitmask = (void *)(hash + 1) + - ALIGN(sizeof(*hash->attrs) * - (attr_max_bucket + 1), - sizeof(long)); - - method->attr_buckets[bucket_idx] = hash; - - do { - size_t num_attr_defs; - struct uverbs_attr_spec *attr; - bool attr_obj_with_special_access; - - num_attr_defs = - 
get_attrs_above_id(attr_defs, - num_method_defs, - method_defs, - bucket_idx, - &min_id); - /* Last attr in bucket */ - if (!num_attr_defs) - break; - - if (num_attr_defs > 1) { - /* - * We don't allow two attribute definitions for - * the same attribute. This is usually a - * programmer error. If required, it's better to - * just add a new attribute to capture the new - * semantics. - */ - res = -EEXIST; - goto free; - } - - attr = &hash->attrs[min_id]; - memcpy(attr, &attr_defs[0]->attr, sizeof(*attr)); - - attr_obj_with_special_access = IS_ATTR_OBJECT(attr) && - (attr->obj.access == UVERBS_ACCESS_NEW || - attr->obj.access == UVERBS_ACCESS_DESTROY); - num_of_singularities += !!attr_obj_with_special_access; - if (WARN(num_of_singularities > 1, - "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", - min_id) || - WARN(attr_obj_with_special_access && - !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY), - "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", - min_id) || - WARN(IS_ATTR_OBJECT(attr) && - attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", - min_id)) { - res = -EINVAL; - goto free; - } - - if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY) - set_bit(min_id, hash->mandatory_attrs_bitmask); - min_id++; - - } while (1); - } - kfree(attr_defs); - return method; - -free: - kfree(attr_defs); -free_method: - free_method(method); - return ERR_PTR(res); -} - -static void free_object(struct uverbs_object_spec *object) -{ - unsigned int i, j; - - if (!object) - return; - - for (i = 0; i < object->num_buckets; i++) { - struct uverbs_method_spec_hash *method_buckets = - object->method_buckets[i]; - - if (!method_buckets) - continue; - - for (j = 0; j < method_buckets->num_methods; j++) - free_method(method_buckets->methods[j]); - - kfree(method_buckets); - } - - kfree(object); -} - -/* - * This function gets array 
of size @num_object_defs which contains pointers to - * object definitions @object_defs. The function allocated an - * uverbs_object_spec structure and initialize its number of buckets and the - * elements in buckets to the correct methods. While doing that, it - * sorts out the correct relationship between conflicts in the same method. - */ -static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs, - size_t num_object_defs) -{ - u16 bucket_idx; - int max_method_buckets = 0; - u16 num_method_buckets = 0; - int res = 0; - struct uverbs_object_spec *object = NULL; - const struct uverbs_method_def **method_defs; - - max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs); - if (max_method_buckets >= 0) - num_method_buckets = max_method_buckets + 1; - - object = kzalloc(struct_size(object, method_buckets, - num_method_buckets), - GFP_KERNEL); - if (!object) - return ERR_PTR(-ENOMEM); - - object->num_buckets = num_method_buckets; - method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL); - if (!method_defs) { - res = -ENOMEM; - goto free_object; - } - - for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - int methods_max_bucket = 0; - struct uverbs_method_spec_hash *hash = NULL; - - methods_max_bucket = find_max_method_id(num_object_defs, object_defs, - bucket_idx); - if (methods_max_bucket < 0) - continue; - - hash = kzalloc(struct_size(hash, methods, - methods_max_bucket + 1), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - - hash->num_methods = methods_max_bucket + 1; - object->method_buckets[bucket_idx] = hash; - - do { - size_t num_method_defs; - struct uverbs_method_spec *method; - int i; - - num_method_defs = - get_methods_above_id(method_defs, - num_object_defs, - object_defs, - bucket_idx, - &min_id); - /* Last method in bucket */ - if (!num_method_defs) - break; - - method = build_method_with_attrs(method_defs, - 
num_method_defs); - if (IS_ERR(method)) { - res = PTR_ERR(method); - goto free; - } - - /* - * The last tree which is given as an argument to the - * merge overrides previous method handler. - * Therefore, we iterate backwards and search for the - * first handler which != NULL. This also defines the - * set of flags used for this handler. - */ - for (i = num_method_defs - 1; - i >= 0 && !method_defs[i]->handler; i--) - ; - hash->methods[min_id++] = method; - /* NULL handler isn't allowed */ - if (WARN(i < 0, - "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n", - min_id)) { - res = -EINVAL; - goto free; - } - method->handler = method_defs[i]->handler; - method->flags = method_defs[i]->flags; - - } while (1); - } - kfree(method_defs); - return object; - -free: - kfree(method_defs); -free_object: - free_object(object); - return ERR_PTR(res); -} - -void uverbs_free_spec_tree(struct uverbs_root_spec *root) -{ - unsigned int i, j; - - if (!root) - return; - - for (i = 0; i < root->num_buckets; i++) { - struct uverbs_object_spec_hash *object_hash = - root->object_buckets[i]; - - if (!object_hash) - continue; - - for (j = 0; j < object_hash->num_objects; j++) - free_object(object_hash->objects[j]); - - kfree(object_hash); - } - - kfree(root); -} -EXPORT_SYMBOL(uverbs_free_spec_tree); - -struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, - const struct uverbs_object_tree_def **trees) -{ - u16 bucket_idx; - short max_object_buckets = 0; - size_t num_objects_buckets = 0; - struct uverbs_root_spec *root_spec = NULL; - const struct uverbs_object_def **object_defs; - int i; - int res = 0; - - max_object_buckets = find_max_object_ns_id(num_trees, trees); - /* - * Devices which don't want to support ib_uverbs, should just allocate - * an empty parsing tree. Every user-space command won't hit any valid - * entry in the parsing tree and thus will fail. 
- */ - if (max_object_buckets >= 0) - num_objects_buckets = max_object_buckets + 1; - - root_spec = kzalloc(struct_size(root_spec, object_buckets, - num_objects_buckets), - GFP_KERNEL); - if (!root_spec) - return ERR_PTR(-ENOMEM); - root_spec->num_buckets = num_objects_buckets; - - object_defs = kcalloc(num_trees, sizeof(*object_defs), - GFP_KERNEL); - if (!object_defs) { - res = -ENOMEM; - goto free_root; - } - - for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - short objects_max_bucket; - struct uverbs_object_spec_hash *hash = NULL; - - objects_max_bucket = find_max_object_id(num_trees, trees, - bucket_idx); - if (objects_max_bucket < 0) - continue; - - hash = kzalloc(struct_size(hash, objects, - objects_max_bucket + 1), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - hash->num_objects = objects_max_bucket + 1; - root_spec->object_buckets[bucket_idx] = hash; - - do { - size_t num_object_defs; - struct uverbs_object_spec *object; - - num_object_defs = get_objects_above_id(object_defs, - num_trees, - trees, - bucket_idx, - &min_id); - /* Last object in bucket */ - if (!num_object_defs) - break; - - object = build_object_with_methods(object_defs, - num_object_defs); - if (IS_ERR(object)) { - res = PTR_ERR(object); - goto free; - } - - /* - * The last tree which is given as an argument to the - * merge overrides previous object's type_attrs. - * Therefore, we iterate backwards and search for the - * first type_attrs which != NULL. - */ - for (i = num_object_defs - 1; - i >= 0 && !object_defs[i]->type_attrs; i--) - ; - /* - * NULL is a valid type_attrs. It means an object we - * can't instantiate (like DEVICE). - */ - object->type_attrs = i < 0 ? 
NULL : - object_defs[i]->type_attrs; - - hash->objects[min_id++] = object; - } while (1); - } - - kfree(object_defs); - return root_spec; - -free: - kfree(object_defs); -free_root: - uverbs_free_spec_tree(root_spec); - return ERR_PTR(res); -} -EXPORT_SYMBOL(uverbs_alloc_spec_tree); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2094d136513d..823beca448e1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -41,8 +41,6 @@ #include <linux/fs.h> #include <linux/poll.h> #include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/sched/task.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/anon_inodes.h> @@ -77,7 +75,6 @@ static struct class *uverbs_class; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, @@ -118,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, @@ -138,6 +134,30 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +/* + * Must be called with the ufile->device->disassociate_srcu held, and the lock + * must be held until use of the ucontext is finished. + */ +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) +{ + /* + * We do not hold the hw_destroy_rwsem lock for this flow, instead + * srcu is used. It does not matter if someone races this with + * get_context, we get NULL or valid ucontext. 
+ */ + struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); + + if (!srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) + return ERR_PTR(-EIO); + + if (!ucontext) + return ERR_PTR(-EINVAL); + + return ucontext; +} +EXPORT_SYMBOL(ib_uverbs_get_ucontext); + int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; @@ -154,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj) struct ib_uverbs_device *dev = container_of(kobj, struct ib_uverbs_device, kobj); + uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); kfree(dev); } @@ -184,7 +205,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file, } spin_unlock_irq(&ev_file->ev_queue.lock); - uverbs_uobject_put(&ev_file->uobj_file.uobj); + uverbs_uobject_put(&ev_file->uobj); } spin_lock_irq(&file->async_file->ev_queue.lock); @@ -220,20 +241,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context, - bool device_removed) -{ - context->closing = 1; - uverbs_cleanup_ucontext(context, device_removed); - put_pid(context->tgid); - - ib_rdmacg_uncharge(&context->cg_obj, context->device, - RDMACG_RESOURCE_HCA_HANDLE); - - return context->device->dealloc_ucontext(context); -} - static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); @@ -246,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref) struct ib_device *ib_dev; int srcu_key; + release_ufile_idr_uobject(file); + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); @@ -338,7 +347,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, filp->private_data; return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj_file.ufile, filp, + comp_ev_file->uobj.ufile, filp, buf, count, pos, sizeof(struct ib_uverbs_comp_event_desc)); } 
@@ -420,7 +429,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) { - struct ib_uverbs_completion_event_file *file = filp->private_data; + struct ib_uobject *uobj = filp->private_data; + struct ib_uverbs_completion_event_file *file = container_of( + uobj, struct ib_uverbs_completion_event_file, uobj); struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->ev_queue.lock); @@ -528,7 +539,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, + ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } @@ -637,13 +648,13 @@ err_put_refs: return filp; } -static bool verify_command_mask(struct ib_device *ib_dev, - u32 command, bool extended) +static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command, + bool extended) { if (!extended) - return ib_dev->uverbs_cmd_mask & BIT_ULL(command); + return ufile->uverbs_cmd_mask & BIT_ULL(command); - return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command); + return ufile->uverbs_ex_cmd_mask & BIT_ULL(command); } static bool verify_command_idx(u32 command, bool extended) @@ -713,7 +724,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_ex_cmd_hdr ex_hdr; - struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; bool extended; int srcu_key; @@ -748,24 +758,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto out; - } - - 
/* - * Must be after the ib_dev check, as once the RCU clears ib_dev == - * NULL means ucontext == NULL - */ - if (!file->ucontext && - (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) { - ret = -EINVAL; - goto out; - } - if (!verify_command_mask(ib_dev, command, extended)) { + if (!verify_command_mask(file, command, extended)) { ret = -EOPNOTSUPP; goto out; } @@ -773,7 +767,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, buf += sizeof(hdr); if (!extended) { - ret = uverbs_cmd_table[command](file, ib_dev, buf, + ret = uverbs_cmd_table[command](file, buf, hdr.in_words * 4, hdr.out_words * 4); } else { @@ -792,7 +786,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); - ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); + ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); ret = (ret) ? : count; } @@ -804,22 +798,18 @@ out: static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; - struct ib_device *ib_dev; + struct ib_ucontext *ucontext; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) { + ret = PTR_ERR(ucontext); goto out; } - if (!file->ucontext) - ret = -ENODEV; - else - ret = ib_dev->mmap(file->ucontext, vma); + ret = ucontext->device->mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; @@ -879,13 +869,12 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) } file->device = dev; - spin_lock_init(&file->idr_lock); - idr_init(&file->idr); - file->ucontext = NULL; - file->async_file = NULL; kref_init(&file->ref); - mutex_init(&file->mutex); - 
mutex_init(&file->cleanup_mutex); + mutex_init(&file->ucontext_lock); + + spin_lock_init(&file->uobjects_lock); + INIT_LIST_HEAD(&file->uobjects); + init_rwsem(&file->hw_destroy_rwsem); filp->private_data = file; kobject_get(&dev->kobj); @@ -893,6 +882,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; + file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; + + setup_ufile_idr_uobject(file); + return nonseekable_open(inode, filp); err_module: @@ -911,13 +905,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext, false); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); - idr_destroy(&file->idr); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { @@ -1006,6 +994,19 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); +static int ib_uverbs_create_uapi(struct ib_device *device, + struct ib_uverbs_device *uverbs_dev) +{ + struct uverbs_api *uapi; + + uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); + if (IS_ERR(uapi)) + return PTR_ERR(uapi); + + uverbs_dev->uapi = uapi; + return 0; +} + static void ib_uverbs_add_one(struct ib_device *device) { int devnum; @@ -1048,6 +1049,9 @@ static void ib_uverbs_add_one(struct ib_device *device) rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; + if (ib_uverbs_create_uapi(device, uverbs_dev)) + goto err; + cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? 
&uverbs_mmap_fops : &uverbs_fops; @@ -1067,18 +1071,6 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; - if (!device->specs_root) { - const struct uverbs_object_tree_def *default_root[] = { - uverbs_default_get_objects()}; - - uverbs_dev->specs_root = uverbs_alloc_spec_tree(1, - default_root); - if (IS_ERR(uverbs_dev->specs_root)) - goto err_class; - - device->specs_root = uverbs_dev->specs_root; - } - ib_set_client_data(device, &uverbs_client, uverbs_dev); return; @@ -1098,44 +1090,6 @@ err: return; } -static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext) -{ - struct ib_device *ib_dev = ibcontext->device; - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. 
- */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - down_write(&owning_mm->mmap_sem); - ib_dev->disassociate_ucontext(ibcontext); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); -} - static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { @@ -1144,46 +1098,31 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_event event; /* Pending running commands to terminate */ - synchronize_srcu(&uverbs_dev->disassociate_srcu); + uverbs_disassociate_api_pre(uverbs_dev); event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; event.device = ib_dev; mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); - mutex_unlock(&uverbs_dev->lists_mutex); - - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ucontext + /* We must release the mutex before going ahead and calling + * uverbs_cleanup_ufile, as it might end up indirectly calling + * uverbs_close, for example due to freeing the resources (e.g + * mmput). */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g mmput). 
- */ - ib_uverbs_event_handler(&file->event_handler, &event); - ib_uverbs_disassociate_ucontext(ucontext); - mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ucontext(file, ucontext, true); - mutex_unlock(&file->cleanup_mutex); - } + mutex_unlock(&uverbs_dev->lists_mutex); - mutex_lock(&uverbs_dev->lists_mutex); + ib_uverbs_event_handler(&file->event_handler, &event); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); + + mutex_lock(&uverbs_dev->lists_mutex); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { @@ -1205,6 +1144,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); } mutex_unlock(&uverbs_dev->lists_mutex); + + uverbs_disassociate_api(uverbs_dev->uapi); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) @@ -1232,7 +1173,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) * cdev was deleted, however active clients can still issue * commands and close their open files. 
*/ - rcu_assign_pointer(uverbs_dev->ib_dev, NULL); ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } @@ -1241,10 +1181,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); - if (uverbs_dev->specs_root) { - uverbs_free_spec_tree(uverbs_dev->specs_root); - device->specs_root = NULL; - } kobject_put(&uverbs_dev->kobj); } diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bb372b4713a4..b8d715c68ca4 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst, /* TODO: No need to set this */ sa_path_set_dmac_zero(dst); - sa_path_set_ndev(dst, NULL); - sa_path_set_ifindex(dst, 0); } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index b570acbd94af..203cc96ac6f5 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -48,14 +48,18 @@ static int uverbs_free_ah(struct ib_uobject *uobject, static int uverbs_free_flow(struct ib_uobject *uobject, enum rdma_remove_reason why) { - int ret; struct ib_flow *flow = (struct ib_flow *)uobject->object; struct ib_uflow_object *uflow = container_of(uobject, struct ib_uflow_object, uobject); + struct ib_qp *qp = flow->qp; + int ret; - ret = ib_destroy_flow(flow); - if (!ret) + ret = flow->device->destroy_flow(flow); + if (!ret) { + if (qp) + atomic_dec(&qp->usecnt); ib_uverbs_flow_resources_free(uflow->resources); + } return ret; } @@ -74,6 +78,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject, container_of(uobject, struct ib_uqp_object, uevent.uobject); int ret; + /* + * If this is a user triggered destroy then do not allow destruction + * until the user cleans up all the 
mcast bindings. Unlike in other + * places we forcibly clean up the mcast attachments for !DESTROY + * because the mcast attaches are not ubojects and will not be + * destroyed by anything else during cleanup processing. + */ if (why == RDMA_REMOVE_DESTROY) { if (!list_empty(&uqp->mcast_list)) return -EBUSY; @@ -82,7 +93,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp(qp); - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (uqp->uxrcd) @@ -100,8 +111,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, int ret; ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret || why != RDMA_REMOVE_DESTROY) - kfree(ind_tbl); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(ind_tbl); return ret; } @@ -114,8 +127,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq(wq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); return ret; } @@ -129,8 +144,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq(srq); - - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (srq_type == IB_SRQT_XRC) { @@ -152,12 +166,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; + ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); + if (ret) + return ret; + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) - ret = -EBUSY; - else - ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, - xrcd, why); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why); 
mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); return ret; @@ -167,20 +181,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_pd *pd = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); + if (ret) + return ret; ib_dealloc_pd((struct ib_pd *)uobject->object); return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file, +static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, enum rdma_remove_reason why) { struct ib_uverbs_completion_event_file *comp_event_file = - container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; spin_lock_irq(&event_queue->lock); @@ -194,119 +210,77 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_ return 0; }; -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, +int uverbs_destroy_def_handler(struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { return 0; } +EXPORT_SYMBOL(uverbs_destroy_def_handler); -/* - * This spec is used in order to pass information to the hardware driver in a - * legacy way. Every verb that could get driver specific data should get this - * spec. - */ -const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); -const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); - -void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) -{ - /* - * This is for ease of conversion. 
The purpose is to convert all drivers - * to use uverbs_attr_bundle instead of ib_udata. - * Assume attr == 0 is input and attr == 1 is output. - */ - const struct uverbs_attr *uhw_in = - uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN); - const struct uverbs_attr *uhw_out = - uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT); - - if (!IS_ERR(uhw_in)) { - udata->inlen = uhw_in->ptr_attr.len; - if (uverbs_attr_ptr_is_inline(uhw_in)) - udata->inbuf = &uhw_in->uattr->data; - else - udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); - } else { - udata->inbuf = NULL; - udata->inlen = 0; - } - - if (!IS_ERR(uhw_out)) { - udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); - udata->outlen = uhw_out->ptr_attr.len; - } else { - udata->outbuf = NULL; - udata->outlen = 0; - } -} - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL, - &UVERBS_TYPE_ALLOC_FD(0, - sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, - &uverbs_event_fops, - "[infinibandevent]", O_RDONLY)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), + uverbs_hot_unplug_completion_event_file, + &uverbs_event_fops, + "[infinibandevent]", + O_RDONLY)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, - uverbs_free_qp)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_QP, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, - uverbs_free_srq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_SRQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), + uverbs_free_srq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); + 
UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - 0, uverbs_free_flow)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), + uverbs_free_flow)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, - uverbs_free_wq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_WQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, - uverbs_free_xrcd)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_XRCD, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), + uverbs_free_xrcd)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - /* 2 is used in order to free the PD after MRs */ - &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); - -static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), - &UVERBS_OBJECT(UVERBS_OBJECT_PD), - &UVERBS_OBJECT(UVERBS_OBJECT_MR), - &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), - &UVERBS_OBJECT(UVERBS_OBJECT_CQ), - &UVERBS_OBJECT(UVERBS_OBJECT_QP), - &UVERBS_OBJECT(UVERBS_OBJECT_AH), - &UVERBS_OBJECT(UVERBS_OBJECT_MW), - &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), - &UVERBS_OBJECT(UVERBS_OBJECT_WQ), - &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), - &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM), - &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); + +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE); + 
+DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, + &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), + &UVERBS_OBJECT(UVERBS_OBJECT_PD), + &UVERBS_OBJECT(UVERBS_OBJECT_MR), + &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), + &UVERBS_OBJECT(UVERBS_OBJECT_CQ), + &UVERBS_OBJECT(UVERBS_OBJECT_QP), + &UVERBS_OBJECT(UVERBS_OBJECT_AH), + &UVERBS_OBJECT(UVERBS_OBJECT_MW), + &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), + &UVERBS_OBJECT(UVERBS_OBJECT_WQ), + &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), + &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), + &UVERBS_OBJECT(UVERBS_OBJECT_DM), + &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { return &uverbs_default_objects; } -EXPORT_SYMBOL_GPL(uverbs_default_get_objects); diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 03b182a684a6..a0ffdcf9a51c 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -38,20 +38,22 @@ static int uverbs_free_counters(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_counters *counters = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&counters->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); + if (ret) + return ret; return counters->device->destroy_counters(counters); } -static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); + struct ib_device *ib_dev = uobj->context->device; struct ib_counters *counters; - struct ib_uobject *uobj; int ret; /* @@ 
-62,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de if (!ib_dev->create_counters) return -EOPNOTSUPP; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); counters = ib_dev->create_counters(ib_dev, attrs); if (IS_ERR(counters)) { ret = PTR_ERR(counters); @@ -80,9 +81,8 @@ err_create_counters: return ret; } -static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_counters_read_attr read_attr = {}; const struct uverbs_attr *uattr; @@ -90,68 +90,62 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); int ret; - if (!ib_dev->read_counters) + if (!counters->device->read_counters) return -EOPNOTSUPP; if (!atomic_read(&counters->usecnt)) return -EINVAL; - ret = uverbs_copy_from(&read_attr.flags, attrs, - UVERBS_ATTR_READ_COUNTERS_FLAGS); + ret = uverbs_get_flags32(&read_attr.flags, attrs, + UVERBS_ATTR_READ_COUNTERS_FLAGS, + IB_UVERBS_READ_COUNTERS_PREFER_CACHED); if (ret) return ret; uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); - read_attr.counters_buff = kcalloc(read_attr.ncounters, - sizeof(u64), GFP_KERNEL); - if (!read_attr.counters_buff) - return -ENOMEM; - - ret = ib_dev->read_counters(counters, - &read_attr, - attrs); - if (ret) - goto err_read; + read_attr.counters_buff = uverbs_zalloc( + attrs, array_size(read_attr.ncounters, sizeof(u64))); + if (IS_ERR(read_attr.counters_buff)) + return PTR_ERR(read_attr.counters_buff); - ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, - read_attr.counters_buff, - read_attr.ncounters * sizeof(u64)); + ret = counters->device->read_counters(counters, &read_attr, 
attrs); + if (ret) + return ret; -err_read: - kfree(read_attr.counters_buff); - return ret; + return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, + read_attr.counters_buff, + read_attr.ncounters * sizeof(u64)); } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, - &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, - UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, - UVERBS_ATTR_TYPE(__u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_READ, + UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_MIN_SIZE(0), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + enum ib_uverbs_read_counters_flags)); 
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); - diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 3d293d01afea..5b5f2052cd52 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -44,21 +44,26 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq(cq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? - container_of(ev_queue, - struct ib_uverbs_completion_event_file, - ev_queue) : NULL, - ucq); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_ucq( + uobject->context->ufile, + ev_queue ? container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : + NULL, + ucq); return ret; } -static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; - struct ib_ucq_object *obj; + struct ib_ucq_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), + typeof(*obj), uobject); + struct ib_device *ib_dev = obj->uobject.context->device; struct ib_udata uhw; int ret; u64 user_handle; @@ -67,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_uobject *ev_file_uobj; - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + if (!ib_dev->create_cq || !ib_dev->destroy_cq) return -EOPNOTSUPP; 
ret = uverbs_copy_from(&attr.comp_vector, attrs, @@ -81,28 +86,26 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, if (ret) return ret; - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs, - UVERBS_ATTR_CREATE_CQ_FLAGS))) - return -EFAULT; + ret = uverbs_get_flags32(&attr.flags, attrs, + UVERBS_ATTR_CREATE_CQ_FLAGS, + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION | + IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN); + if (ret) + return ret; ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); if (!IS_ERR(ev_file_uobj)) { ev_file = container_of(ev_file_uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); uverbs_uobject_get(ev_file_uobj); } - if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + if (attr.comp_vector >= file->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; } - obj = container_of(uverbs_attr_get_uobject(attrs, - UVERBS_ATTR_CREATE_CQ_HANDLE), - typeof(*obj), uobject); - obj->uverbs_file = ucontext->ufile; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -111,7 +114,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, /* Temporary, only until drivers get the new uverbs_attr_bundle */ create_udata(attrs, &uhw); - cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; @@ -143,69 +146,64 @@ err_event_file: return ret; }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_OBJECT_CQ, + 
UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, + enum ib_uverbs_ex_create_cq_flags), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_ACCESS_READ), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); - -static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) + UA_MANDATORY), + UVERBS_ATTR_UHW()); + +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); - struct ib_uverbs_destroy_cq_resp resp; - struct ib_ucq_object *obj; - int ret; - - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - obj = container_of(uobj, struct ib_ucq_object, uobject); - - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) - return -EOPNOTSUPP; - - ret = rdma_explicit_destroy(uobj); - if (ret) - return 
ret; - - resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; + struct ib_ucq_object *obj = + container_of(uobj, struct ib_ucq_object, uobject); + struct ib_uverbs_destroy_cq_resp resp = { + .comp_events_reported = obj->comp_events_reported, + .async_events_reported = obj->async_events_reported + }; return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, sizeof(resp)); } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, - UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, - uverbs_free_cq), +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_CQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), + #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) - &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) + &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) #endif - ); - +); diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 8b681575b615..edc3ff7733d4 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -37,20 +37,24 @@ static int uverbs_free_dm(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_dm *dm = uobject->object; + int ret; - if 
(why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); + if (ret) + return ret; return dm->device->dealloc_dm(dm); } -static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int +UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; struct ib_dm_alloc_attr attr = {}; - struct ib_uobject *uobj; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE) + ->obj_attr.uobject; + struct ib_device *ib_dev = uobj->context->device; struct ib_dm *dm; int ret; @@ -67,9 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, if (ret) return ret; - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; - - dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs); + dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); @@ -83,26 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, return 0; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, - UVERBS_OBJECT_DM, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, + 
UVERBS_OBJECT_DM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_DM_FREE, + UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, - /* 1 is used in order to free the DM after MRs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index a7be51cf2e42..d8cfafe23bd9 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_flow_action *action = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&action->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&action->usecnt, why, uobject); + if (ret) + return ret; return action->device->destroy_flow_action(action); } @@ -303,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev, return 0; } -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); + struct ib_device *ib_dev = uobj->context->device; int ret; - struct ib_uobject *uobj; struct ib_flow_action *action; struct 
ib_flow_action_esp_attr esp_attr = {}; @@ -320,7 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return ret; /* No need to check as this attribute is marked as MANDATORY */ - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -334,102 +335,109 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return 0; } -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); + struct ib_flow_action *action = uobj->object; int ret; - struct ib_uobject *uobj; - struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; - if (!ib_dev->modify_flow_action_esp) + if (!action->device->modify_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true); + ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr, + true); if (ret) return ret; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); - action = uobj->object; - if (action->type != IB_FLOW_ACTION_ESP) return -EINVAL; - return ib_dev->modify_flow_action_esp(action, - &esp_attr.hdr, - attrs); + return action->device->modify_flow_action_esp(action, &esp_attr.hdr, + attrs); } static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } }, + .type = 
UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT( + struct ib_uverbs_flow_action_esp_keymat_aes_gcm, + aes_key), }, }; static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - /* No need to specify any data */ - .len = 0, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, + size), }, }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), 
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); - +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, + UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, 
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 68f7cadf088f..cf02e774303e 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject, return ib_dereg_mr((struct ib_mr *)uobject->object); } -static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_dm_mr_attr attr = {}; - struct ib_uobject *uobj; - struct ib_dm *dm; - struct ib_pd *pd; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); + struct ib_dm *dm = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, 
UVERBS_ATTR_REG_DM_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + struct ib_mr *mr; int ret; @@ -62,8 +66,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, if (ret) return ret; - ret = uverbs_copy_from(&attr.access_flags, attrs, - UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS); + ret = uverbs_get_flags32(&attr.access_flags, attrs, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + IB_ACCESS_SUPPORTED); if (ret) return ret; @@ -74,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, if (ret) return ret; - pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); - - dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); - - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject; - if (attr.offset > dm->length || attr.length > dm->length || attr.length > dm->length - attr.offset) return -EINVAL; @@ -115,33 +114,36 @@ err_dereg: return ret; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_MR_REG, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + 
UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + enum ib_access_flags), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR, - /* 1 is used in order to free the MR after all the MWs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr), - &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_MR, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c new file mode 100644 index 000000000000..73ea6f0db88f --- /dev/null +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. 
+ */ +#include <rdma/uverbs_ioctl.h> +#include <rdma/rdma_user_ioctl.h> +#include <linux/bitops.h> +#include "rdma_core.h" +#include "uverbs.h" + +static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) +{ + void *elm; + int rc; + + if (key == UVERBS_API_KEY_ERR) + return ERR_PTR(-EOVERFLOW); + + elm = kzalloc(alloc_size, GFP_KERNEL); + /* Callers only test IS_ERR(), so a failed allocation must not escape as NULL */ + if (!elm) + return ERR_PTR(-ENOMEM); + rc = radix_tree_insert(&uapi->radix, key, elm); + if (rc) { + kfree(elm); + return ERR_PTR(rc); + } + + return elm; +} + +static int uapi_merge_method(struct uverbs_api *uapi, + struct uverbs_api_object *obj_elm, u32 obj_key, + const struct uverbs_method_def *method, + bool is_driver) +{ + u32 method_key = obj_key | uapi_key_ioctl_method(method->id); + struct uverbs_api_ioctl_method *method_elm; + unsigned int i; + + if (!method->attrs) + return 0; + + method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm)); + if (IS_ERR(method_elm)) { + if (method_elm != ERR_PTR(-EEXIST)) + return PTR_ERR(method_elm); + + /* + * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE + */ + if (WARN_ON(method->handler)) + return -EINVAL; + method_elm = radix_tree_lookup(&uapi->radix, method_key); + if (WARN_ON(!method_elm)) + return -EINVAL; + } else { + WARN_ON(!method->handler); + rcu_assign_pointer(method_elm->handler, method->handler); + if (method->handler != uverbs_destroy_def_handler) + method_elm->driver_method = is_driver; + } + + for (i = 0; i != method->num_attrs; i++) { + const struct uverbs_attr_def *attr = (*method->attrs)[i]; + struct uverbs_api_attr *attr_slot; + + if (!attr) + continue; + + /* + * ENUM_IN contains the 'ids' pointer to the driver's .rodata, + * so if it is specified by a driver then it always makes this + * into a driver method. 
+ */ + if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) + method_elm->driver_method |= is_driver; + + attr_slot = + uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), + sizeof(*attr_slot)); + /* Attributes are not allowed to be modified by drivers */ + if (IS_ERR(attr_slot)) + return PTR_ERR(attr_slot); + + attr_slot->spec = attr->attr; + } + + return 0; +} + +static int uapi_merge_tree(struct uverbs_api *uapi, + const struct uverbs_object_tree_def *tree, + bool is_driver) +{ + unsigned int i, j; + int rc; + + if (!tree->objects) + return 0; + + for (i = 0; i != tree->num_objects; i++) { + const struct uverbs_object_def *obj = (*tree->objects)[i]; + struct uverbs_api_object *obj_elm; + u32 obj_key; + + if (!obj) + continue; + + obj_key = uapi_key_obj(obj->id); + obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm)); + if (IS_ERR(obj_elm)) { + if (obj_elm != ERR_PTR(-EEXIST)) + return PTR_ERR(obj_elm); + + /* This occurs when a driver uses ADD_UVERBS_METHODS */ + if (WARN_ON(obj->type_attrs)) + return -EINVAL; + obj_elm = radix_tree_lookup(&uapi->radix, obj_key); + if (WARN_ON(!obj_elm)) + return -EINVAL; + } else { + obj_elm->type_attrs = obj->type_attrs; + if (obj->type_attrs) { + obj_elm->type_class = + obj->type_attrs->type_class; + /* + * Today drivers are only permitted to use + * idr_class types. They cannot use FD types + * because we currently have no way to revoke + * the fops pointer after device + * disassociation. 
+ */ + if (WARN_ON(is_driver && + obj->type_attrs->type_class != + &uverbs_idr_class)) + return -EINVAL; + } + } + + if (!obj->methods) + continue; + + for (j = 0; j != obj->num_methods; j++) { + const struct uverbs_method_def *method = + (*obj->methods)[j]; + if (!method) + continue; + + rc = uapi_merge_method(uapi, obj_elm, obj_key, method, + is_driver); + if (rc) + return rc; + } + } + + return 0; +} + +static int +uapi_finalize_ioctl_method(struct uverbs_api *uapi, + struct uverbs_api_ioctl_method *method_elm, + u32 method_key) +{ + struct radix_tree_iter iter; + unsigned int num_attrs = 0; + unsigned int max_bkey = 0; + bool single_uobj = false; + void __rcu **slot; + + method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN; + radix_tree_for_each_slot (slot, &uapi->radix, &iter, + uapi_key_attrs_start(method_key)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK; + u32 attr_bkey = uapi_bkey_attr(attr_key); + u8 type = elm->spec.type; + + if (uapi_key_attr_to_method(iter.index) != + uapi_key_attr_to_method(method_key)) + break; + + if (elm->spec.mandatory) + __set_bit(attr_bkey, method_elm->attr_mandatory); + + if (type == UVERBS_ATTR_TYPE_IDR || + type == UVERBS_ATTR_TYPE_FD) { + u8 access = elm->spec.u.obj.access; + + /* + * Verbs specs may only have one NEW/DESTROY, we don't + * have the infrastructure to abort multiple NEW's or + * cope with multiple DESTROY failure. 
+ */ + if (access == UVERBS_ACCESS_NEW || + access == UVERBS_ACCESS_DESTROY) { + if (WARN_ON(single_uobj)) + return -EINVAL; + + single_uobj = true; + if (WARN_ON(!elm->spec.mandatory)) + return -EINVAL; + } + + if (access == UVERBS_ACCESS_DESTROY) + method_elm->destroy_bkey = attr_bkey; + } + + max_bkey = max(max_bkey, attr_bkey); + num_attrs++; + } + + method_elm->key_bitmap_len = max_bkey + 1; + WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN); + + uapi_compute_bundle_size(method_elm, num_attrs); + return 0; +} + +static int uapi_finalize(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + int rc; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (uapi_key_is_ioctl_method(iter.index)) { + rc = uapi_finalize_ioctl_method(uapi, method_elm, + iter.index); + if (rc) + return rc; + } + } + + return 0; +} + +void uverbs_destroy_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + if (!uapi) + return; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + kfree(rcu_dereference_protected(*slot, true)); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } +} + +struct uverbs_api *uverbs_alloc_api( + const struct uverbs_object_tree_def *const *driver_specs, + enum rdma_driver_id driver_id) +{ + struct uverbs_api *uapi; + int rc; + + uapi = kzalloc(sizeof(*uapi), GFP_KERNEL); + if (!uapi) + return ERR_PTR(-ENOMEM); + + INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); + uapi->driver_id = driver_id; + + rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false); + if (rc) + goto err; + + for (; driver_specs && *driver_specs; driver_specs++) { + rc = uapi_merge_tree(uapi, *driver_specs, true); + if (rc) + goto err; + } + + rc = uapi_finalize(uapi); + if (rc) + goto err; + + return uapi; +err: + if (rc != -ENOMEM) + pr_err("Setup of uverbs_api failed, kernel parsing tree 
description is not valid (%d)??\n", + rc); + + uverbs_destroy_api(uapi); + /* uverbs_destroy_api() only empties the radix tree, the container is still ours */ + kfree(uapi); + return ERR_PTR(rc); +} + +/* + * The pre version is done before destroying the HW objects, it only blocks + * off method access. All methods that require the ib_dev or the module data + * must test one of these assignments prior to continuing. + */ +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev) +{ + struct uverbs_api *uapi = uverbs_dev->uapi; + struct radix_tree_iter iter; + void __rcu **slot; + + rcu_assign_pointer(uverbs_dev->ib_dev, NULL); + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->driver_method) + rcu_assign_pointer(method_elm->handler, NULL); + } + } + + synchronize_srcu(&uverbs_dev->disassociate_srcu); +} + +/* + * Called when a driver disassociates from the ib_uverbs_device. The + * assumption is that the driver module will unload after. Replace everything + * related to the driver with NULL as a safety measure. + */ +void uverbs_disassociate_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *object_elm = + rcu_dereference_protected(*slot, true); + + /* + * Some type_attrs are in the driver module. We don't + * bother to keep track of which since there should be + * no use of this after disassociate. 
+ */ + object_elm->type_attrs = NULL; + } else if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + + if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN) + elm->spec.u2.enum_def.ids = NULL; + } + } +} diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9d6beb948535..6ee03d6089eb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -326,12 +326,162 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ +/** + * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination. + * @dest: Pointer to destination ah_attr. Contents of the destination + * pointer are assumed to be invalid and the attributes are overwritten. + * @src: Pointer to source ah_attr. + */ +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src) +{ + *dest = *src; + if (dest->grh.sgid_attr) + rdma_hold_gid_attr(dest->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_copy_ah_attr); + +/** + * rdma_replace_ah_attr - Replace valid ah_attr with a new one. + * @old: Pointer to existing ah_attr which needs to be replaced. + * old is assumed to be valid or zero'd + * @new: Pointer to the new ah_attr. + * + * rdma_replace_ah_attr() first releases any reference in the old ah_attr if + * the old ah_attr is valid; after that it copies the new attribute and takes + * a reference on the sgid_attr of the copied ah_attr, if it has one. + */ +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new) +{ + rdma_destroy_ah_attr(old); + *old = *new; + if (old->grh.sgid_attr) + rdma_hold_gid_attr(old->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_replace_ah_attr); + +/** + * rdma_move_ah_attr - Move ah_attr pointed by source to destination. + * @dest: Pointer to destination ah_attr to copy to. + * dest is assumed to be valid or zero'd + * @src: Pointer to the new ah_attr. + * + * rdma_move_ah_attr() first releases any reference in the destination ah_attr + * if it is valid. 
This also transfers ownership of internal references from + * src to dest, making src invalid in the process. No new reference of the src + * ah_attr is taken. + */ +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src) +{ + rdma_destroy_ah_attr(dest); + *dest = *src; + src->grh.sgid_attr = NULL; +} +EXPORT_SYMBOL(rdma_move_ah_attr); + +/* + * Validate that the rdma_ah_attr is valid for the device before passing it + * off to the driver. + */ +static int rdma_check_ah_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr) +{ + if (!rdma_is_port_valid(device, ah_attr->port_num)) + return -EINVAL; + + if ((rdma_is_grh_required(device, ah_attr->port_num) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) && + !(ah_attr->ah_flags & IB_AH_GRH)) + return -EINVAL; + + if (ah_attr->grh.sgid_attr) { + /* + * Make sure the passed sgid_attr is consistent with the + * parameters + */ + if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index || + ah_attr->grh.sgid_attr->port_num != ah_attr->port_num) + return -EINVAL; + } + return 0; +} + +/* + * If the ah requires a GRH then ensure that sgid_attr pointer is filled in. + * On success the caller is responsible to call rdma_unfill_sgid_attr(). 
+ */ +static int rdma_fill_sgid_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr **old_sgid_attr) +{ + const struct ib_gid_attr *sgid_attr; + struct ib_global_route *grh; + int ret; + + *old_sgid_attr = ah_attr->grh.sgid_attr; + + ret = rdma_check_ah_attr(device, ah_attr); + if (ret) + return ret; + + if (!(ah_attr->ah_flags & IB_AH_GRH)) + return 0; + + grh = rdma_ah_retrieve_grh(ah_attr); + if (grh->sgid_attr) + return 0; + + sgid_attr = + rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + + /* Move ownership of the kref into the ah_attr */ + grh->sgid_attr = sgid_attr; + return 0; +} + +static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_sgid_attr) +{ + /* + * Fill didn't change anything, the caller retains ownership of + * whatever it passed + */ + if (ah_attr->grh.sgid_attr == old_sgid_attr) + return; + + /* + * Otherwise, we need to undo what rdma_fill_sgid_attr did so the caller + * doesn't see any change in the rdma_ah_attr. If we get here + * old_sgid_attr is NULL. 
+ */ + rdma_destroy_ah_attr(ah_attr); +} + +static const struct ib_gid_attr * +rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_attr) +{ + if (old_attr) + rdma_put_gid_attr(old_attr); + if (ah_attr->ah_flags & IB_AH_GRH) { + rdma_hold_gid_attr(ah_attr->grh.sgid_attr); + return ah_attr->grh.sgid_attr; + } + return NULL; +} + static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { struct ib_ah *ah; + if (!pd->device->create_ah) + return ERR_PTR(-EOPNOTSUPP); + ah = pd->device->create_ah(pd, ah_attr, udata); if (!IS_ERR(ah)) { @@ -339,15 +489,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->uobject = NULL; ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + atomic_inc(&pd->usecnt); } return ah; } +/** + * rdma_create_ah - Creates an address handle for the + * given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * It returns the new address handle on success and an ERR_PTR() encoded + * error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. 
+ */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) { - return _rdma_create_ah(pd, ah_attr, NULL); + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; + int ret; + + ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (ret) + return ERR_PTR(ret); + + ah = _rdma_create_ah(pd, ah_attr, NULL); + + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_ah); @@ -368,15 +541,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; int err; + err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (err) + return ERR_PTR(err); + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); + if (err) { + ah = ERR_PTR(err); + goto out; + } } - return _rdma_create_ah(pd, ah_attr, udata); + ah = _rdma_create_ah(pd, ah_attr, udata); + +out: + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_user_ah); @@ -455,16 +640,16 @@ static bool find_gid_index(const union ib_gid *gid, return true; } -static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, - u16 vlan_id, const union ib_gid *sgid, - enum ib_gid_type gid_type, - u16 *gid_index) +static const struct ib_gid_attr * +get_sgid_attr_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + enum ib_gid_type gid_type) { struct find_gid_index_context context = {.vlan_id = vlan_id, .gid_type = gid_type}; - return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, - &context, gid_index); + return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context); } int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, @@ -508,39 +693,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); static int ib_resolve_unicast_gid_dmac(struct 
ib_device *device, struct rdma_ah_attr *ah_attr) { - struct ib_gid_attr sgid_attr; - struct ib_global_route *grh; + struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr); + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int hop_limit = 0xff; - union ib_gid sgid; - int ret; - - grh = rdma_ah_retrieve_grh(ah_attr); - - ret = ib_query_gid(device, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index, - &sgid, &sgid_attr); - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - return ret; - } + int ret = 0; /* If destination is link local and source GID is RoCEv1, * IP stack is not used. */ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && - sgid_attr.gid_type == IB_GID_TYPE_ROCE) { + sgid_attr->gid_type == IB_GID_TYPE_ROCE) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, ah_attr->roce.dmac); - goto done; + return ret; } - ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, + ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ah_attr->roce.dmac, - sgid_attr.ndev, &hop_limit); -done: - dev_put(sgid_attr.ndev); + sgid_attr->ndev, &hop_limit); grh->hop_limit = hop_limit; return ret; @@ -555,16 +725,18 @@ done: * as sgid and, sgid is used as dgid because sgid contains destinations * GID whom to respond to. * + * On success the caller is responsible to call rdma_destroy_ah_attr on the + * attr. 
*/ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) { u32 flow_class; - u16 gid_index; int ret; enum rdma_network_type net_type = RDMA_NETWORK_IB; enum ib_gid_type gid_type = IB_GID_TYPE_IB; + const struct ib_gid_attr *sgid_attr; int hoplimit = 0xff; union ib_gid dgid; union ib_gid sgid; @@ -595,72 +767,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - ret = get_sgid_index_from_eth(device, port_num, - vlan_id, &dgid, - gid_type, &gid_index); - if (ret) - return ret; + sgid_attr = get_sgid_attr_from_eth(device, port_num, + vlan_id, &dgid, + gid_type); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, - flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - return ib_resolve_unicast_gid_dmac(device, ah_attr); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, + flow_class & 0xFFFFF, + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + + ret = ib_resolve_unicast_gid_dmac(device, ah_attr); + if (ret) + rdma_destroy_ah_attr(ah_attr); + + return ret; } else { rdma_ah_set_dlid(ah_attr, wc->slid); rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); - if (wc->wc_flags & IB_WC_GRH) { - if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; - } else { - gid_index = 0; - } + if ((wc->wc_flags & IB_WC_GRH) == 0) + return 0; + + if (dgid.global.interface_id != + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + sgid_attr = rdma_find_gid_by_port( + device, &dgid, IB_GID_TYPE_IB, port_num, NULL); + } else + sgid_attr = rdma_get_gid_attr(device, port_num, 0); - flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, + if 
(IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + flow_class = be32_to_cpu(grh->version_tclass_flow); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - } + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + return 0; } } EXPORT_SYMBOL(ib_init_ah_attr_from_wc); +/** + * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership + * of the reference + * + * @attr: Pointer to AH attribute structure + * @dgid: Destination GID + * @flow_label: Flow label + * @hop_limit: Hop limit + * @traffic_class: traffic class + * @sgid_attr: Pointer to SGID attribute + * + * This takes ownership of the sgid_attr reference. The caller must ensure + * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after + * calling this function. + */ +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr) +{ + rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit, + traffic_class); + attr->grh.sgid_attr = sgid_attr; +} +EXPORT_SYMBOL(rdma_move_grh_sgid_attr); + +/** + * rdma_destroy_ah_attr - Release reference to SGID attribute of + * ah attribute. + * @ah_attr: Pointer to ah attribute + * + * Release reference to the SGID attribute of the ah attribute if it is + * non NULL. It is safe to call this multiple times, and safe to call it on + * a zero initialized ah_attr. 
+ */ +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) +{ + if (ah_attr->grh.sgid_attr) { + rdma_put_gid_attr(ah_attr->grh.sgid_attr); + ah_attr->grh.sgid_attr = NULL; + } +} +EXPORT_SYMBOL(rdma_destroy_ah_attr); + struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { struct rdma_ah_attr ah_attr; + struct ib_ah *ah; int ret; ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); - return rdma_create_ah(pd, &ah_attr); + ah = rdma_create_ah(pd, &ah_attr); + + rdma_destroy_ah_attr(&ah_attr); + return ah; } EXPORT_SYMBOL(ib_create_ah_from_wc); int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + const struct ib_gid_attr *old_sgid_attr; + int ret; + if (ah->type != ah_attr->type) return -EINVAL; - return ah->device->modify_ah ? + ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr); + if (ret) + return ret; + + ret = ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -EOPNOTSUPP; + + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ret; } EXPORT_SYMBOL(rdma_modify_ah); int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + ah_attr->grh.sgid_attr = NULL; + return ah->device->query_ah ? 
ah->device->query_ah(ah, ah_attr) : -EOPNOTSUPP; @@ -669,13 +910,17 @@ EXPORT_SYMBOL(rdma_query_ah); int rdma_destroy_ah(struct ib_ah *ah) { + const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; pd = ah->pd; ret = ah->device->destroy_ah(ah); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + } return ret; } @@ -1290,16 +1535,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); +/** + * ib_resolve_eth_dmac - Resolve destination mac address + * @device: Device to consider + * @ah_attr: address handle attribute which describes the + * source and destination parameters + * ib_resolve_eth_dmac() resolves destination mac address and L3 hop limit It + * returns 0 on success or appropriate error code. It initializes the + * necessary ah_attr fields when call is successful. + */ static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - int ret = 0; - struct ib_global_route *grh; - - if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr))) - return -EINVAL; - - grh = rdma_ah_retrieve_grh(ah_attr); + int ret = 0; if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { @@ -1317,6 +1565,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device, return ret; } +static bool is_qp_type_connected(const struct ib_qp *qp) +{ + return (qp->qp_type == IB_QPT_UC || + qp->qp_type == IB_QPT_RC || + qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT); +} + /** * IB core internal function to perform QP attributes modification. */ @@ -1324,8 +1580,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { u8 port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; + const struct ib_gid_attr *old_sgid_attr_av; + const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + if (attr_mask & IB_QP_AV) { + ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, + &old_sgid_attr_av); + if (ret) + return ret; + } + if (attr_mask & IB_QP_ALT_PATH) { + /* + * FIXME: This does not track the migration state, so if the + * user loads a new alternate path after the HW has migrated + * from primary->alternate we will keep the wrong + * references. This is OK for IB because the reference + * counting does not serve any functional purpose. + */ + ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr, + &old_sgid_attr_alt_av); + if (ret) + goto out_av; + + /* + * Today the core code can only handle alternate paths and APM + * for IB. Ban them in roce mode. + */ + if (!(rdma_protocol_ib(qp->device, + attr->alt_ah_attr.port_num) && + rdma_protocol_ib(qp->device, port))) { + ret = -EINVAL; + goto out; + } + } + + /* + * If the user provided the qp_attr then we have to resolve it. 
Kernel + * users have to provide already resolved rdma_ah_attr's + */ + if (udata && (attr_mask & IB_QP_AV) && + attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto out; + } + if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", @@ -1341,20 +1642,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } ret = ib_security_modify_qp(qp, attr, attr_mask, udata); - if (!ret && (attr_mask & IB_QP_PORT)) - qp->port = attr->port_num; + if (ret) + goto out; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_AV) + qp->av_sgid_attr = + rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr); + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_path_sgid_attr = rdma_update_sgid_attr( + &attr->alt_ah_attr, qp->alt_path_sgid_attr); + +out: + if (attr_mask & IB_QP_ALT_PATH) + rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); +out_av: + if (attr_mask & IB_QP_AV) + rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); return ret; } -static bool is_qp_type_connected(const struct ib_qp *qp) -{ - return (qp->qp_type == IB_QPT_UC || - qp->qp_type == IB_QPT_RC || - qp->qp_type == IB_QPT_XRC_INI || - qp->qp_type == IB_QPT_XRC_TGT); -} - /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. * @ib_qp: The QP to modify. 
@@ -1369,17 +1677,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - struct ib_qp *qp = ib_qp->real_qp; - int ret; - - if (attr_mask & IB_QP_AV && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - return ret; - } - return _ib_modify_qp(qp, attr, attr_mask, udata); + return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); } EXPORT_SYMBOL(ib_modify_qp_with_udata); @@ -1451,6 +1749,9 @@ int ib_query_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { + qp_attr->ah_attr.grh.sgid_attr = NULL; + qp_attr->alt_ah_attr.grh.sgid_attr = NULL; + return qp->device->query_qp ? qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -EOPNOTSUPP; @@ -1509,6 +1810,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) int ib_destroy_qp(struct ib_qp *qp) { + const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; + const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -1539,6 +1842,10 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_restrack_del(&qp->res); ret = qp->device->destroy_qp(qp); if (!ret) { + if (alt_path_sgid_attr) + rdma_put_gid_attr(alt_path_sgid_attr); + if (av_sgid_attr) + rdma_put_gid_attr(av_sgid_attr); if (pd) atomic_dec(&pd->usecnt); if (scq) @@ -1977,35 +2284,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain) -{ - struct ib_flow *flow_id; - if (!qp->device->create_flow) - return ERR_PTR(-EOPNOTSUPP); - - flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL); - if (!IS_ERR(flow_id)) { - atomic_inc(&qp->usecnt); - flow_id->qp = qp; - } - 
return flow_id; -} -EXPORT_SYMBOL(ib_create_flow); - -int ib_destroy_flow(struct ib_flow *flow_id) -{ - int err; - struct ib_qp *qp = flow_id->qp; - - err = qp->device->destroy_flow(flow_id); - if (!err) - atomic_dec(&qp->usecnt); - return err; -} -EXPORT_SYMBOL(ib_destroy_flow); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { @@ -2200,7 +2478,6 @@ static void __ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; - struct ib_send_wr *bad_swr; struct ib_rdma_wr swr = { .wr = { .next = NULL, @@ -2219,7 +2496,7 @@ static void __ib_drain_sq(struct ib_qp *qp) sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); - ret = ib_post_send(qp, &swr.wr, &bad_swr); + ret = ib_post_send(qp, &swr.wr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; @@ -2240,7 +2517,7 @@ static void __ib_drain_rq(struct ib_qp *qp) struct ib_cq *cq = qp->recv_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe rdrain; - struct ib_recv_wr rwr = {}, *bad_rwr; + struct ib_recv_wr rwr = {}; int ret; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); @@ -2253,7 +2530,7 @@ static void __ib_drain_rq(struct ib_qp *qp) rdrain.cqe.done = ib_drain_qp_done; init_completion(&rdrain.done); - ret = ib_post_recv(qp, &rwr, &bad_rwr); + ret = ib_post_recv(qp, &rwr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; |