diff options
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_pool.c')
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_pool.c | 640 |
1 files changed, 200 insertions, 440 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index fbcbac52290b..f50620f5a0a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -1,542 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" -#include "rxe_loc.h" -/* info about object pools - * note that mr and mw share a single index space - * so that one can map an lkey to the correct type of object - */ -struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { +#define RXE_POOL_TIMEOUT (200) +#define RXE_POOL_ALIGN (16) + +static const struct rxe_type_info { + const char *name; + size_t size; + size_t elem_offset; + void (*cleanup)(struct rxe_pool_elem *elem); + u32 min_index; + u32 max_index; + u32 max_elem; +} rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { - .name = "rxe-uc", + .name = "uc", .size = sizeof(struct rxe_ucontext), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ucontext, elem), + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_PD] = { - .name = "rxe-pd", + .name = "pd", .size = sizeof(struct rxe_pd), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_pd, elem), + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_AH] = { - .name = "rxe-ah", + .name = "ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ah, elem), + .min_index = RXE_MIN_AH_INDEX, + .max_index = RXE_MAX_AH_INDEX, + .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1, }, [RXE_TYPE_SRQ] = { - .name = "rxe-srq", + .name = "srq", .size = sizeof(struct rxe_srq), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_srq, elem), + .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, + .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, }, [RXE_TYPE_QP] = { - .name = "rxe-qp", + .name = "qp", .size = sizeof(struct rxe_qp), + .elem_offset = offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, - .flags = RXE_POOL_INDEX, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, + .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1, }, [RXE_TYPE_CQ] = { - .name = "rxe-cq", + .name = "cq", .size = sizeof(struct rxe_cq), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_cq, elem), .cleanup = rxe_cq_cleanup, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_MR] = { - .name = "rxe-mr", - .size = sizeof(struct rxe_mem), - .cleanup = rxe_mem_cleanup, - .flags = RXE_POOL_INDEX, - .max_index = RXE_MAX_MR_INDEX, + .name = "mr", + .size = sizeof(struct rxe_mr), + .elem_offset = offsetof(struct rxe_mr, elem), + .cleanup = rxe_mr_cleanup, .min_index = RXE_MIN_MR_INDEX, + .max_index = RXE_MAX_MR_INDEX, + .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, }, [RXE_TYPE_MW] = { - .name = "rxe-mw", - .size = sizeof(struct rxe_mem), - .flags = RXE_POOL_INDEX, - .max_index = RXE_MAX_MW_INDEX, + .name = "mw", + .size = sizeof(struct rxe_mw), + .elem_offset = offsetof(struct rxe_mw, elem), + .cleanup = rxe_mw_cleanup, .min_index = RXE_MIN_MW_INDEX, - }, - [RXE_TYPE_MC_GRP] = { - .name = "rxe-mc_grp", - .size = sizeof(struct rxe_mc_grp), - .cleanup = rxe_mc_cleanup, - .flags = RXE_POOL_KEY, - .key_offset = offsetof(struct rxe_mc_grp, mgid), - .key_size = sizeof(union ib_gid), - }, - [RXE_TYPE_MC_ELEM] = { - .name = "rxe-mc_elem", - .size = sizeof(struct rxe_mc_elem), - .flags = RXE_POOL_ATOMIC, + .max_index = RXE_MAX_MW_INDEX, + .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, }, }; -static inline const char *pool_name(struct rxe_pool *pool) -{ - return rxe_type_info[pool->type].name; -} - -static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) -{ - return rxe_type_info[pool->type].cache; -} - -static void rxe_cache_clean(size_t cnt) -{ - int i; - struct rxe_type_info *type; - - for (i = 0; i < cnt; i++) { - type = &rxe_type_info[i]; - if (!(type->flags & RXE_POOL_NO_ALLOC)) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } - } -} - -int rxe_cache_init(void) +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type) { - int err; - int i; - size_t size; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - size = ALIGN(type->size, RXE_POOL_ALIGN); - if (!(type->flags & RXE_POOL_NO_ALLOC)) { - type->cache = - kmem_cache_create(type->name, size, - RXE_POOL_ALIGN, - RXE_POOL_CACHE_FLAGS, NULL); - if (!type->cache) { - pr_err("Unable to init kmem cache for %s\n", - type->name); - err = -ENOMEM; - goto err1; - } - } - } - - return 0; - -err1: - rxe_cache_clean(i); - - return err; -} - -void rxe_cache_exit(void) -{ - rxe_cache_clean(RXE_NUM_TYPES); -} - -static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) -{ - int err = 0; - size_t size; - - if ((max - min + 1) < pool->max_elem) { - pr_warn("not enough indices for max_elem\n"); - err = -EINVAL; - goto out; - } - - pool->max_index = max; - pool->min_index = min; - - size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->table = kmalloc(size, GFP_KERNEL); - if (!pool->table) { - err = -ENOMEM; - goto out; - } - - pool->table_size = size; - bitmap_zero(pool->table, max - min + 1); - -out: - return err; -} - -int rxe_pool_init( - struct rxe_dev *rxe, - struct rxe_pool *pool, - enum rxe_elem_type type, - unsigned int max_elem) -{ - int err = 0; - size_t size = rxe_type_info[type].size; + const struct rxe_type_info *info = &rxe_type_info[type]; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; + pool->name = info->name; pool->type = type; - pool->max_elem = max_elem; - pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); - pool->flags = rxe_type_info[type].flags; - pool->tree = RB_ROOT; - pool->cleanup = rxe_type_info[type].cleanup; + pool->max_elem = info->max_elem; + pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); + pool->elem_offset = info->elem_offset; + pool->cleanup = info->cleanup; atomic_set(&pool->num_elem, 0); - kref_init(&pool->ref_cnt); - - rwlock_init(&pool->pool_lock); - - if (rxe_type_info[type].flags & RXE_POOL_INDEX) { - err = rxe_pool_init_index(pool, - rxe_type_info[type].max_index, - rxe_type_info[type].min_index); - if (err) - goto out; - } - - if (rxe_type_info[type].flags & RXE_POOL_KEY) { - pool->key_offset = rxe_type_info[type].key_offset; - pool->key_size = rxe_type_info[type].key_size; - } - - pool->state = RXE_POOL_STATE_VALID; - -out: - return err; -} - -static void rxe_pool_release(struct kref *kref) -{ - struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); - - pool->state = RXE_POOL_STATE_INVALID; - kfree(pool->table); -} - -static void rxe_pool_put(struct rxe_pool *pool) -{ - kref_put(&pool->ref_cnt, rxe_pool_release); + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); + pool->limit.min = info->min_index; + pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - pool->state = RXE_POOL_STATE_INVALID; - if (atomic_read(&pool->num_elem) > 0) - pr_warn("%s pool destroyed with unfree'd elem\n", - pool_name(pool)); - write_unlock_irqrestore(&pool->pool_lock, flags); - - rxe_pool_put(pool); -} - -static u32 alloc_index(struct rxe_pool *pool) -{ - u32 index; - u32 range = pool->max_index - pool->min_index + 1; - - index = find_next_zero_bit(pool->table, range, pool->last); - if (index >= range) - index = find_first_zero_bit(pool->table, range); - - WARN_ON_ONCE(index >= range); - set_bit(index, pool->table); - pool->last = index; - return index + pool->min_index; -} - -static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) -{ - struct rb_node **link = &pool->tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_entry *elem; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); - - if (elem->index == new->index) { - pr_warn("element already exists!\n"); - goto out; - } - - if (elem->index > new->index) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); -out: - return; -} - -static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) -{ - struct rb_node **link = &pool->tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_entry *elem; - int cmp; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); - - cmp = memcmp((u8 *)elem + pool->key_offset, - (u8 *)new + pool->key_offset, pool->key_size); - - if (cmp == 0) { - pr_warn("key already exists!\n"); - goto out; - } - - if (cmp > 0) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); -out: - return; -} - -void rxe_add_key(void *arg, void *key) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); - insert_key(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void rxe_drop_key(void *arg) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - rb_erase(&elem->node, &pool->tree); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void rxe_add_index(void *arg) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - elem->index = alloc_index(pool); - insert_index(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void rxe_drop_index(void *arg) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - clear_bit(elem->index - pool->min_index, pool->table); - rb_erase(&elem->node, &pool->tree); - write_unlock_irqrestore(&pool->pool_lock, flags); + WARN_ON(!xa_empty(&pool->xa)); } void *rxe_alloc(struct rxe_pool *pool) { - struct rxe_pool_entry *elem; - unsigned long flags; - - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + struct rxe_pool_elem *elem; + void *obj; + int err; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); + if (WARN_ON(!(pool->type == RXE_TYPE_MR))) return NULL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); - - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; - elem = kmem_cache_zalloc(pool_cache(pool), - (pool->flags & RXE_POOL_ATOMIC) ? - GFP_ATOMIC : GFP_KERNEL); - if (!elem) - goto out_cnt; + obj = kzalloc(pool->elem_size, GFP_KERNEL); + if (!obj) + goto err_cnt; + + elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset); elem->pool = pool; + elem->obj = obj; kref_init(&elem->ref_cnt); + init_completion(&elem->complete); + + /* allocate index in array but leave pointer as NULL so it + * can't be looked up until rxe_finalize() is called + */ + err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, + &pool->next, GFP_KERNEL); + if (err < 0) + goto err_free; - return elem; + return obj; -out_cnt: +err_free: + kfree(obj); +err_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return NULL; } -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, + bool sleepable) { - unsigned long flags; - - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + int err; + gfp_t gfp_flags; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); + if (WARN_ON(pool->type == RXE_TYPE_MR)) return -EINVAL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); - - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; elem->pool = pool; + elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); + init_completion(&elem->complete); + + /* AH objects are unique in that the create_ah verb + * can be called in atomic context. If the create_ah + * call is not sleepable use GFP_ATOMIC. + */ + gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC; + + if (sleepable) + might_sleep(); + err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, + &pool->next, gfp_flags); + if (err < 0) + goto err_cnt; return 0; -out_cnt: +err_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return -EINVAL; } -void rxe_elem_release(struct kref *kref) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rxe_pool_entry *elem = - container_of(kref, struct rxe_pool_entry, ref_cnt); - struct rxe_pool *pool = elem->pool; - - if (pool->cleanup) - pool->cleanup(elem); - - if (!(pool->flags & RXE_POOL_NO_ALLOC)) - kmem_cache_free(pool_cache(pool), elem); - atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); - rxe_pool_put(pool); + struct rxe_pool_elem *elem; + struct xarray *xa = &pool->xa; + void *obj; + + rcu_read_lock(); + elem = xa_load(xa, index); + if (elem && kref_get_unless_zero(&elem->ref_cnt)) + obj = elem->obj; + else + obj = NULL; + rcu_read_unlock(); + + return obj; } -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +static void rxe_elem_release(struct kref *kref) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); - - if (pool->state != RXE_POOL_STATE_VALID) - goto out; - - node = pool->tree.rb_node; + struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); - while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); - - if (elem->index > index) - node = node->rb_left; - else if (elem->index < index) - node = node->rb_right; - else { - kref_get(&elem->ref_cnt); - break; - } - } - -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + complete(&elem->complete); } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - int cmp; - unsigned long flags; + struct rxe_pool *pool = elem->pool; + struct xarray *xa = &pool->xa; + static int timeout = RXE_POOL_TIMEOUT; + int ret, err = 0; + void *xa_ret; + + if (sleepable) + might_sleep(); + + /* erase xarray entry to prevent looking up + * the pool elem from its index + */ + xa_ret = xa_erase(xa, elem->index); + WARN_ON(xa_err(xa_ret)); + + /* if this is the last call to rxe_put complete the + * object. It is safe to touch obj->elem after this since + * it is freed below + */ + __rxe_put(elem); + + /* wait until all references to the object have been + * dropped before final object specific cleanup and + * return to rdma-core + */ + if (sleepable) { + if (!completion_done(&elem->complete) && timeout) { + ret = wait_for_completion_timeout(&elem->complete, + timeout); + + /* Shouldn't happen. There are still references to + * the object but, rather than deadlock, free the + * object or pass back to rdma-core. + */ + if (WARN_ON(!ret)) + err = -EINVAL; + } + } else { + unsigned long until = jiffies + timeout; + + /* AH objects are unique in that the destroy_ah verb + * can be called in atomic context. This delay + * replaces the wait_for_completion call above + * when the destroy_ah call is not sleepable + */ + while (!completion_done(&elem->complete) && + time_before(jiffies, until)) + mdelay(1); + + if (WARN_ON(!completion_done(&elem->complete))) + err = -EINVAL; + } - read_lock_irqsave(&pool->pool_lock, flags); + if (pool->cleanup) + pool->cleanup(elem); - if (pool->state != RXE_POOL_STATE_VALID) - goto out; + if (pool->type == RXE_TYPE_MR) + kfree_rcu(elem->obj); - node = pool->tree.rb_node; + atomic_dec(&pool->num_elem); - while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + return err; +} - cmp = memcmp((u8 *)elem + pool->key_offset, - key, pool->key_size); +int __rxe_get(struct rxe_pool_elem *elem) +{ + return kref_get_unless_zero(&elem->ref_cnt); +} - if (cmp > 0) - node = node->rb_left; - else if (cmp < 0) - node = node->rb_right; - else - break; - } +int __rxe_put(struct rxe_pool_elem *elem) +{ + return kref_put(&elem->ref_cnt, rxe_elem_release); +} - if (node) - kref_get(&elem->ref_cnt); +void __rxe_finalize(struct rxe_pool_elem *elem) +{ + void *xa_ret; -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL); + WARN_ON(xa_err(xa_ret)); } |