aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile5
-rw-r--r--drivers/infiniband/core/addr.c16
-rw-r--r--drivers/infiniband/core/cache.c724
-rw-r--r--drivers/infiniband/core/cm.c147
-rw-r--r--drivers/infiniband/core/cm_msgs.h7
-rw-r--r--drivers/infiniband/core/cma.c362
-rw-r--r--drivers/infiniband/core/core_priv.h4
-rw-r--r--drivers/infiniband/core/device.c23
-rw-r--r--drivers/infiniband/core/mad.c113
-rw-r--r--drivers/infiniband/core/mad_priv.h7
-rw-r--r--drivers/infiniband/core/multicast.c40
-rw-r--r--drivers/infiniband/core/nldev.c16
-rw-r--r--drivers/infiniband/core/rdma_core.c1020
-rw-r--r--drivers/infiniband/core/rdma_core.h96
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c306
-rw-r--r--drivers/infiniband/core/rw.c8
-rw-r--r--drivers/infiniband/core/sa_query.c138
-rw-r--r--drivers/infiniband/core/sysfs.c66
-rw-r--r--drivers/infiniband/core/ucm.c15
-rw-r--r--drivers/infiniband/core/umem.c62
-rw-r--r--drivers/infiniband/core/umem_odp.c33
-rw-r--r--drivers/infiniband/core/user_mad.c1
-rw-r--r--drivers/infiniband/core/uverbs.h34
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c684
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c709
-rw-r--r--drivers/infiniband/core/uverbs_ioctl_merge.c664
-rw-r--r--drivers/infiniband/core/uverbs_main.c232
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c200
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c108
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c154
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c61
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c170
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c88
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c346
-rw-r--r--drivers/infiniband/core/verbs.c523
36 files changed, 3911 insertions, 3273 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 61667705d746..867cee5e27b2 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -35,6 +35,7 @@ ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
- uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+ uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
- uverbs_std_types_mr.o uverbs_std_types_counters.o
+ uverbs_std_types_mr.o uverbs_std_types_counters.o \
+ uverbs_uapi.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 4f32c4062fb6..46b855a42884 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
return -ENODATA;
}
-int rdma_addr_size(struct sockaddr *addr)
+int rdma_addr_size(const struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
@@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst,
int ret = 0;
n = dst_neigh_lookup(dst, daddr);
+ if (!n)
+ return -ENODATA;
- rcu_read_lock();
- if (!n || !(n->nud_state & NUD_VALID)) {
- if (n)
- neigh_event_send(n, NULL);
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
rdma_copy_addr(dev_addr, dst->dev, n->ha);
}
- rcu_read_unlock();
- if (n)
- neigh_release(n);
+ neigh_release(n);
return ret;
}
@@ -587,7 +585,7 @@ static void process_one_req(struct work_struct *_work)
spin_unlock_bh(&lock);
}
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 81d66f56e38f..0bee1f4b914e 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -66,20 +66,28 @@ enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};
-enum gid_table_entry_props {
- GID_TABLE_ENTRY_INVALID = 1UL << 0,
- GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
+enum gid_table_entry_state {
+ GID_TABLE_ENTRY_INVALID = 1,
+ GID_TABLE_ENTRY_VALID = 2,
+ /*
+ * Indicates that the entry is pending removal; there may still
+ * be active users of this GID entry.
+ * When the last user of the GID entry releases its reference,
+ * the GID entry is detached from the table.
+ */
+ GID_TABLE_ENTRY_PENDING_DEL = 3,
};
struct ib_gid_table_entry {
- unsigned long props;
- union ib_gid gid;
- struct ib_gid_attr attr;
- void *context;
+ struct kref kref;
+ struct work_struct del_work;
+ struct ib_gid_attr attr;
+ void *context;
+ enum gid_table_entry_state state;
};
struct ib_gid_table {
- int sz;
+ int sz;
/* In RoCE, adding a GID to the table requires:
* (a) Find if this GID is already exists.
* (b) Find a free space.
@@ -91,13 +99,16 @@ struct ib_gid_table {
*
**/
/* Any writer to data_vec must hold this lock and the write side of
- * rwlock. readers must hold only rwlock. All writers must be in a
+ * rwlock. Readers must hold only rwlock. All writers must be in a
* sleepable context.
*/
- struct mutex lock;
- /* rwlock protects data_vec[ix]->props. */
- rwlock_t rwlock;
- struct ib_gid_table_entry *data_vec;
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->state and entry pointer.
+ */
+ rwlock_t rwlock;
+ struct ib_gid_table_entry **data_vec;
+ /* bit field, each bit indicates the index of default GID */
+ u32 default_gid_indices;
};
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
@@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid)
}
EXPORT_SYMBOL(rdma_is_zero_gid);
+/** is_gid_index_default - Check if a given index belongs to
+ * reserved default GIDs or not.
+ * @table: GID table pointer
+ * @index: Index to check in GID table
+ * Returns true if index is one of the reserved default GID indices,
+ * otherwise returns false.
+ */
+static bool is_gid_index_default(const struct ib_gid_table *table,
+ unsigned int index)
+{
+ return index < 32 && (BIT(index) & table->default_gid_indices);
+}
+
int ib_cache_gid_parse_type_str(const char *buf)
{
unsigned int i;
@@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
return device->cache.ports[port - rdma_start_port(device)].gid;
}
-static void del_roce_gid(struct ib_device *device, u8 port_num,
- struct ib_gid_table *table, int ix)
+static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
+{
+ return !entry;
+}
+
+static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
+{
+ return entry && entry->state == GID_TABLE_ENTRY_VALID;
+}
+
+static void schedule_free_gid(struct kref *kref)
{
+ struct ib_gid_table_entry *entry =
+ container_of(kref, struct ib_gid_table_entry, kref);
+
+ queue_work(ib_wq, &entry->del_work);
+}
+
+static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+ struct ib_device *device = entry->attr.device;
+ u8 port_num = entry->attr.port_num;
+ struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- device->name, port_num, ix,
- table->data_vec[ix].gid.raw);
+ device->name, port_num, entry->attr.index,
+ entry->attr.gid.raw);
+
+ if (rdma_cap_roce_gid_table(device, port_num) &&
+ entry->state != GID_TABLE_ENTRY_INVALID)
+ device->del_gid(&entry->attr, &entry->context);
+
+ write_lock_irq(&table->rwlock);
- if (rdma_cap_roce_gid_table(device, port_num))
- device->del_gid(&table->data_vec[ix].attr,
- &table->data_vec[ix].context);
- dev_put(table->data_vec[ix].attr.ndev);
+ /*
+ * Clear the table slot only if it still points to this entry.
+ * A new entry may already have been stored at this index by the
+ * time we free here; in that case leave the slot alone so the
+ * new entry is not overwritten with NULL.
+ */
+ if (entry == table->data_vec[entry->attr.index])
+ table->data_vec[entry->attr.index] = NULL;
+ /* Now this index is ready to be allocated */
+ write_unlock_irq(&table->rwlock);
+
+ if (entry->attr.ndev)
+ dev_put(entry->attr.ndev);
+ kfree(entry);
}
-static int add_roce_gid(struct ib_gid_table *table,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr)
+static void free_gid_entry(struct kref *kref)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(kref, struct ib_gid_table_entry, kref);
+
+ free_gid_entry_locked(entry);
+}
+
+/**
+ * free_gid_work - Release reference to the GID entry
+ * @work: Work structure to refer to GID entry which needs to be
+ * deleted.
+ *
+ * free_gid_work() frees the entry from the HCA's hardware table
+ * if provider supports it. It releases reference to netdevice.
+ */
+static void free_gid_work(struct work_struct *work)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(work, struct ib_gid_table_entry, del_work);
+ struct ib_device *device = entry->attr.device;
+ u8 port_num = entry->attr.port_num;
+ struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
+ mutex_lock(&table->lock);
+ free_gid_entry_locked(entry);
+ mutex_unlock(&table->lock);
+}
+
+static struct ib_gid_table_entry *
+alloc_gid_entry(const struct ib_gid_attr *attr)
{
struct ib_gid_table_entry *entry;
- int ix = attr->index;
- int ret = 0;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+ kref_init(&entry->kref);
+ memcpy(&entry->attr, attr, sizeof(*attr));
+ if (entry->attr.ndev)
+ dev_hold(entry->attr.ndev);
+ INIT_WORK(&entry->del_work, free_gid_work);
+ entry->state = GID_TABLE_ENTRY_INVALID;
+ return entry;
+}
+
+static void store_gid_entry(struct ib_gid_table *table,
+ struct ib_gid_table_entry *entry)
+{
+ entry->state = GID_TABLE_ENTRY_VALID;
+
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ entry->attr.device->name, entry->attr.port_num,
+ entry->attr.index, entry->attr.gid.raw);
+
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[entry->attr.index] = entry;
+ write_unlock_irq(&table->rwlock);
+}
+
+static void get_gid_entry(struct ib_gid_table_entry *entry)
+{
+ kref_get(&entry->kref);
+}
+
+static void put_gid_entry(struct ib_gid_table_entry *entry)
+{
+ kref_put(&entry->kref, schedule_free_gid);
+}
+
+static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+ kref_put(&entry->kref, free_gid_entry);
+}
+
+static int add_roce_gid(struct ib_gid_table_entry *entry)
+{
+ const struct ib_gid_attr *attr = &entry->attr;
+ int ret;
if (!attr->ndev) {
pr_err("%s NULL netdev device=%s port=%d index=%d\n",
@@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table,
attr->index);
return -EINVAL;
}
-
- entry = &table->data_vec[ix];
- if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
- WARN(1, "GID table corruption device=%s port=%d index=%d\n",
- attr->device->name, attr->port_num,
- attr->index);
- return -EINVAL;
- }
-
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
- ret = attr->device->add_gid(gid, attr, &entry->context);
+ ret = attr->device->add_gid(attr, &entry->context);
if (ret) {
pr_err("%s GID add failed device=%s port=%d index=%d\n",
__func__, attr->device->name, attr->port_num,
attr->index);
- goto add_err;
+ return ret;
}
}
- dev_hold(attr->ndev);
-
-add_err:
- if (!ret)
- pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- attr->device->name, attr->port_num, ix, gid->raw);
- return ret;
+ return 0;
}
/**
* add_modify_gid - Add or modify GID table entry
*
* @table: GID table in which GID to be added or modified
- * @gid: GID content
* @attr: Attributes of the GID
*
* Returns 0 on success or appropriate error code. It accepts zero
@@ -230,34 +348,42 @@ add_err:
* GID. However such zero GIDs are not added to the cache.
*/
static int add_modify_gid(struct ib_gid_table *table,
- const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- int ret;
+ struct ib_gid_table_entry *entry;
+ int ret = 0;
+
+ /*
+ * Invalidate any old entry in the table to make it safe to write to
+ * this index.
+ */
+ if (is_gid_entry_valid(table->data_vec[attr->index]))
+ put_gid_entry(table->data_vec[attr->index]);
+
+ /*
+ * Some HCA's report multiple GID entries with only one valid GID, and
+ * leave other unused entries as the zero GID. Convert zero GIDs to
+ * empty table entries instead of storing them.
+ */
+ if (rdma_is_zero_gid(&attr->gid))
+ return 0;
+
+ entry = alloc_gid_entry(attr);
+ if (!entry)
+ return -ENOMEM;
if (rdma_protocol_roce(attr->device, attr->port_num)) {
- ret = add_roce_gid(table, gid, attr);
+ ret = add_roce_gid(entry);
if (ret)
- return ret;
- } else {
- /*
- * Some HCA's report multiple GID entries with only one
- * valid GID, but remaining as zero GID.
- * So ignore such behavior for IB link layer and don't
- * fail the call, but don't add such entry to GID cache.
- */
- if (rdma_is_zero_gid(gid))
- return 0;
+ goto done;
}
- lockdep_assert_held(&table->lock);
- memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-
- write_lock_irq(&table->rwlock);
- table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
+ store_gid_entry(table, entry);
return 0;
+
+done:
+ put_gid_entry(entry);
+ return ret;
}
/**
@@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table,
static void del_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix)
{
+ struct ib_gid_table_entry *entry;
+
lockdep_assert_held(&table->lock);
+
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ ib_dev->name, port, ix,
+ table->data_vec[ix]->attr.gid.raw);
+
write_lock_irq(&table->rwlock);
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+ entry = table->data_vec[ix];
+ entry->state = GID_TABLE_ENTRY_PENDING_DEL;
+ /*
+ * For non RoCE protocol, GID entry slot is ready to use.
+ */
+ if (!rdma_protocol_roce(ib_dev, port))
+ table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
- if (rdma_protocol_roce(ib_dev, port))
- del_roce_gid(ib_dev, port, table, ix);
- memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
- memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
- table->data_vec[ix].context = NULL;
+ put_gid_entry_locked(entry);
}
/* rwlock should be read locked, or lock should be held */
@@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
int empty = pempty ? -1 : 0;
while (i < table->sz && (found < 0 || empty < 0)) {
- struct ib_gid_table_entry *data = &table->data_vec[i];
- struct ib_gid_attr *attr = &data->attr;
+ struct ib_gid_table_entry *data = table->data_vec[i];
+ struct ib_gid_attr *attr;
int curr_index = i;
i++;
@@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
* so lookup free slot only if requested.
*/
if (pempty && empty < 0) {
- if (data->props & GID_TABLE_ENTRY_INVALID &&
- (default_gid ==
- !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
+ if (is_gid_entry_free(data) &&
+ default_gid ==
+ is_gid_index_default(table, curr_index)) {
/*
* Found an invalid (free) entry; allocate it.
* If default GID is requested, then our
@@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
/*
* Additionally find_gid() is used to find valid entry during
- * lookup operation, where validity needs to be checked. So
- * find the empty entry first to continue to search for a free
- * slot and ignore its INVALID flag.
+ * lookup operation; so ignore the entries which are marked as
+ * pending for removal and the entries which are marked as
+ * invalid.
*/
- if (data->props & GID_TABLE_ENTRY_INVALID)
+ if (!is_gid_entry_valid(data))
continue;
if (found >= 0)
continue;
+ attr = &data->attr;
if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
attr->gid_type != val->gid_type)
continue;
if (mask & GID_ATTR_FIND_MASK_GID &&
- memcmp(gid, &data->gid, sizeof(*gid)))
+ memcmp(gid, &data->attr.gid, sizeof(*gid)))
continue;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
@@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
continue;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
- !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
- default_gid)
+ is_gid_index_default(table, curr_index) != default_gid)
continue;
found = curr_index;
@@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
attr->device = ib_dev;
attr->index = empty;
attr->port_num = port;
- ret = add_modify_gid(table, gid, attr);
+ attr->gid = *gid;
+ ret = add_modify_gid(table, attr);
if (!ret)
dispatch_gid_change_event(ib_dev, port);
@@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
for (ix = 0; ix < table->sz; ix++) {
- if (table->data_vec[ix].attr.ndev == ndev) {
+ if (is_gid_entry_valid(table->data_vec[ix]) &&
+ table->data_vec[ix]->attr.ndev == ndev) {
del_gid(ib_dev, port, table, ix);
deleted = true;
}
@@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
return 0;
}
-static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
- union ib_gid *gid, struct ib_gid_attr *attr)
-{
- struct ib_gid_table *table;
-
- table = rdma_gid_table(ib_dev, port);
-
- if (index < 0 || index >= table->sz)
- return -EINVAL;
-
- if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
- return -EINVAL;
-
- memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
- if (attr) {
- memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
- if (attr->ndev)
- dev_hold(attr->ndev);
- }
-
- return 0;
-}
-
-static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
- const union ib_gid *gid,
- const struct ib_gid_attr *val,
- unsigned long mask,
- u8 *port, u16 *index)
-{
- struct ib_gid_table *table;
- u8 p;
- int local_index;
- unsigned long flags;
-
- for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ib_dev->cache.ports[p].gid;
- read_lock_irqsave(&table->rwlock, flags);
- local_index = find_gid(table, gid, val, false, mask, NULL);
- if (local_index >= 0) {
- if (index)
- *index = local_index;
- if (port)
- *port = p + rdma_start_port(ib_dev);
- read_unlock_irqrestore(&table->rwlock, flags);
- return 0;
- }
- read_unlock_irqrestore(&table->rwlock, flags);
- }
-
- return -ENOENT;
-}
-
-static int ib_cache_gid_find(struct ib_device *ib_dev,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev, u8 *port,
- u16 *index)
-{
- unsigned long mask = GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE;
- struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-
- if (ndev)
- mask |= GID_ATTR_FIND_MASK_NETDEV;
-
- return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
- mask, port, index);
-}
-
/**
- * ib_find_cached_gid_by_port - Returns the GID table index where a specified
- * GID value occurs. It searches for the specified GID value in the local
- * software cache.
+ * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
+ * a valid GID entry for given search parameters. It searches for the specified
+ * GID value in the local software cache.
* @device: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
* @port_num: The port number of the device where the GID value should be
* searched.
- * @ndev: In RoCE, the net device of the device. Null means ignore.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ *
+ * Returns the sgid attributes, with a reference held, if the GID is
+ * found; otherwise returns an ERR_PTR for the error.
+ * The caller must invoke rdma_put_gid_attr() to release the reference.
*/
-int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- u8 port, struct net_device *ndev,
- u16 *index)
+const struct ib_gid_attr *
+rdma_find_gid_by_port(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ u8 port, struct net_device *ndev)
{
int local_index;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+ const struct ib_gid_attr *attr;
unsigned long flags;
if (!rdma_is_port_valid(ib_dev, port))
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
table = rdma_gid_table(ib_dev, port);
@@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, &val, false, mask, NULL);
if (local_index >= 0) {
- if (index)
- *index = local_index;
+ get_gid_entry(table->data_vec[local_index]);
+ attr = &table->data_vec[local_index]->attr;
read_unlock_irqrestore(&table->rwlock, flags);
- return 0;
+ return attr;
}
read_unlock_irqrestore(&table->rwlock, flags);
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
- * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
- * GID value occurs
+ * rdma_find_gid_by_filter - Returns the GID table attribute where a
+ * specified GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
- * @port_num: The port number of the device where the GID value could be
+ * @port: The port number of the device where the GID value could be
* searched.
* @filter: The filter function is executed on any matching GID in the table.
* If the filter function returns true, the corresponding index is returned,
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
*
- * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
- * This function is only supported on RoCE ports.
*
*/
-static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port,
- bool (*filter)(const union ib_gid *,
- const struct ib_gid_attr *,
- void *),
- void *context,
- u16 *index)
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+ struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+ bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+ void *),
+ void *context)
{
+ const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
struct ib_gid_table *table;
- unsigned int i;
unsigned long flags;
- bool found = false;
-
+ unsigned int i;
- if (!rdma_is_port_valid(ib_dev, port) ||
- !rdma_protocol_roce(ib_dev, port))
- return -EPROTONOSUPPORT;
+ if (!rdma_is_port_valid(ib_dev, port))
+ return ERR_PTR(-EINVAL);
table = rdma_gid_table(ib_dev, port);
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
- struct ib_gid_attr attr;
+ struct ib_gid_table_entry *entry = table->data_vec[i];
- if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+ if (!is_gid_entry_valid(entry))
continue;
- if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+ if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
continue;
- memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
-
- if (filter(gid, &attr, context)) {
- found = true;
- if (index)
- *index = i;
+ if (filter(gid, &entry->attr, context)) {
+ get_gid_entry(entry);
+ res = &entry->attr;
break;
}
}
read_unlock_irqrestore(&table->rwlock, flags);
-
- if (!found)
- return -ENOENT;
- return 0;
+ return res;
}
static struct ib_gid_table *alloc_gid_table(int sz)
{
- struct ib_gid_table *table =
- kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
- int i;
+ struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;
@@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
-
- /* Mark all entries as invalid so that allocator can allocate
- * one of the invalid (free) entry.
- */
- for (i = 0; i < sz; i++)
- table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
return table;
err_free_table:
@@ -720,12 +769,30 @@ err_free_table:
return NULL;
}
-static void release_gid_table(struct ib_gid_table *table)
+static void release_gid_table(struct ib_device *device, u8 port,
+ struct ib_gid_table *table)
{
- if (table) {
- kfree(table->data_vec);
- kfree(table);
+ bool leak = false;
+ int i;
+
+ if (!table)
+ return;
+
+ for (i = 0; i < table->sz; i++) {
+ if (is_gid_entry_free(table->data_vec[i]))
+ continue;
+ if (kref_read(&table->data_vec[i]->kref) > 1) {
+ pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
+ device->name, i,
+ kref_read(&table->data_vec[i]->kref));
+ leak = true;
+ }
}
+ if (leak)
+ return;
+
+ kfree(table->data_vec);
+ kfree(table);
}
static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
@@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
- if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
+ if (is_gid_entry_valid(table->data_vec[i])) {
del_gid(ib_dev, port, table, i);
deleted = true;
}
@@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
{
union ib_gid gid = { };
struct ib_gid_attr gid_attr;
- struct ib_gid_table *table;
unsigned int gid_type;
unsigned long mask;
- table = rdma_gid_table(ib_dev, port);
-
mask = GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_DEFAULT |
GID_ATTR_FIND_MASK_NETDEV;
@@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
unsigned int i;
unsigned long roce_gid_type_mask;
unsigned int num_default_gids;
- unsigned int current_gid = 0;
roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
num_default_gids = hweight_long(roce_gid_type_mask);
- for (i = 0; i < num_default_gids && i < table->sz; i++) {
- struct ib_gid_table_entry *entry = &table->data_vec[i];
-
- entry->props |= GID_TABLE_ENTRY_DEFAULT;
- current_gid = find_next_bit(&roce_gid_type_mask,
- BITS_PER_LONG,
- current_gid);
- entry->attr.gid_type = current_gid++;
- }
+ /* Reserve starting indices for default GIDs */
+ for (i = 0; i < num_default_gids && i < table->sz; i++)
+ table->default_gid_indices |= BIT(i);
}
@@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
- release_gid_table(table);
+ release_gid_table(ib_dev, port, table);
ib_dev->cache.ports[port].gid = NULL;
}
}
@@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
return err;
}
-int ib_get_cached_gid(struct ib_device *device,
- u8 port_num,
- int index,
- union ib_gid *gid,
- struct ib_gid_attr *gid_attr)
+/**
+ * rdma_query_gid - Read the GID content from the GID software cache
+ * @device: Device to query the GID
+ * @port_num: Port number of the device
+ * @index: Index of the GID table entry to read
+ * @gid: Pointer to GID where to store the entry's GID
+ *
+ * rdma_query_gid() only reads the GID entry content for requested device,
+ * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
+ * hold any reference to the GID table entry in the HCA or software cache.
+ *
+ * Returns 0 on success or appropriate error code.
+ *
+ */
+int rdma_query_gid(struct ib_device *device, u8 port_num,
+ int index, union ib_gid *gid)
{
- int res;
- unsigned long flags;
struct ib_gid_table *table;
+ unsigned long flags;
+ int res = -EINVAL;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
- read_unlock_irqrestore(&table->rwlock, flags);
+ if (index < 0 || index >= table->sz ||
+ !is_gid_entry_valid(table->data_vec[index]))
+ goto done;
+
+ memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
+ res = 0;
+
+done:
+ read_unlock_irqrestore(&table->rwlock, flags);
return res;
}
-EXPORT_SYMBOL(ib_get_cached_gid);
+EXPORT_SYMBOL(rdma_query_gid);
/**
- * ib_find_cached_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
+ * rdma_find_gid - Returns SGID attributes if the matching GID is found.
* @device: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
- * @port_num: The port number of the device where the GID value was found.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
*
- * ib_find_cached_gid() searches for the specified GID value in
- * the local software cache.
+ * rdma_find_gid() searches for the specified GID value in the software cache.
+ *
+ * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
+ * error. The caller must invoke rdma_put_gid_attr() to release the reference.
+ *
*/
-int ib_find_cached_gid(struct ib_device *device,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev,
- u8 *port_num,
- u16 *index)
-{
- return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
-}
-EXPORT_SYMBOL(ib_find_cached_gid);
-
-int ib_find_gid_by_filter(struct ib_device *device,
- const union ib_gid *gid,
- u8 port_num,
- bool (*filter)(const union ib_gid *gid,
- const struct ib_gid_attr *,
- void *),
- void *context, u16 *index)
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ struct net_device *ndev)
{
- /* Only RoCE GID table supports filter function */
- if (!rdma_protocol_roce(device, port_num) && filter)
- return -EPROTONOSUPPORT;
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
+ u8 p;
+
+ if (ndev)
+ mask |= GID_ATTR_FIND_MASK_NETDEV;
+
+ for (p = 0; p < device->phys_port_cnt; p++) {
+ struct ib_gid_table *table;
+ unsigned long flags;
+ int index;
+
+ table = device->cache.ports[p].gid;
+ read_lock_irqsave(&table->rwlock, flags);
+ index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
+ if (index >= 0) {
+ const struct ib_gid_attr *attr;
+
+ get_gid_entry(table->data_vec[index]);
+ attr = &table->data_vec[index]->attr;
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return attr;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
- return ib_cache_gid_find_by_filter(device, gid,
- port_num, filter,
- context, index);
+ return ERR_PTR(-ENOENT);
}
+EXPORT_SYMBOL(rdma_find_gid);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
@@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_port_state);
+/**
+ * rdma_get_gid_attr - Returns GID attributes for a port of a device
+ * at a requested gid_index, if a valid GID entry exists.
+ * @device: The device to query.
+ * @port_num: The port number on the device where the GID value
+ * is to be queried.
+ * @index: Index of the GID table entry whose attributes are to
+ * be queried.
+ *
+ * rdma_get_gid_attr() takes a reference on the GID attribute held in the
+ * cached GID table. The caller must invoke rdma_put_gid_attr() to release
+ * the reference to the GID attribute, regardless of link layer.
+ *
+ * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
+ * code.
+ */
+const struct ib_gid_attr *
+rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+{
+ const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ struct ib_gid_table *table;
+ unsigned long flags;
+
+ if (!rdma_is_port_valid(device, port_num))
+ return ERR_PTR(-EINVAL);
+
+ table = rdma_gid_table(device, port_num);
+ if (index < 0 || index >= table->sz)
+ return ERR_PTR(-EINVAL);
+
+ read_lock_irqsave(&table->rwlock, flags);
+ if (!is_gid_entry_valid(table->data_vec[index]))
+ goto done;
+
+ get_gid_entry(table->data_vec[index]);
+ attr = &table->data_vec[index]->attr;
+done:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return attr;
+}
+EXPORT_SYMBOL(rdma_get_gid_attr);
+
+/**
+ * rdma_put_gid_attr - Release reference to the GID attribute
+ * @attr: Pointer to the GID attribute whose reference
+ * needs to be released.
+ *
+ * rdma_put_gid_attr() must be used to release a reference that was
+ * acquired using rdma_get_gid_attr() or any other API that returns
+ * a pointer to an ib_gid_attr, regardless of whether the link layer
+ * is IB or RoCE.
+ *
+ */
+void rdma_put_gid_attr(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+
+ put_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_put_gid_attr);
+
+/**
+ * rdma_hold_gid_attr - Get reference to existing GID attribute
+ *
+ * @attr: Pointer to the GID attribute whose reference
+ * needs to be taken.
+ *
+ * Increase the reference count of a GID attribute to keep it from being
+ * freed. Callers must already be holding a reference to the attribute.
+ *
+ */
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+
+ get_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_hold_gid_attr);
+
static int config_non_roce_gid_cache(struct ib_device *device,
u8 port, int gid_tbl_len)
{
struct ib_gid_attr gid_attr = {};
struct ib_gid_table *table;
- union ib_gid gid;
int ret = 0;
int i;
@@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
for (i = 0; i < gid_tbl_len; ++i) {
if (!device->query_gid)
continue;
- ret = device->query_gid(device, port, i, &gid);
+ ret = device->query_gid(device, port, i, &gid_attr.gid);
if (ret) {
pr_warn("query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
gid_attr.index = i;
- add_modify_gid(table, &gid, &gid_attr);
+ add_modify_gid(table, &gid_attr);
}
err:
mutex_unlock(&table->lock);
@@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device,
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
int i;
int ret;
- struct ib_gid_table *table;
if (!rdma_is_port_valid(device, port))
return;
- table = rdma_gid_table(device, port);
-
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return;
@@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device)
ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
+
+ /*
+ * Flush the wq a second time for any pending GID delete work.
+ */
+ flush_workqueue(ib_wq);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 27a7b0a2e27a..6e39c27dca8e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
if (ret)
return ret;
- memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+ rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
return ret;
}
-static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
+static struct cm_port *
+get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
- u8 p;
- struct net_device *ndev = ib_get_ndev_from_path(path);
-
- read_lock_irqsave(&cm.device_lock, flags);
- list_for_each_entry(cm_dev, &cm.device_list, list) {
- if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- sa_conv_pathrec_to_gid_type(path),
- ndev, &p, NULL)) {
- port = cm_dev->port[p - 1];
- break;
+
+ if (attr) {
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ if (cm_dev->ib_device == attr->device) {
+ port = cm_dev->port[attr->port_num - 1];
+ break;
+ }
+ }
+ read_unlock_irqrestore(&cm.device_lock, flags);
+ } else {
+ /* The SGID attribute can be NULL in the following
+ * conditions:
+ * (a) Alternative path
+ * (b) IB link layer without GRH
+ * (c) LAP send messages
+ */
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ attr = rdma_find_gid(cm_dev->ib_device,
+ &path->sgid,
+ sa_conv_pathrec_to_gid_type(path),
+ NULL);
+ if (!IS_ERR(attr)) {
+ port = cm_dev->port[attr->port_num - 1];
+ break;
+ }
}
+ read_unlock_irqrestore(&cm.device_lock, flags);
+ if (port)
+ rdma_put_gid_attr(attr);
}
- read_unlock_irqrestore(&cm.device_lock, flags);
-
- if (ndev)
- dev_put(ndev);
return port;
}
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path,
+ const struct ib_gid_attr *sgid_attr,
+ struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct rdma_ah_attr new_ah_attr;
@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_port *port;
int ret;
- port = get_cm_port_from_path(path);
+ port = get_cm_port_from_path(path, sgid_attr);
if (!port)
return -EINVAL;
cm_dev = port->cm_dev;
@@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
/*
* av->ah_attr might be initialized based on wc or during
- * request processing time. So initialize a new ah_attr on stack.
+ * request processing time, which might hold a reference to sgid_attr.
+ * So initialize a new ah_attr on stack.
* If initialization fails, old ah_attr is used for sending any
* responses. If initialization is successful, than new ah_attr
- * is used by overwriting the old one.
+ * is used by overwriting the old one. This way the right ah_attr
+ * can be used to return an error response.
*/
ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
- &new_ah_attr);
+ &new_ah_attr, sgid_attr);
if (ret)
return ret;
av->timeout = path->packet_life_time + 1;
ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret)
+ if (ret) {
+ rdma_destroy_ah_attr(&new_ah_attr);
return ret;
- memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+ }
+ rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
@@ -1091,6 +1114,9 @@ retest:
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
+
+ rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
+ rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
kfree(cm_id_priv->private_data);
kfree(cm_id_priv);
}
@@ -1230,14 +1256,12 @@ new_id:
}
EXPORT_SYMBOL(ib_cm_insert_listen);
-static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
- enum cm_msg_sequence msg_seq)
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
u64 hi_tid, low_tid;
hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- low_tid = (u64) ((__force u32)cm_id_priv->id.local_id |
- (msg_seq << 30));
+ low_tid = (u64)cm_id_priv->id.local_id;
return cpu_to_be64(hi_tid | low_tid);
}
@@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
pri_path->opa.slid);
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+ cm_form_tid(cm_id_priv));
req_msg->local_comm_id = cm_id_priv->id.local_id;
req_msg->service_id = param->service_id;
@@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+ ret = cm_init_av_by_path(param->primary_path,
+ param->ppath_sgid_attr, &cm_id_priv->av,
cm_id_priv);
if (ret)
goto error1;
if (param->alternate_path) {
- ret = cm_init_av_by_path(param->alternate_path,
+ ret = cm_init_av_by_path(param->alternate_path, NULL,
&cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto error1;
@@ -1646,7 +1671,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
(ib_is_opa_gid(&path->sgid))) {
union ib_gid sgid;
- if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
+ if (rdma_query_gid(dev, port_num, 0, &sgid)) {
dev_warn(&dev->dev,
"Error updating sgid in CM request\n");
return;
@@ -1691,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work,
param->retry_count = cm_req_get_retry_count(req_msg);
param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
param->srq = cm_req_get_srq(req_msg);
+ param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
work->cm_event.private_data = &req_msg->private_data;
}
@@ -1914,9 +1940,8 @@ static int cm_req_handler(struct cm_work *work)
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
- union ib_gid gid;
- struct ib_gid_attr gid_attr;
const struct ib_global_route *grh;
+ const struct ib_gid_attr *gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1961,24 +1986,13 @@ static int cm_req_handler(struct cm_work *work)
if (cm_req_has_alt_path(req_msg))
memset(&work->path[1], 0, sizeof(work->path[1]));
grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
- ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num,
- grh->sgid_index,
- &gid, &gid_attr);
- if (ret) {
- ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
- goto rejected;
- }
+ gid_attr = grh->sgid_attr;
- if (gid_attr.ndev) {
+ if (gid_attr && gid_attr->ndev) {
work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
+ sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
+ /* If no GID attribute or ndev is null, it is not RoCE. */
cm_path_set_rec_type(work->port->cm_dev->ib_device,
work->port->port_num,
&work->path[0],
@@ -1992,15 +2006,14 @@ static int cm_req_handler(struct cm_work *work)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
cm_id_priv);
if (ret) {
int err;
- err = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0,
- &work->path[0].sgid,
- NULL);
+ err = rdma_query_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid);
if (err)
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
NULL, 0, NULL, 0);
@@ -2012,8 +2025,8 @@ static int cm_req_handler(struct cm_work *work)
goto rejected;
}
if (cm_req_has_alt_path(req_msg)) {
- ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
- cm_id_priv);
+ ret = cm_init_av_by_path(&work->path[1], NULL,
+ &cm_id_priv->alt_av, cm_id_priv);
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
@@ -2451,7 +2464,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
+ cm_form_tid(cm_id_priv));
dreq_msg->local_comm_id = cm_id_priv->id.local_id;
dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
@@ -3082,7 +3095,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
alternate_path->opa.slid);
cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
+ cm_form_tid(cm_id_priv));
lap_msg->local_comm_id = cm_id_priv->id.local_id;
lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
@@ -3136,7 +3149,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+ ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
if (ret)
goto out;
@@ -3279,7 +3292,7 @@ static int cm_lap_handler(struct cm_work *work)
if (ret)
goto unlock;
- cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+ cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
@@ -3458,7 +3471,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
struct ib_cm_sidr_req_param *param)
{
cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
+ cm_form_tid(cm_id_priv));
sidr_req_msg->request_id = cm_id_priv->id.local_id;
sidr_req_msg->pkey = param->path->pkey;
sidr_req_msg->service_id = param->service_id;
@@ -3481,7 +3494,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
+ ret = cm_init_av_by_path(param->path, param->sgid_attr,
+ &cm_id_priv->av,
+ cm_id_priv);
if (ret)
goto out;
@@ -3518,6 +3533,7 @@ out:
EXPORT_SYMBOL(ib_send_cm_sidr_req);
static void cm_format_sidr_req_event(struct cm_work *work,
+ const struct cm_id_private *rx_cm_id,
struct ib_cm_id *listen_id)
{
struct cm_sidr_req_msg *sidr_req_msg;
@@ -3531,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work,
param->service_id = sidr_req_msg->service_id;
param->bth_pkey = cm_get_bth_pkey(work);
param->port = work->port->port_num;
+ param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
work->cm_event.private_data = &sidr_req_msg->private_data;
}
@@ -3588,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
cm_id_priv->id.service_id = sidr_req_msg->service_id;
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
- cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
+ cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
cm_process_work(cm_id_priv, work);
cm_deref_id(cur_cm_id_priv);
return 0;
@@ -3665,7 +3682,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);
-static void cm_format_sidr_rep_event(struct cm_work *work)
+static void cm_format_sidr_rep_event(struct cm_work *work,
+ const struct cm_id_private *cm_id_priv)
{
struct cm_sidr_rep_msg *sidr_rep_msg;
struct ib_cm_sidr_rep_event_param *param;
@@ -3678,6 +3696,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work)
param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
param->info = &sidr_rep_msg->info;
param->info_len = sidr_rep_msg->info_length;
+ param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
work->cm_event.private_data = &sidr_rep_msg->private_data;
}
@@ -3701,7 +3720,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irq(&cm_id_priv->lock);
- cm_format_sidr_rep_event(work);
+ cm_format_sidr_rep_event(work, cm_id_priv);
cm_process_work(cm_id_priv, work);
return 0;
out:
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 8b76f0ef965e..476d4309576d 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -44,13 +44,6 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-enum cm_msg_sequence {
- CM_MSG_SEQUENCE_REQ,
- CM_MSG_SEQUENCE_LAP,
- CM_MSG_SEQUENCE_DREQ,
- CM_MSG_SEQUENCE_SIDR
-};
-
struct cm_req_msg {
struct ib_mad_hdr hdr;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index bff10ab141b0..f72677291b69 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -366,7 +366,6 @@ struct cma_multicast {
void *context;
struct sockaddr_storage addr;
struct kref mcref;
- bool igmp_joined;
u8 join_state;
};
@@ -412,11 +411,11 @@ struct cma_req_info {
struct sockaddr_storage listen_addr_storage;
struct sockaddr_storage src_addr_storage;
struct ib_device *device;
- int port;
union ib_gid local_gid;
__be64 service_id;
+ int port;
+ bool has_gid;
u16 pkey;
- bool has_gid:1;
};
static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
@@ -491,12 +490,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
{
cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
- id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
rdma_restrack_add(&id_priv->res);
}
@@ -603,46 +600,53 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
return ret;
}
-static inline int cma_validate_port(struct ib_device *device, u8 port,
- enum ib_gid_type gid_type,
- union ib_gid *gid,
- struct rdma_id_private *id_priv)
+static const struct ib_gid_attr *
+cma_validate_port(struct ib_device *device, u8 port,
+ enum ib_gid_type gid_type,
+ union ib_gid *gid,
+ struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int bound_if_index = dev_addr->bound_dev_if;
+ const struct ib_gid_attr *sgid_attr;
int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
- int ret = -ENODEV;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
- return ret;
+ return ERR_PTR(-ENODEV);
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
- return ret;
+ return ERR_PTR(-ENODEV);
if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(dev_addr->net, bound_if_index);
if (!ndev)
- return ret;
+ return ERR_PTR(-ENODEV);
} else {
gid_type = IB_GID_TYPE_IB;
}
- ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, NULL);
-
+ sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
if (ndev)
dev_put(ndev);
+ return sgid_attr;
+}
- return ret;
+static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
+ const struct ib_gid_attr *sgid_attr)
+{
+ WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}
static int cma_acquire_dev(struct rdma_id_private *id_priv,
- struct rdma_id_private *listen_id_priv)
+ const struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid, *gidp;
+ enum ib_gid_type gid_type;
int ret = -ENODEV;
u8 port;
@@ -662,14 +666,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
port = listen_id_priv->id.port_num;
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
-
- ret = cma_validate_port(cma_dev->device, port,
- rdma_protocol_ib(cma_dev->device, port) ?
- IB_GID_TYPE_IB :
- listen_id_priv->gid_type, gidp,
- id_priv);
- if (!ret) {
+ gid_type = listen_id_priv->gid_type;
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
goto out;
}
}
@@ -683,14 +686,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
-
- ret = cma_validate_port(cma_dev->device, port,
- rdma_protocol_ib(cma_dev->device, port) ?
- IB_GID_TYPE_IB :
- cma_dev->default_gid_type[port - 1],
- gidp, id_priv);
- if (!ret) {
+ gid_type = cma_dev->default_gid_type[port - 1];
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
goto out;
}
}
@@ -732,8 +734,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
- &gid, NULL);
+ for (i = 0; !rdma_query_gid(cur_dev->device,
+ p, i, &gid);
i++) {
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
@@ -785,12 +787,14 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->res.kern_name = caller;
else
rdma_restrack_set_task(&id_priv->res, current);
+ id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
id_priv->tos_set = false;
+ id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -1036,35 +1040,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
}
EXPORT_SYMBOL(rdma_init_qp_attr);
-static inline int cma_zero_addr(struct sockaddr *addr)
+static inline bool cma_zero_addr(const struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
case AF_INET6:
- return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
case AF_IB:
- return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+ return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
default:
- return 0;
+ return false;
}
}
-static inline int cma_loopback_addr(struct sockaddr *addr)
+static inline bool cma_loopback_addr(const struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
- return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ return ipv4_is_loopback(
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr);
case AF_INET6:
- return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ return ipv6_addr_loopback(
+ &((struct sockaddr_in6 *)addr)->sin6_addr);
case AF_IB:
- return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+ return ib_addr_loopback(
+ &((struct sockaddr_ib *)addr)->sib_addr);
default:
- return 0;
+ return false;
}
}
-static inline int cma_any_addr(struct sockaddr *addr)
+static inline bool cma_any_addr(const struct sockaddr *addr)
{
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
@@ -1087,7 +1094,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
}
}
-static __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(const struct sockaddr *addr)
{
struct sockaddr_ib *sib;
@@ -1105,15 +1112,15 @@ static __be16 cma_port(struct sockaddr *addr)
}
}
-static inline int cma_any_port(struct sockaddr *addr)
+static inline int cma_any_port(const struct sockaddr *addr)
{
return !cma_port(addr);
}
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
- struct rdma_cm_id *listen_id,
- struct sa_path_rec *path)
+ const struct rdma_cm_id *listen_id,
+ const struct sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
@@ -1198,7 +1205,7 @@ static u16 cma_port_from_service_id(__be64 service_id)
static int cma_save_ip_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
- struct ib_cm_event *ib_event,
+ const struct ib_cm_event *ib_event,
__be64 service_id)
{
struct cma_hdr *hdr;
@@ -1228,8 +1235,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
static int cma_save_net_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
- struct rdma_cm_id *listen_id,
- struct ib_cm_event *ib_event,
+ const struct rdma_cm_id *listen_id,
+ const struct ib_cm_event *ib_event,
sa_family_t sa_family, __be64 service_id)
{
if (sa_family == AF_IB) {
@@ -1361,7 +1368,23 @@ static bool validate_net_dev(struct net_device *net_dev,
}
}
-static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
+static struct net_device *
+roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
+{
+ const struct ib_gid_attr *sgid_attr = NULL;
+
+ if (ib_event->event == IB_CM_REQ_RECEIVED)
+ sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
+ else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
+ sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
+
+ if (!sgid_attr)
+ return NULL;
+ dev_hold(sgid_attr->ndev);
+ return sgid_attr->ndev;
+}
+
+static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
struct cma_req_info *req)
{
struct sockaddr *listen_addr =
@@ -1376,8 +1399,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
if (err)
return ERR_PTR(err);
- net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
- gid, listen_addr);
+ if (rdma_protocol_roce(req->device, req->port))
+ net_dev = roce_get_net_dev_by_cm_event(ib_event);
+ else
+ net_dev = ib_get_net_dev_by_params(req->device, req->port,
+ req->pkey,
+ gid, listen_addr);
if (!net_dev)
return ERR_PTR(-ENODEV);
@@ -1440,14 +1467,20 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
const struct rdma_addr *addr = &id->route.addr;
if (!net_dev)
- /* This request is an AF_IB request or a RoCE request */
+ /* This request is an AF_IB request */
return (!id->port_num || id->port_num == port_num) &&
- (addr->src_addr.ss_family == AF_IB ||
- rdma_protocol_roce(id->device, port_num));
+ (addr->src_addr.ss_family == AF_IB);
- return !addr->dev_addr.bound_dev_if ||
- (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
- addr->dev_addr.bound_dev_if == net_dev->ifindex);
+ /*
+ * Net namespaces must match, and if the listener is listening
+ * on a specific netdevice then the netdevice must match as well.
+ */
+ if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
+ (!!addr->dev_addr.bound_dev_if ==
+ (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
+ return true;
+ else
+ return false;
}
static struct rdma_id_private *cma_find_listener(
@@ -1480,9 +1513,10 @@ static struct rdma_id_private *cma_find_listener(
return ERR_PTR(-EINVAL);
}
-static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
- struct ib_cm_event *ib_event,
- struct net_device **net_dev)
+static struct rdma_id_private *
+cma_ib_id_from_event(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *ib_event,
+ struct net_device **net_dev)
{
struct cma_req_info req;
struct rdma_bind_list *bind_list;
@@ -1498,10 +1532,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
- } else if (rdma_protocol_roce(req.device, req.port)) {
- /* TODO find the net dev matching the request parameters
- * through the RoCE GID table */
- *net_dev = NULL;
} else {
return ERR_CAST(*net_dev);
}
@@ -1629,6 +1659,21 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
+static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
+ dev_put(ndev);
+ }
+ kref_put(&mc->mcref, release_mc);
+}
+
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
struct cma_multicast *mc;
@@ -1642,22 +1687,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
} else {
- if (mc->igmp_joined) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
-
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
- dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev,
- &mc->multicast.ib->rec.mgid,
- false);
- dev_put(ndev);
- }
- }
- kref_put(&mc->mcref, release_mc);
+ cma_leave_roce_mc_group(id_priv, mc);
}
}
}
@@ -1699,6 +1729,10 @@ void rdma_destroy_id(struct rdma_cm_id *id)
cma_deref_id(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+
+ if (id_priv->id.route.addr.dev_addr.sgid_attr)
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
@@ -1730,7 +1764,7 @@ reject:
}
static void cma_set_rep_event_data(struct rdma_cm_event *event,
- struct ib_cm_rep_event_param *rep_data,
+ const struct ib_cm_rep_event_param *rep_data,
void *private_data)
{
event->param.conn.private_data = private_data;
@@ -1743,10 +1777,11 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = rep_data->remote_qpn;
}
-static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_handler(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv = cm_id->context;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
int ret = 0;
mutex_lock(&id_priv->handler_mutex);
@@ -1756,7 +1791,6 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
id_priv->state != RDMA_CM_DISCONNECT))
goto out;
- memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_REQ_ERROR:
case IB_CM_REP_ERROR:
@@ -1825,9 +1859,10 @@ out:
return ret;
}
-static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
- struct ib_cm_event *ib_event,
- struct net_device *net_dev)
+static struct rdma_id_private *
+cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
+ const struct ib_cm_event *ib_event,
+ struct net_device *net_dev)
{
struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
@@ -1888,11 +1923,12 @@ err:
return NULL;
}
-static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
- struct ib_cm_event *ib_event,
- struct net_device *net_dev)
+static struct rdma_id_private *
+cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
+ const struct ib_cm_event *ib_event,
+ struct net_device *net_dev)
{
- struct rdma_id_private *listen_id_priv;
+ const struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
@@ -1932,7 +1968,7 @@ err:
}
static void cma_set_req_event_data(struct rdma_cm_event *event,
- struct ib_cm_req_event_param *req_data,
+ const struct ib_cm_req_event_param *req_data,
void *private_data, int offset)
{
event->param.conn.private_data = private_data + offset;
@@ -1946,7 +1982,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = req_data->remote_qpn;
}
-static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
+ const struct ib_cm_event *ib_event)
{
return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
(ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
@@ -1955,19 +1992,20 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
(!id->qp_type));
}
-static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_req_handler(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *ib_event)
{
struct rdma_id_private *listen_id, *conn_id = NULL;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
struct net_device *net_dev;
u8 offset;
int ret;
- listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
+ listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
- if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
+ if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
}
@@ -1978,16 +2016,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
goto err1;
}
- memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
- conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
+ conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
} else {
- conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
+ conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
ib_event->private_data, offset);
}
@@ -2087,7 +2124,7 @@ EXPORT_SYMBOL(rdma_read_gids);
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
int ret = 0;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
@@ -2096,7 +2133,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
if (id_priv->state != RDMA_CM_CONNECT)
goto out;
- memset(&event, 0, sizeof event);
switch (iw_event->event) {
case IW_CM_EVENT_CLOSE:
event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -2156,11 +2192,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
+ event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+ event.param.conn.private_data = iw_event->private_data;
+ event.param.conn.private_data_len = iw_event->private_data_len;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
+
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
@@ -2202,13 +2244,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- memset(&event, 0, sizeof event);
- event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
- event.param.conn.private_data = iw_event->private_data;
- event.param.conn.private_data_len = iw_event->private_data_len;
- event.param.conn.initiator_depth = iw_event->ird;
- event.param.conn.responder_resources = iw_event->ord;
-
/*
* Protect against the user destroying conn_id from another thread
* until we're done accessing it.
@@ -2241,7 +2276,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
addr = cma_src_addr(id_priv);
svc_id = rdma_get_service_id(&id_priv->id, addr);
- id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
+ id = ib_cm_insert_listen(id_priv->id.device,
+ cma_ib_req_handler, svc_id);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.ib = id;
@@ -2561,8 +2597,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
route->path_rec->roce.route_resolved = true;
- sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
- sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
return ndev;
}
@@ -2791,7 +2825,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
p = 1;
port_found:
- ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
+ ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
if (ret)
goto out;
@@ -2817,9 +2851,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *dev_addr, void *context)
{
struct rdma_id_private *id_priv = context;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
- memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
RDMA_CM_ADDR_RESOLVED))
@@ -2910,7 +2943,7 @@ err:
}
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- struct sockaddr *dst_addr)
+ const struct sockaddr *dst_addr)
{
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
@@ -2931,31 +2964,25 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- struct sockaddr *dst_addr, int timeout_ms)
+ const struct sockaddr *dst_addr, int timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ if (ret)
return ret;
- }
}
- if (cma_family(id_priv) != dst_addr->sa_family) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ if (cma_family(id_priv) != dst_addr->sa_family)
return -EINVAL;
- }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
return -EINVAL;
- }
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
atomic_inc(&id_priv->refcount);
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
@@ -3451,18 +3478,18 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
}
static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *ib_event)
+ const struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv = cm_id->context;
- struct rdma_cm_event event;
- struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
+ struct rdma_cm_event event = {};
+ const struct ib_cm_sidr_rep_event_param *rep =
+ &ib_event->param.sidr_rep_rcvd;
int ret = 0;
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state != RDMA_CM_CONNECT)
goto out;
- memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_SIDR_REQ_ERROR:
event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -3488,7 +3515,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
ib_init_ah_attr_from_path(id_priv->id.device,
id_priv->id.port_num,
id_priv->id.route.path_rec,
- &event.param.ud.ah_attr);
+ &event.param.ud.ah_attr,
+ rep->sgid_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3501,6 +3529,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
}
ret = id_priv->id.event_handler(&id_priv->id, &event);
+
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
@@ -3557,6 +3587,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
id_priv->cm_id.ib = id;
req.path = id_priv->id.route.path_rec;
+ req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -3618,6 +3649,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
if (route->num_paths == 2)
req.alternate_path = &route->path_rec[1];
+ req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
+ /* Alternate path SGID attribute currently unsupported */
req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.qp_num = id_priv->qp_num;
req.qp_type = id_priv->id.qp_type;
@@ -3928,7 +3961,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
struct rdma_id_private *id_priv;
struct cma_multicast *mc = multicast->context;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
int ret = 0;
id_priv = mc->id_priv;
@@ -3952,7 +3985,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
}
mutex_unlock(&id_priv->qp_mutex);
- memset(&event, 0, sizeof event);
event.status = status;
event.param.ud.private_data = mc->context;
if (!status) {
@@ -3981,6 +4013,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
ret = id_priv->id.event_handler(&id_priv->id, &event);
+
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
@@ -4010,7 +4044,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if (addr->sa_family == AF_IB) {
memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
- } else if ((addr->sa_family == AF_INET6)) {
+ } else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
@@ -4168,8 +4202,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (!send_only) {
err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
true);
- if (!err)
- mc->igmp_joined = true;
}
}
} else {
@@ -4221,26 +4253,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
- mc->igmp_joined = false;
mc->join_state = join_state;
- spin_lock(&id_priv->lock);
- list_add(&mc->list, &id_priv->mc_list);
- spin_unlock(&id_priv->lock);
if (rdma_protocol_roce(id->device, id->port_num)) {
kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
- } else if (rdma_cap_ib_mcast(id->device, id->port_num))
+ if (ret)
+ goto out_err;
+ } else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ret = cma_join_ib_multicast(id_priv, mc);
- else
+ if (ret)
+ goto out_err;
+ } else {
ret = -ENOSYS;
-
- if (ret) {
- spin_lock_irq(&id_priv->lock);
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
- kfree(mc);
+ goto out_err;
}
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ return 0;
+out_err:
+ kfree(mc);
return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);
@@ -4268,23 +4303,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
} else if (rdma_protocol_roce(id->device, id->port_num)) {
- if (mc->igmp_joined) {
- struct rdma_dev_addr *dev_addr =
- &id->route.addr.dev_addr;
- struct net_device *ndev = NULL;
-
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net,
- dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev,
- &mc->multicast.ib->rec.mgid,
- false);
- dev_put(ndev);
- }
- mc->igmp_joined = false;
- }
- kref_put(&mc->mcref, release_mc);
+ cma_leave_roce_mc_group(id_priv, mc);
}
return;
}
@@ -4410,7 +4429,7 @@ free_cma_dev:
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
enum rdma_cm_state state;
int ret = 0;
@@ -4426,7 +4445,6 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
goto out;
- memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index fae417a391fb..77c7005c396c 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -91,8 +91,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
-typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port,
- struct net_device *idev, void *cookie);
+typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
+ struct net_device *idev, void *cookie);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6fa4c59dc7a7..db3b6271f09d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -105,8 +105,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
- IB_MANDATORY_FUNC(create_ah),
- IB_MANDATORY_FUNC(destroy_ah),
IB_MANDATORY_FUNC(create_qp),
IB_MANDATORY_FUNC(modify_qp),
IB_MANDATORY_FUNC(destroy_qp),
@@ -862,25 +860,6 @@ int ib_query_port(struct ib_device *device,
EXPORT_SYMBOL(ib_query_port);
/**
- * ib_query_gid - Get GID table entry
- * @device:Device to query
- * @port_num:Port number to query
- * @index:GID table index to query
- * @gid:Returned GID
- * @attr: Returned GID attributes related to this GID index (only in RoCE).
- * NULL means ignore.
- *
- * ib_query_gid() fetches the specified GID table entry from the cache.
- */
-int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid,
- struct ib_gid_attr *attr)
-{
- return ib_get_cached_gid(device, port_num, index, gid, attr);
-}
-EXPORT_SYMBOL(ib_query_gid);
-
-/**
* ib_enum_roce_netdev - enumerate all RoCE ports
* @ib_dev : IB device we want to query
* @filter: Should we call the callback?
@@ -1057,7 +1036,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
- ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+ ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index f742ae7a768b..ef459f2f2eeb 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/dma-mapping.h>
+#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
@@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+/*
+ * The mlx4 driver uses the top byte to distinguish which virtual function
+ * generated the MAD, so we must avoid using it.
+ */
+#define AGENT_ID_LIMIT (1 << 24)
+static DEFINE_IDR(ib_mad_clients);
static struct list_head ib_mad_port_list;
-static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad);
/*
* ib_register_mad_agent - Register to send/receive MADs
+ *
+ * Context: Process context.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 port_num,
@@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
struct ib_mad_mgmt_vendor_class *vendor_class;
struct ib_mad_mgmt_method_table *method;
int ret2, qpn;
- unsigned long flags;
u8 mgmt_class, vclass;
/* Validate parameters */
@@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error4;
}
- spin_lock_irqsave(&port_priv->reg_lock, flags);
- mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
+ idr_preload(GFP_KERNEL);
+ idr_lock(&ib_mad_clients);
+ ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0,
+ AGENT_ID_LIMIT, GFP_ATOMIC);
+ idr_unlock(&ib_mad_clients);
+ idr_preload_end();
+
+ if (ret2 < 0) {
+ ret = ERR_PTR(ret2);
+ goto error5;
+ }
+ mad_agent_priv->agent.hi_tid = ret2;
/*
* Make sure MAD registration (if supplied)
* is non overlapping with any existing ones
*/
+ spin_lock_irq(&port_priv->reg_lock);
if (mad_reg_req) {
mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
if (!is_vendor_class(mgmt_class)) {
@@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (method) {
if (method_in_use(&method,
mad_reg_req))
- goto error5;
+ goto error6;
}
}
ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (is_vendor_method_in_use(
vendor_class,
mad_reg_req))
- goto error5;
+ goto error6;
}
}
ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
}
if (ret2) {
ret = ERR_PTR(ret2);
- goto error5;
+ goto error6;
}
}
-
- /* Add mad agent into port's agent list */
- list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ spin_unlock_irq(&port_priv->reg_lock);
return &mad_agent_priv->agent;
+error6:
+ spin_unlock_irq(&port_priv->reg_lock);
+ idr_lock(&ib_mad_clients);
+ idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+ idr_unlock(&ib_mad_clients);
error5:
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
kfree(reg_req);
@@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
- unsigned long flags;
/* Note that we could still be handling received MADs */
@@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
port_priv = mad_agent_priv->qp_info->port_priv;
cancel_delayed_work(&mad_agent_priv->timed_work);
- spin_lock_irqsave(&port_priv->reg_lock, flags);
+ spin_lock_irq(&port_priv->reg_lock);
remove_mad_reg_req(mad_agent_priv);
- list_del(&mad_agent_priv->agent_list);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ spin_unlock_irq(&port_priv->reg_lock);
+ idr_lock(&ib_mad_clients);
+ idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+ idr_unlock(&ib_mad_clients);
flush_workqueue(port_priv->wq);
ib_cancel_rmpp_recvs(mad_agent_priv);
@@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
kfree(mad_agent_priv->reg_req);
- kfree(mad_agent_priv);
+ kfree_rcu(mad_agent_priv, rcu);
}
static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
@@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
+ *
+ * Context: Process context.
*/
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
@@ -1159,7 +1181,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_qp_info *qp_info;
struct list_head *list;
- struct ib_send_wr *bad_send_wr;
struct ib_mad_agent *mad_agent;
struct ib_sge *sge;
unsigned long flags;
@@ -1197,7 +1218,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
- &bad_send_wr);
+ NULL);
list = &qp_info->send_queue.list;
} else {
ret = 0;
@@ -1720,22 +1741,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
struct ib_mad_agent_private *mad_agent = NULL;
unsigned long flags;
- spin_lock_irqsave(&port_priv->reg_lock, flags);
if (ib_response_mad(mad_hdr)) {
u32 hi_tid;
- struct ib_mad_agent_private *entry;
/*
* Routing is based on high 32 bits of transaction ID
* of MAD.
*/
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
- list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
- if (entry->agent.hi_tid == hi_tid) {
- mad_agent = entry;
- break;
- }
- }
+ rcu_read_lock();
+ mad_agent = idr_find(&ib_mad_clients, hi_tid);
+ if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+ mad_agent = NULL;
+ rcu_read_unlock();
} else {
struct ib_mad_mgmt_class_table *class;
struct ib_mad_mgmt_method_table *method;
@@ -1744,6 +1762,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
const struct ib_vendor_mad *vendor_mad;
int index;
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
/*
* Routing is based on version, class, and method
* For "newer" vendor MADs, also based on OUI
@@ -1783,20 +1802,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
~IB_MGMT_METHOD_RESP];
}
}
+ if (mad_agent)
+ atomic_inc(&mad_agent->refcount);
+out:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
- if (mad_agent) {
- if (mad_agent->agent.recv_handler)
- atomic_inc(&mad_agent->refcount);
- else {
- dev_notice(&port_priv->device->dev,
- "No receive handler for client %p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
- mad_agent = NULL;
- }
+ if (mad_agent && !mad_agent->agent.recv_handler) {
+ dev_notice(&port_priv->device->dev,
+ "No receive handler for client %p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
+ deref_mad_agent(mad_agent);
+ mad_agent = NULL;
}
-out:
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
return mad_agent;
}
@@ -1896,8 +1914,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
const struct ib_global_route *grh =
rdma_ah_read_grh(&attr);
- if (ib_get_cached_gid(device, port_num,
- grh->sgid_index, &sgid, NULL))
+ if (rdma_query_gid(device, port_num,
+ grh->sgid_index, &sgid))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
@@ -2457,7 +2475,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
- struct ib_send_wr *bad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
int ret;
@@ -2507,7 +2524,7 @@ retry:
if (queued_send_wr) {
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
- &bad_send_wr);
+ NULL);
if (ret) {
dev_err(&port_priv->device->dev,
"ib_post_send failed: %d\n", ret);
@@ -2552,11 +2569,9 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
if (wc->status == IB_WC_WR_FLUSH_ERR) {
if (mad_send_wr->retry) {
/* Repost send */
- struct ib_send_wr *bad_send_wr;
-
mad_send_wr->retry = 0;
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
- &bad_send_wr);
+ NULL);
if (!ret)
return false;
}
@@ -2872,7 +2887,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
int post, ret;
struct ib_mad_private *mad_priv;
struct ib_sge sg_list;
- struct ib_recv_wr recv_wr, *bad_recv_wr;
+ struct ib_recv_wr recv_wr;
struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
/* Initialize common scatter list fields */
@@ -2916,7 +2931,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
post = (++recv_queue->count < recv_queue->max_active);
list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
spin_unlock_irqrestore(&recv_queue->lock, flags);
- ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
+ ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
if (ret) {
spin_lock_irqsave(&recv_queue->lock, flags);
list_del(&mad_priv->header.mad_list.list);
@@ -3159,7 +3174,6 @@ static int ib_mad_port_open(struct ib_device *device,
port_priv->device = device;
port_priv->port_num = port_num;
spin_lock_init(&port_priv->reg_lock);
- INIT_LIST_HEAD(&port_priv->agent_list);
init_mad_qp(port_priv, &port_priv->qp_info[0]);
init_mad_qp(port_priv, &port_priv->qp_info[1]);
@@ -3338,6 +3352,9 @@ int ib_mad_init(void)
INIT_LIST_HEAD(&ib_mad_port_list);
+ /* Client ID 0 is used for snoop-only clients */
+ idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL);
+
if (ib_register_client(&mad_client)) {
pr_err("Couldn't register ib_mad client\n");
return -EINVAL;
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 28669f6419e1..d84ae1671898 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -89,7 +89,6 @@ struct ib_rmpp_segment {
};
struct ib_mad_agent_private {
- struct list_head agent_list;
struct ib_mad_agent agent;
struct ib_mad_reg_req *reg_req;
struct ib_mad_qp_info *qp_info;
@@ -105,7 +104,10 @@ struct ib_mad_agent_private {
struct list_head rmpp_list;
atomic_t refcount;
- struct completion comp;
+ union {
+ struct completion comp;
+ struct rcu_head rcu;
+ };
};
struct ib_mad_snoop_private {
@@ -203,7 +205,6 @@ struct ib_mad_port_private {
spinlock_t reg_lock;
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
- struct list_head agent_list;
struct workqueue_struct *wq;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 6c48f4193dda..d50ff70bb24b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+/**
+ * ib_init_ah_from_mcmember - Initialize AH attribute from multicast
+ * member record and gid of the device.
+ * @device: RDMA device
+ * @port_num: Port of the rdma device to consider
+ * @ndev: Optional netdevice, applicable only for RoCE
+ * @gid_type: GID type to consider
+ * @ah_attr: AH attribute to fillup on successful completion
+ *
+ * ib_init_ah_from_mcmember() initializes AH attribute based on multicast
+ * member record and other device properties. On success the caller is
+ * responsible to call rdma_destroy_ah_attr on the ah_attr. Returns 0 on
+ * success or appropriate error code.
+ *
+ */
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
struct rdma_ah_attr *ah_attr)
{
- int ret;
- u16 gid_index;
+ const struct ib_gid_attr *sgid_attr;
/* GID table is not based on the netdevice for IB link layer,
* so ignore ndev during search.
@@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
else if (!rdma_protocol_roce(device, port_num))
return -EINVAL;
- ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
- gid_type, port_num,
- ndev,
- &gid_index);
- if (ret)
- return ret;
+ sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num, ndev);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
- memset(ah_attr, 0, sizeof *ah_attr);
+ memset(ah_attr, 0, sizeof(*ah_attr));
ah_attr->type = rdma_ah_find_type(device, port_num);
rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
rdma_ah_set_sl(ah_attr, rec->sl);
rdma_ah_set_port_num(ah_attr, port_num);
rdma_ah_set_static_rate(ah_attr, rec->rate);
-
- rdma_ah_set_grh(ah_attr, &rec->mgid,
- be32_to_cpu(rec->flow_label),
- (u8)gid_index,
- rec->hop_limit,
- rec->traffic_class);
+ rdma_move_grh_sgid_attr(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ rec->hop_limit, rec->traffic_class,
+ sgid_attr);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 340c7bea45ab..0385ab438320 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg,
if (ret)
return ret;
- BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
- if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
- (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD))
- return -EMSGSIZE;
- if (rdma_protocol_ib(device, port) &&
- nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
- attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
- return -EMSGSIZE;
if (rdma_protocol_ib(device, port)) {
+ BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+ (u64)attr.port_cap_flags,
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
+ attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a6e904973ba8..6eb64c6f0802 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -32,6 +32,7 @@
#include <linux/file.h>
#include <linux/anon_inodes.h>
+#include <linux/sched/mm.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
@@ -41,51 +42,6 @@
#include "core_priv.h"
#include "rdma_core.h"
-int uverbs_ns_idx(u16 *id, unsigned int ns_count)
-{
- int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
-
- if (ret >= ns_count)
- return -EINVAL;
-
- *id &= ~UVERBS_ID_NS_MASK;
- return ret;
-}
-
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
- uint16_t object)
-{
- const struct uverbs_root_spec *object_hash = ibdev->specs_root;
- const struct uverbs_object_spec_hash *objects;
- int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
-
- if (ret < 0)
- return NULL;
-
- objects = object_hash->object_buckets[ret];
-
- if (object >= objects->num_objects)
- return NULL;
-
- return objects->objects[object];
-}
-
-const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
- uint16_t method)
-{
- const struct uverbs_method_spec_hash *methods;
- int ret = uverbs_ns_idx(&method, object->num_buckets);
-
- if (ret < 0)
- return NULL;
-
- methods = object->method_buckets[ret];
- if (method >= methods->num_methods)
- return NULL;
-
- return methods->methods[method];
-}
-
void uverbs_uobject_get(struct ib_uobject *uobject)
{
kref_get(&uobject->ref);
@@ -96,7 +52,7 @@ static void uverbs_uobject_free(struct kref *ref)
struct ib_uobject *uobj =
container_of(ref, struct ib_uobject, ref);
- if (uobj->type->type_class->needs_kfree_rcu)
+ if (uobj->uapi_object->type_class->needs_kfree_rcu)
kfree_rcu(uobj, rcu);
else
kfree(uobj);
@@ -107,7 +63,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject)
kref_put(&uobject->ref, uverbs_uobject_free);
}
-static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+static int uverbs_try_lock_object(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
{
/*
* When a shared access is required, we use a positive counter. Each
@@ -120,27 +77,211 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
- if (!exclusive)
- return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
-EBUSY : 0;
+ case UVERBS_LOOKUP_WRITE:
+ /* lock is exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+ case UVERBS_LOOKUP_DESTROY:
+ return 0;
+ }
+ return 0;
+}
+
+static void assert_uverbs_usecnt(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+#ifdef CONFIG_LOCKDEP
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+ break;
+ case UVERBS_LOOKUP_WRITE:
+ WARN_ON(atomic_read(&uobj->usecnt) != -1);
+ break;
+ case UVERBS_LOOKUP_DESTROY:
+ break;
+ }
+#endif
+}
+
+/*
+ * This must be called with the hw_destroy_rwsem locked for read or write,
+ * also the uobject itself must be locked for write.
+ *
+ * Upon return the HW object is guaranteed to be destroyed.
+ *
+ * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
+ * however the type's alloc_commit function cannot have been called and the
+ * uobject cannot be on the uobjects list
+ *
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
+ * rdma_lookup_get_uobject) and the object is left in a state where the caller
+ * needs to call rdma_lookup_put_uobject.
+ *
+ * For all other destroy modes this function internally unlocks the uobject
+ * and consumes the kref on the uobj.
+ */
+static int uverbs_destroy_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason reason)
+{
+ struct ib_uverbs_file *ufile = uobj->ufile;
+ unsigned long flags;
+ int ret;
+
+ lockdep_assert_held(&ufile->hw_destroy_rwsem);
+ assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
+
+ if (uobj->object) {
+ ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
+ if (ret) {
+ if (ib_is_destroy_retryable(ret, reason, uobj))
+ return ret;
+
+ /* Nothing to be done, dangle the memory and move on */
+ WARN(true,
+ "ib_uverbs: failed to remove uobject id %d, driver err=%d",
+ uobj->id, ret);
+ }
+
+ uobj->object = NULL;
+ }
- /* lock is either WRITE or DESTROY - should be exclusive */
- return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+ if (reason == RDMA_REMOVE_ABORT) {
+ WARN_ON(!list_empty(&uobj->list));
+ WARN_ON(!uobj->context);
+ uobj->uapi_object->type_class->alloc_abort(uobj);
+ }
+
+ uobj->context = NULL;
+
+ /*
+ * For DESTROY the usecnt is held write locked, the caller is expected
+ * to unlock and put the object when done with it. Only DESTROY
+ * can remove the IDR handle.
+ */
+ if (reason != RDMA_REMOVE_DESTROY)
+ atomic_set(&uobj->usecnt, 0);
+ else
+ uobj->uapi_object->type_class->remove_handle(uobj);
+
+ if (!list_empty(&uobj->list)) {
+ spin_lock_irqsave(&ufile->uobjects_lock, flags);
+ list_del_init(&uobj->list);
+ spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
+
+ /*
+ * Pairs with the get in rdma_alloc_commit_uobject(), could
+ * destroy uobj.
+ */
+ uverbs_uobject_put(uobj);
+ }
+
+ /*
+ * When aborting the stack kref remains owned by the core code, and is
+ * not transferred into the type. Pairs with the get in alloc_uobj
+ */
+ if (reason == RDMA_REMOVE_ABORT)
+ uverbs_uobject_put(uobj);
+
+ return 0;
}
-static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
- const struct uverbs_obj_type *type)
+/*
+ * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
+ * sequence. It should only be used from command callbacks. On success the
+ * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * version requires the caller to have already obtained an
+ * LOOKUP_DESTROY uobject kref.
+ */
+int uobj_destroy(struct ib_uobject *uobj)
{
- struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+ struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
+
+ down_read(&ufile->hw_destroy_rwsem);
+
+ ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
+ if (ret)
+ goto out_unlock;
+
+ ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
+ if (ret) {
+ atomic_set(&uobj->usecnt, 0);
+ goto out_unlock;
+ }
+out_unlock:
+ up_read(&ufile->hw_destroy_rwsem);
+ return ret;
+}
+
+/*
+ * uobj_get_destroy destroys the HW object and returns a handle to the uobj
+ * with a NULL object pointer. The caller must pair this with
+ * uobj_put_destroy.
+ */
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+ u32 id, struct ib_uverbs_file *ufile)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = uobj_destroy(uobj);
+ if (ret) {
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+ return ERR_PTR(ret);
+ }
+
+ return uobj;
+}
+
+/*
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res
+ * on success (negative errno on failure). For use by callers that do not need
+ * the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+ struct ib_uverbs_file *ufile, int success_res)
+{
+ struct ib_uobject *uobj;
+
+ uobj = __uobj_get_destroy(obj, id, ufile);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ return success_res;
+}
+
+/* alloc_uobj must be undone by uverbs_destroy_uobject() */
+static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
+ const struct uverbs_api_object *obj)
+{
+ struct ib_uobject *uobj;
+ struct ib_ucontext *ucontext;
+
+ ucontext = ib_uverbs_get_ucontext(ufile);
+ if (IS_ERR(ucontext))
+ return ERR_CAST(ucontext);
+
+ uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
if (!uobj)
return ERR_PTR(-ENOMEM);
/*
* user_handle should be filled by the handler,
* The object is added to the list in the commit stage.
*/
- uobj->context = context;
- uobj->type = type;
+ uobj->ufile = ufile;
+ uobj->context = ucontext;
+ INIT_LIST_HEAD(&uobj->list);
+ uobj->uapi_object = obj;
/*
* Allocated objects start out as write locked to deny any other
* syscalls from accessing them until they are committed. See
@@ -157,45 +298,39 @@ static int idr_add_uobj(struct ib_uobject *uobj)
int ret;
idr_preload(GFP_KERNEL);
- spin_lock(&uobj->context->ufile->idr_lock);
+ spin_lock(&uobj->ufile->idr_lock);
/*
* We start with allocating an idr pointing to NULL. This represents an
* object which isn't initialized yet. We'll replace it later on with
* the real object once we commit.
*/
- ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
if (ret >= 0)
uobj->id = ret;
- spin_unlock(&uobj->context->ufile->idr_lock);
+ spin_unlock(&uobj->ufile->idr_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
}
-/*
- * It only removes it from the uobjects list, uverbs_uobject_put() is still
- * required.
- */
-static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
-{
- spin_lock(&uobj->context->ufile->idr_lock);
- idr_remove(&uobj->context->ufile->idr, uobj->id);
- spin_unlock(&uobj->context->ufile->idr_lock);
-}
-
/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
-static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
- int id, bool exclusive)
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
{
struct ib_uobject *uobj;
+ unsigned long idrno = id;
+
+ if (id < 0 || id > ULONG_MAX)
+ return ERR_PTR(-EINVAL);
rcu_read_lock();
/* object won't be released as we're protected in rcu */
- uobj = idr_find(&ucontext->ufile->idr, id);
+ uobj = idr_find(&ufile->idr, idrno);
if (!uobj) {
uobj = ERR_PTR(-ENOENT);
goto free;
@@ -215,19 +350,28 @@ free:
return uobj;
}
-static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
- int id, bool exclusive)
+static struct ib_uobject *
+lookup_get_fd_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
{
+ const struct uverbs_obj_fd_type *fd_type;
struct file *f;
struct ib_uobject *uobject;
- const struct uverbs_obj_fd_type *fd_type =
- container_of(type, struct uverbs_obj_fd_type, type);
+ int fdno = id;
- if (exclusive)
+ if (fdno != id)
+ return ERR_PTR(-EINVAL);
+
+ if (mode != UVERBS_LOOKUP_READ)
return ERR_PTR(-EOPNOTSUPP);
- f = fget(id);
+ if (!obj->type_attrs)
+ return ERR_PTR(-EIO);
+ fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+
+ f = fget(fdno);
if (!f)
return ERR_PTR(-EBADF);
@@ -246,43 +390,55 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
return uobject;
}
-struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
- int id, bool exclusive)
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
{
struct ib_uobject *uobj;
int ret;
- uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ if (!obj)
+ return ERR_PTR(-EINVAL);
+
+ uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
if (IS_ERR(uobj))
return uobj;
- if (uobj->type != type) {
+ if (uobj->uapi_object != obj) {
ret = -EINVAL;
goto free;
}
- ret = uverbs_try_lock_object(uobj, exclusive);
- if (ret) {
- WARN(ucontext->cleanup_reason,
- "ib_uverbs: Trying to lookup_get while cleanup context\n");
+ /*
+ * If we have been disassociated block every command except for
+ * DESTROY based commands.
+ */
+ if (mode != UVERBS_LOOKUP_DESTROY &&
+ !srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu)) {
+ ret = -EIO;
goto free;
}
+ ret = uverbs_try_lock_object(uobj, mode);
+ if (ret)
+ goto free;
+
return uobj;
free:
- uobj->type->type_class->lookup_put(uobj, exclusive);
+ obj->type_class->lookup_put(uobj, mode);
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
-static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+static struct ib_uobject *
+alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile)
{
int ret;
struct ib_uobject *uobj;
- uobj = alloc_uobj(ucontext, type);
+ uobj = alloc_uobj(ufile, obj);
if (IS_ERR(uobj))
return uobj;
@@ -290,7 +446,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
if (ret)
goto uobj_put;
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
if (ret)
goto idr_remove;
@@ -298,304 +454,305 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
return uobj;
idr_remove:
- uverbs_idr_remove_uobj(uobj);
+ spin_lock(&ufile->idr_lock);
+ idr_remove(&ufile->idr, uobj->id);
+ spin_unlock(&ufile->idr_lock);
uobj_put:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
-static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+static struct ib_uobject *
+alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile)
{
- const struct uverbs_obj_fd_type *fd_type =
- container_of(type, struct uverbs_obj_fd_type, type);
int new_fd;
struct ib_uobject *uobj;
- struct ib_uobject_file *uobj_file;
- struct file *filp;
new_fd = get_unused_fd_flags(O_CLOEXEC);
if (new_fd < 0)
return ERR_PTR(new_fd);
- uobj = alloc_uobj(ucontext, type);
+ uobj = alloc_uobj(ufile, obj);
if (IS_ERR(uobj)) {
put_unused_fd(new_fd);
return uobj;
}
- uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
- filp = anon_inode_getfile(fd_type->name,
- fd_type->fops,
- uobj_file,
- fd_type->flags);
- if (IS_ERR(filp)) {
- put_unused_fd(new_fd);
- uverbs_uobject_put(uobj);
- return (void *)filp;
- }
-
- uobj_file->uobj.id = new_fd;
- uobj_file->uobj.object = filp;
- uobj_file->ufile = ucontext->ufile;
- INIT_LIST_HEAD(&uobj->list);
- kref_get(&uobj_file->ufile->ref);
+ uobj->id = new_fd;
+ uobj->ufile = ufile;
return uobj;
}
-struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile)
{
- return type->type_class->alloc_begin(type, ucontext);
-}
+ struct ib_uobject *ret;
-static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
-{
- const struct uverbs_obj_idr_type *idr_type =
- container_of(uobj->type, struct uverbs_obj_idr_type,
- type);
- int ret = idr_type->destroy_object(uobj, why);
+ if (!obj)
+ return ERR_PTR(-EINVAL);
/*
- * We can only fail gracefully if the user requested to destroy the
- * object. In the rest of the cases, just remove whatever you can.
+ * The hw_destroy_rwsem is held across the entire object creation and
+ * released during rdma_alloc_commit_uobject or
+ * rdma_alloc_abort_uobject
*/
- if (why == RDMA_REMOVE_DESTROY && ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- uverbs_idr_remove_uobj(uobj);
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ return ERR_PTR(-EIO);
+ ret = obj->type_class->alloc_begin(obj, ufile);
+ if (IS_ERR(ret)) {
+ up_read(&ufile->hw_destroy_rwsem);
+ return ret;
+ }
return ret;
}
-static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
- struct ib_uobject_file *uobj_file =
- container_of(uobj, struct ib_uobject_file, uobj);
- struct file *filp = uobj->object;
- int id = uobj_file->uobj.id;
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
- /* Unsuccessful NEW */
- fput(filp);
- put_unused_fd(id);
+ spin_lock(&uobj->ufile->idr_lock);
+ idr_remove(&uobj->ufile->idr, uobj->id);
+ spin_unlock(&uobj->ufile->idr_lock);
}
-static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
- const struct uverbs_obj_fd_type *fd_type =
- container_of(uobj->type, struct uverbs_obj_fd_type, type);
- struct ib_uobject_file *uobj_file =
- container_of(uobj, struct ib_uobject_file, uobj);
- int ret = fd_type->context_closed(uobj_file, why);
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->uapi_object->type_attrs,
+ struct uverbs_obj_idr_type, type);
+ int ret = idr_type->destroy_object(uobj, why);
- if (why == RDMA_REMOVE_DESTROY && ret)
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object or when a retry may be called upon an error.
+ * In the rest of the cases, just remove whatever you can.
+ */
+ if (ib_is_destroy_retryable(ret, why, uobj))
return ret;
- if (why == RDMA_REMOVE_DURING_CLEANUP) {
- alloc_abort_fd_uobject(uobj);
- return ret;
- }
+ if (why == RDMA_REMOVE_ABORT)
+ return 0;
- uobj_file->uobj.context = NULL;
- return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
+ return 0;
}
-static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
+static void remove_handle_idr_uobject(struct ib_uobject *uobj)
{
-#ifdef CONFIG_LOCKDEP
- if (exclusive)
- WARN_ON(atomic_read(&uobj->usecnt) != -1);
- else
- WARN_ON(atomic_read(&uobj->usecnt) <= 0);
-#endif
+ spin_lock(&uobj->ufile->idr_lock);
+ idr_remove(&uobj->ufile->idr, uobj->id);
+ spin_unlock(&uobj->ufile->idr_lock);
+ /* Matches the kref in alloc_commit_idr_uobject */
+ uverbs_uobject_put(uobj);
}
-static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
- int ret;
- struct ib_ucontext *ucontext = uobj->context;
-
- ret = uobj->type->type_class->remove_commit(uobj, why);
- if (ret && why == RDMA_REMOVE_DESTROY) {
- /* We couldn't remove the object, so just unlock the uobject */
- atomic_set(&uobj->usecnt, 0);
- uobj->type->type_class->lookup_put(uobj, true);
- } else {
- mutex_lock(&ucontext->uobjects_lock);
- list_del(&uobj->list);
- mutex_unlock(&ucontext->uobjects_lock);
- /* put the ref we took when we created the object */
- uverbs_uobject_put(uobj);
- }
-
- return ret;
+ put_unused_fd(uobj->id);
}
-/* This is called only for user requested DESTROY reasons */
-int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
+static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
- int ret;
- struct ib_ucontext *ucontext = uobj->context;
-
- /* put the ref count we took at lookup_get */
- uverbs_uobject_put(uobj);
- /* Cleanup is running. Calling this should have been impossible */
- if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
- WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
- return 0;
- }
- assert_uverbs_usecnt(uobj, true);
- ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
+ const struct uverbs_obj_fd_type *fd_type = container_of(
+ uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+ int ret = fd_type->context_closed(uobj, why);
- up_read(&ucontext->cleanup_rwsem);
- return ret;
-}
+ if (ib_is_destroy_retryable(ret, why, uobj))
+ return ret;
-static int null_obj_type_class_remove_commit(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
-{
return 0;
}
-static const struct uverbs_obj_type null_obj_type = {
- .type_class = &((const struct uverbs_obj_type_class){
- .remove_commit = null_obj_type_class_remove_commit,
- /* be cautious */
- .needs_kfree_rcu = true}),
-};
-
-int rdma_explicit_destroy(struct ib_uobject *uobject)
+static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
- int ret;
- struct ib_ucontext *ucontext = uobject->context;
-
- /* Cleanup is running. Calling this should have been impossible */
- if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
- WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
- return 0;
- }
- assert_uverbs_usecnt(uobject, true);
- ret = uobject->type->type_class->remove_commit(uobject,
- RDMA_REMOVE_DESTROY);
- if (ret)
- goto out;
-
- uobject->type = &null_obj_type;
-
-out:
- up_read(&ucontext->cleanup_rwsem);
- return ret;
}
-static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
- spin_lock(&uobj->context->ufile->idr_lock);
+ struct ib_uverbs_file *ufile = uobj->ufile;
+
+ spin_lock(&ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
* this shouldn't fail.
+ *
+ * NOTE: Once we set the IDR we lose ownership of our kref on uobj.
+ * It will be put by remove_handle_idr_uobject()
*/
- WARN_ON(idr_replace(&uobj->context->ufile->idr,
- uobj, uobj->id));
- spin_unlock(&uobj->context->ufile->idr_lock);
+ WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
+ spin_unlock(&ufile->idr_lock);
+
+ return 0;
}
-static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
- struct ib_uobject_file *uobj_file =
- container_of(uobj, struct ib_uobject_file, uobj);
+ const struct uverbs_obj_fd_type *fd_type = container_of(
+ uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+ int fd = uobj->id;
+ struct file *filp;
+
+ /*
+ * The kref for uobj is moved into filp->private data and put in
+ * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
+ * must be guaranteed to be called from the provided fops release
+ * callback.
+ */
+ filp = anon_inode_getfile(fd_type->name,
+ fd_type->fops,
+ uobj,
+ fd_type->flags);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ uobj->object = filp;
+
+ /* Matching put will be done in uverbs_close_fd() */
+ kref_get(&uobj->ufile->ref);
- fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
- uobj_file->uobj.id = 0;
- /* Get another reference as we export this to the fops */
- uverbs_uobject_get(&uobj_file->uobj);
+ uobj->id = 0;
+
+ /*
+ * NOTE: Once we install the file we lose ownership of our kref on
+ * uobj. It will be put by uverbs_close_fd()
+ */
+ fd_install(fd, filp);
+
+ return 0;
}
-int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+/*
+ * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
+ * caller can no longer assume uobj is valid. If this function fails it
+ * destroys the uobject, including the attached HW object.
+ */
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
- /* Cleanup is running. Calling this should have been impossible */
- if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
- int ret;
+ struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
- WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
- ret = uobj->type->type_class->remove_commit(uobj,
- RDMA_REMOVE_DURING_CLEANUP);
- if (ret)
- pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
- uobj->id);
+ /* alloc_commit consumes the uobj kref */
+ ret = uobj->uapi_object->type_class->alloc_commit(uobj);
+ if (ret) {
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
+ up_read(&ufile->hw_destroy_rwsem);
return ret;
}
+ /* kref is held so long as the uobj is on the uobj list. */
+ uverbs_uobject_get(uobj);
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_add(&uobj->list, &ufile->uobjects);
+ spin_unlock_irq(&ufile->uobjects_lock);
+
/* matches atomic_set(-1) in alloc_uobj */
- assert_uverbs_usecnt(uobj, true);
atomic_set(&uobj->usecnt, 0);
- mutex_lock(&uobj->context->uobjects_lock);
- list_add(&uobj->list, &uobj->context->uobjects);
- mutex_unlock(&uobj->context->uobjects_lock);
-
- uobj->type->type_class->alloc_commit(uobj);
- up_read(&uobj->context->cleanup_rwsem);
+ /* Matches the down_read in rdma_alloc_begin_uobject */
+ up_read(&ufile->hw_destroy_rwsem);
return 0;
}
-static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
-{
- uverbs_idr_remove_uobj(uobj);
- ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- uverbs_uobject_put(uobj);
-}
-
+/*
+ * This consumes the kref for uobj. It is up to the caller to unwind the HW
+ * object and anything else connected to uobj before calling this.
+ */
void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
{
- uobj->type->type_class->alloc_abort(uobj);
+ struct ib_uverbs_file *ufile = uobj->ufile;
+
+ uobj->object = NULL;
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
+
+ /* Matches the down_read in rdma_alloc_begin_uobject */
+ up_read(&ufile->hw_destroy_rwsem);
}
-static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+static void lookup_put_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
{
}
-static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+static void lookup_put_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
{
struct file *filp = uobj->object;
- WARN_ON(exclusive);
+ WARN_ON(mode != UVERBS_LOOKUP_READ);
/* This indirectly calls uverbs_close_fd and free the object */
fput(filp);
}
-void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
{
- assert_uverbs_usecnt(uobj, exclusive);
- uobj->type->type_class->lookup_put(uobj, exclusive);
+ assert_uverbs_usecnt(uobj, mode);
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
/*
* In order to unlock an object, either decrease its usecnt for
* read access or zero it in case of exclusive access. See
* uverbs_try_lock_object for locking schema information.
*/
- if (!exclusive)
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
atomic_dec(&uobj->usecnt);
- else
+ break;
+ case UVERBS_LOOKUP_WRITE:
atomic_set(&uobj->usecnt, 0);
+ break;
+ case UVERBS_LOOKUP_DESTROY:
+ break;
+ }
+ /* Pairs with the kref obtained by type->lookup_get */
uverbs_uobject_put(uobj);
}
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+ spin_lock_init(&ufile->idr_lock);
+ idr_init(&ufile->idr);
+}
+
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+ struct ib_uobject *entry;
+ int id;
+
+ /*
+ * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
+ * there are no HW objects left, however the IDR is still populated
+ * with anything that has not been cleaned up by userspace. Since the
+ * kref on ufile is 0, nothing is allowed to call lookup_get.
+ *
+ * This is an optimized equivalent to remove_handle_idr_uobject
+ */
+ idr_for_each_entry(&ufile->idr, entry, id) {
+ WARN_ON(entry->object);
+ uverbs_uobject_put(entry);
+ }
+
+ idr_destroy(&ufile->idr);
+}
+
const struct uverbs_obj_type_class uverbs_idr_class = {
.alloc_begin = alloc_begin_idr_uobject,
.lookup_get = lookup_get_idr_uobject,
.alloc_commit = alloc_commit_idr_uobject,
.alloc_abort = alloc_abort_idr_uobject,
.lookup_put = lookup_put_idr_uobject,
- .remove_commit = remove_commit_idr_uobject,
+ .destroy_hw = destroy_hw_idr_uobject,
+ .remove_handle = remove_handle_idr_uobject,
/*
* When we destroy an object, we first just lock it for WRITE and
* actually DESTROY it in the finalize stage. So, the problematic
@@ -611,103 +768,180 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
*/
.needs_kfree_rcu = true,
};
+EXPORT_SYMBOL(uverbs_idr_class);
-static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+void uverbs_close_fd(struct file *f)
{
- struct ib_ucontext *ucontext;
- struct ib_uverbs_file *ufile = uobj_file->ufile;
- int ret;
+ struct ib_uobject *uobj = f->private_data;
+ struct ib_uverbs_file *ufile = uobj->ufile;
- mutex_lock(&uobj_file->ufile->cleanup_mutex);
+ if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+ /*
+ * lookup_get_fd_uobject holds the kref on the struct file any
+ * time a FD uobj is locked, which prevents this release
+ * method from being invoked. Meaning we can always get the
+ * write lock here, or we have a kernel bug.
+ */
+ WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ufile->hw_destroy_rwsem);
+ }
- /* uobject was either already cleaned up or is cleaned up right now anyway */
- if (!uobj_file->uobj.context ||
- !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
- goto unlock;
+ /* Matches the get in alloc_commit_fd_uobject */
+ kref_put(&ufile->ref, ib_uverbs_release_file);
- ucontext = uobj_file->uobj.context;
- ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
- up_read(&ucontext->cleanup_rwsem);
- if (ret)
- pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
-unlock:
- mutex_unlock(&ufile->cleanup_mutex);
+ /* Pairs with filp->private_data in alloc_commit_fd_uobject */
+ uverbs_uobject_put(uobj);
}
-void uverbs_close_fd(struct file *f)
-{
- struct ib_uobject_file *uobj_file = f->private_data;
- struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct ib_device *ib_dev = ibcontext->device;
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ put_task_struct(owning_process);
+ usleep_range(1000, 2000);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the
+ * task struct.
+ */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
- _uverbs_close_fd(uobj_file);
- uverbs_uobject_put(&uobj_file->uobj);
- kref_put(uverbs_file_ref, ib_uverbs_release_file);
+ down_write(&owning_mm->mmap_sem);
+ ib_dev->disassociate_ucontext(ibcontext);
+ up_write(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
}
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+/*
+ * Drop the ucontext off the ufile and completely disconnect it from the
+ * ib_device
+ */
+static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
{
- enum rdma_remove_reason reason = device_removed ?
- RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
- unsigned int cur_order = 0;
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ int ret;
+
+ if (reason == RDMA_REMOVE_DRIVER_REMOVE)
+ ufile_disassociate_ucontext(ucontext);
+
+ put_pid(ucontext->tgid);
+ ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
- ucontext->cleanup_reason = reason;
/*
- * Waits for all remove_commit and alloc_commit to finish. Logically, We
- * want to hold this forever as the context is going to be destroyed,
- * but we'll release it since it causes a "held lock freed" BUG message.
+ * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
+ * the error return.
*/
- down_write(&ucontext->cleanup_rwsem);
+ ret = ucontext->device->dealloc_ucontext(ucontext);
+ WARN_ON(ret);
- while (!list_empty(&ucontext->uobjects)) {
- struct ib_uobject *obj, *next_obj;
- unsigned int next_order = UINT_MAX;
+ ufile->ucontext = NULL;
+}
+
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ struct ib_uobject *obj, *next_obj;
+ int ret = -EINVAL;
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per traversal in order to let
+ * other threads (which might still use the FDs) chance to run.
+ */
+ list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
/*
- * This shouldn't run while executing other commands on this
- * context. Thus, the only thing we should take care of is
- * releasing a FD while traversing this list. The FD could be
- * closed and released from the _release fop of this FD.
- * In order to mitigate this, we add a lock.
- * We take and release the lock per order traversal in order
- * to let other threads (which might still use the FDs) chance
- * to run.
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
*/
- mutex_lock(&ucontext->uobjects_lock);
- list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
- list) {
- if (obj->type->destroy_order == cur_order) {
- int ret;
-
- /*
- * if we hit this WARN_ON, that means we are
- * racing with a lookup_get.
- */
- WARN_ON(uverbs_try_lock_object(obj, true));
- ret = obj->type->type_class->remove_commit(obj,
- reason);
- list_del(&obj->list);
- if (ret)
- pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
- obj->id, cur_order);
- /* put the ref we took when we created the object */
- uverbs_uobject_put(obj);
- } else {
- next_order = min(next_order,
- obj->type->destroy_order);
- }
- }
- mutex_unlock(&ucontext->uobjects_lock);
- cur_order = next_order;
+ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+ if (!uverbs_destroy_uobject(obj, reason))
+ ret = 0;
}
- up_write(&ucontext->cleanup_rwsem);
+ return ret;
}
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+/*
+ * Destroy the ucontext and every uobject associated with it. If called with
+ * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
+ * been completed and ufile->ucontext is NULL.
+ *
+ * This is internally locked and can be called in parallel from multiple
+ * contexts.
+ */
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
{
- ucontext->cleanup_reason = 0;
- mutex_init(&ucontext->uobjects_lock);
- INIT_LIST_HEAD(&ucontext->uobjects);
- init_rwsem(&ucontext->cleanup_rwsem);
+ if (reason == RDMA_REMOVE_CLOSE) {
+ /*
+ * During destruction we might trigger something that
+ * synchronously calls release on any file descriptor. For
+ * this reason all paths that come from file_operations
+ * release must use try_lock. They can progress knowing that
+ * there is an ongoing uverbs_destroy_ufile_hw that will clean
+ * up the driver resources.
+ */
+ if (!mutex_trylock(&ufile->ucontext_lock))
+ return;
+
+ } else {
+ mutex_lock(&ufile->ucontext_lock);
+ }
+
+ down_write(&ufile->hw_destroy_rwsem);
+
+ /*
+ * If a ucontext was never created then we can't have any uobjects to
+ * cleanup, nothing to do.
+ */
+ if (!ufile->ucontext)
+ goto done;
+
+ ufile->ucontext->closing = true;
+ ufile->ucontext->cleanup_retryable = true;
+ while (!list_empty(&ufile->uobjects))
+ if (__uverbs_cleanup_ufile(ufile, reason)) {
+ /*
+ * No entry was cleaned-up successfully during this
+ * iteration
+ */
+ break;
+ }
+
+ ufile->ucontext->cleanup_retryable = false;
+ if (!list_empty(&ufile->uobjects))
+ __uverbs_cleanup_ufile(ufile, reason);
+
+ ufile_destroy_ucontext(ufile, reason);
+
+done:
+ up_write(&ufile->hw_destroy_rwsem);
+ mutex_unlock(&ufile->ucontext_lock);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -716,23 +950,33 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
.alloc_commit = alloc_commit_fd_uobject,
.alloc_abort = alloc_abort_fd_uobject,
.lookup_put = lookup_put_fd_uobject,
- .remove_commit = remove_commit_fd_uobject,
+ .destroy_hw = destroy_hw_fd_uobject,
+ .remove_handle = remove_handle_fd_uobject,
.needs_kfree_rcu = false,
};
+EXPORT_SYMBOL(uverbs_fd_class);
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
- struct ib_ucontext *ucontext,
- enum uverbs_obj_access access,
- int id)
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id,
+ struct ib_uverbs_file *ufile,
+ enum uverbs_obj_access access, s64 id)
{
+ const struct uverbs_api_object *obj =
+ uapi_get_object(ufile->device->uapi, object_id);
+
switch (access) {
case UVERBS_ACCESS_READ:
- return rdma_lookup_get_uobject(type_attrs, ucontext, id, false);
+ return rdma_lookup_get_uobject(obj, ufile, id,
+ UVERBS_LOOKUP_READ);
case UVERBS_ACCESS_DESTROY:
+ /* Actual destruction is done inside uverbs_handle_method */
+ return rdma_lookup_get_uobject(obj, ufile, id,
+ UVERBS_LOOKUP_DESTROY);
case UVERBS_ACCESS_WRITE:
- return rdma_lookup_get_uobject(type_attrs, ucontext, id, true);
+ return rdma_lookup_get_uobject(obj, ufile, id,
+ UVERBS_LOOKUP_WRITE);
case UVERBS_ACCESS_NEW:
- return rdma_alloc_begin_uobject(type_attrs, ucontext);
+ return rdma_alloc_begin_uobject(obj, ufile);
default:
WARN_ON(true);
return ERR_PTR(-EOPNOTSUPP);
@@ -753,16 +997,14 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
switch (access) {
case UVERBS_ACCESS_READ:
- rdma_lookup_put_uobject(uobj, false);
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
break;
case UVERBS_ACCESS_WRITE:
- rdma_lookup_put_uobject(uobj, true);
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
break;
case UVERBS_ACCESS_DESTROY:
- if (commit)
- ret = rdma_remove_commit_uobject(uobj);
- else
- rdma_lookup_put_uobject(uobj, true);
+ if (uobj)
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
break;
case UVERBS_ACCESS_NEW:
if (commit)
@@ -777,43 +1019,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
return ret;
}
-
-int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
- struct uverbs_attr_spec_hash * const *spec_hash,
- size_t num,
- bool commit)
-{
- unsigned int i;
- int ret = 0;
-
- for (i = 0; i < num; i++) {
- struct uverbs_attr_bundle_hash *curr_bundle =
- &attrs_bundle->hash[i];
- const struct uverbs_attr_spec_hash *curr_spec_bucket =
- spec_hash[i];
- unsigned int j;
-
- for (j = 0; j < curr_bundle->num_attrs; j++) {
- struct uverbs_attr *attr;
- const struct uverbs_attr_spec *spec;
-
- if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
- continue;
-
- attr = &curr_bundle->attrs[j];
- spec = &curr_spec_bucket->attrs[j];
-
- if (spec->type == UVERBS_ATTR_TYPE_IDR ||
- spec->type == UVERBS_ATTR_TYPE_FD) {
- int current_ret;
-
- current_ret = uverbs_finalize_object(attr->obj_attr.uobject,
- spec->obj.access,
- commit);
- if (!ret)
- ret = current_ret;
- }
- }
- }
- return ret;
-}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 1efcf93238dd..f962f2a593ba 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -43,20 +43,12 @@
#include <rdma/ib_verbs.h>
#include <linux/mutex.h>
-int uverbs_ns_idx(u16 *id, unsigned int ns_count);
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
- uint16_t object);
-const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
- uint16_t method);
-/*
- * These functions initialize the context and cleanups its uobjects.
- * The context has a list of objects which is protected by a mutex
- * on the context. initialize_ucontext should be called when we create
- * a context.
- * cleanup_ucontext removes all uobjects from the context and puts them.
- */
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+struct ib_uverbs_device;
+
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason);
+
+int uobj_destroy(struct ib_uobject *uobj);
/*
* uverbs_uobject_get is called in order to increase the reference count on
@@ -82,7 +74,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject);
void uverbs_close_fd(struct file *f);
/*
- * Get an ib_uobject that corresponds to the given id from ucontext, assuming
+ * Get an ib_uobject that corresponds to the given id from ufile, assuming
* the object is from the given type. Lock it to the required access when
* applicable.
* This function could create (access == NEW), destroy (access == DESTROY)
@@ -90,13 +82,11 @@ void uverbs_close_fd(struct file *f);
* The action will be finalized only when uverbs_finalize_object or
* uverbs_finalize_objects are called.
*/
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
- struct ib_ucontext *ucontext,
- enum uverbs_obj_access access,
- int id);
-int uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access,
- bool commit);
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id,
+ struct ib_uverbs_file *ufile,
+ enum uverbs_obj_access access, s64 id);
+
/*
* Note that certain finalize stages could return a status:
* (a) alloc_commit could return a failure if the object is committed at the
@@ -112,9 +102,63 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
* function. For example, this could happen when we couldn't destroy an
* object.
*/
-int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
- struct uverbs_attr_spec_hash * const *spec_hash,
- size_t num,
- bool commit);
+int uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access,
+ bool commit);
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+
+/*
+ * This is the runtime description of the uverbs API, used by the syscall
+ * machinery to validate and dispatch calls.
+ */
+
+/*
+ * Depending on ID the slot pointer in the radix tree points at one of these
+ * structs.
+ */
+struct uverbs_api_object {
+ const struct uverbs_obj_type *type_attrs;
+ const struct uverbs_obj_type_class *type_class;
+};
+
+struct uverbs_api_ioctl_method {
+ int (__rcu *handler)(struct ib_uverbs_file *ufile,
+ struct uverbs_attr_bundle *ctx);
+ DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
+ u16 bundle_size;
+ u8 use_stack:1;
+ u8 driver_method:1;
+ u8 key_bitmap_len;
+ u8 destroy_bkey;
+};
+
+struct uverbs_api_attr {
+ struct uverbs_attr_spec spec;
+};
+
+struct uverbs_api_object;
+struct uverbs_api {
+ /* radix tree contains struct uverbs_api_* pointers */
+ struct radix_tree_root radix;
+ enum rdma_driver_id driver_id;
+};
+
+static inline const struct uverbs_api_object *
+uapi_get_object(struct uverbs_api *uapi, u16 object_id)
+{
+ return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+}
+
+char *uapi_key_format(char *S, unsigned int key);
+struct uverbs_api *uverbs_alloc_api(
+ const struct uverbs_object_tree_def *const *driver_specs,
+ enum rdma_driver_id driver_id);
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
+void uverbs_disassociate_api(struct uverbs_api *uapi);
+void uverbs_destroy_api(struct uverbs_api *uapi);
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+ unsigned int num_attrs);
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index a4fbdc5d28fa..ee366199b169 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -143,14 +143,15 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
-static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+static bool
+is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
{
struct net_device *real_dev;
- int res;
+ bool res;
if (!rdma_ndev)
- return 0;
+ return false;
rcu_read_lock();
real_dev = rdma_vlan_dev_real_dev(cookie);
@@ -166,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return res;
}
-static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+static bool
+is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
{
struct net_device *master_dev;
- int res;
+ bool res;
if (!rdma_ndev)
- return 0;
+ return false;
rcu_read_lock();
master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
@@ -184,22 +186,59 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
return res;
}
-static int pass_all_filter(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+/** is_ndev_for_default_gid_filter - Check if a given netdevice
+ * can be considered for default GIDs or not.
+ * @ib_dev: IB device to check
+ * @port: Port to consider for adding default GID
+ * @rdma_ndev: rdma netdevice pointer
+ * @cookie: Netdevice to consider to form a default GID
+ *
+ * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
+ * considered for deriving default RoCE GID, returns false otherwise.
+ */
+static bool
+is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
+{
+ struct net_device *cookie_ndev = cookie;
+ bool res;
+
+ if (!rdma_ndev)
+ return false;
+
+ rcu_read_lock();
+
+ /*
+ * When rdma netdevice is used in bonding, bonding master netdevice
+ * should be considered for default GIDs. Therefore, ignore slave rdma
+ * netdevices when bonding is considered.
+ * Additionally when event(cookie) netdevice is bond master device,
+ * make sure that it is the upper netdevice of rdma netdevice.
+ */
+ res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
+ (netif_is_bond_master(cookie_ndev) &&
+ rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
+
+ rcu_read_unlock();
+ return res;
+}
+
+static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
{
- return 1;
+ return true;
}
-static int upper_device_filter(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
{
- int res;
+ bool res;
if (!rdma_ndev)
- return 0;
+ return false;
if (rdma_ndev == cookie)
- return 1;
+ return true;
rcu_read_lock();
res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
@@ -208,6 +247,34 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
return res;
}
+/**
+ * is_upper_ndev_bond_master_filter - Check if a given netdevice
+ * is bond master device of netdevice of the RDMA device of port.
+ * @ib_dev: IB device to check
+ * @port: Port to consider for adding default GID
+ * @rdma_ndev: Pointer to rdma netdevice
+ * @cookie: Netdevice to consider to form a default GID
+ *
+ * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev
+ * is bond master device and rdma_ndev is its lower netdevice. It might
+ * not have been established as slave device yet.
+ */
+static bool
+is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev,
+ void *cookie)
+{
+ struct net_device *cookie_ndev = cookie;
+ bool match = false;
+
+ rcu_read_lock();
+ if (netif_is_bond_master(cookie_ndev) &&
+ rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
+ match = true;
+ rcu_read_unlock();
+ return match;
+}
+
static void update_gid_ip(enum gid_op_type gid_op,
struct ib_device *ib_dev,
u8 port, struct net_device *ndev,
@@ -223,34 +290,10 @@ static void update_gid_ip(enum gid_op_type gid_op,
update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}
-static void enum_netdev_default_gids(struct ib_device *ib_dev,
- u8 port, struct net_device *event_ndev,
- struct net_device *rdma_ndev)
-{
- unsigned long gid_type_mask;
-
- rcu_read_lock();
- if (!rdma_ndev ||
- ((rdma_ndev != event_ndev &&
- !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
- is_eth_active_slave_of_bonding_rcu(rdma_ndev,
- netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
- BONDING_SLAVE_STATE_INACTIVE)) {
- rcu_read_unlock();
- return;
- }
- rcu_read_unlock();
-
- gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
-
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
- IB_CACHE_GID_DEFAULT_MODE_SET);
-}
-
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
u8 port,
- struct net_device *event_ndev,
- struct net_device *rdma_ndev)
+ struct net_device *rdma_ndev,
+ struct net_device *event_ndev)
{
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
unsigned long gid_type_mask;
@@ -381,7 +424,6 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
_add_netdev_ips(ib_dev, port, cookie);
}
@@ -391,6 +433,38 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}
+/**
+ * del_default_gids - Delete default GIDs of the event/cookie netdevice
+ * @ib_dev: RDMA device pointer
+ * @port: Port of the RDMA device whose GID table to consider
+ * @rdma_ndev: Unused rdma netdevice
+ * @cookie: Pointer to event netdevice
+ *
+ * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
+ */
+static void del_default_gids(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
+{
+ struct net_device *cookie_ndev = cookie;
+ unsigned long gid_type_mask;
+
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_DELETE);
+}
+
+static void add_default_gids(struct ib_device *ib_dev, u8 port,
+ struct net_device *rdma_ndev, void *cookie)
+{
+ struct net_device *event_ndev = cookie;
+ unsigned long gid_type_mask;
+
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_SET);
+}
+
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
u8 port,
struct net_device *rdma_ndev,
@@ -405,9 +479,20 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
rtnl_lock();
down_read(&net_rwsem);
for_each_net(net)
- for_each_netdev(net, ndev)
- if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
- add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
+ for_each_netdev(net, ndev) {
+ /*
+ * Filter and add default GIDs of the primary netdevice
+ * when not in bonding mode, or add default GIDs
+ * of bond master device, when in bonding mode.
+ */
+ if (is_ndev_for_default_gid_filter(ib_dev, port,
+ rdma_ndev, ndev))
+ add_default_gids(ib_dev, port, rdma_ndev, ndev);
+
+ if (is_eth_port_of_netdev_filter(ib_dev, port,
+ rdma_ndev, ndev))
+ _add_netdev_ips(ib_dev, port, ndev);
+ }
up_read(&net_rwsem);
rtnl_unlock();
}
@@ -513,18 +598,12 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
rcu_read_unlock();
if (master_ndev) {
- bond_delete_netdev_default_gids(ib_dev, port, master_ndev,
- rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
+ master_ndev);
dev_put(master_ndev);
}
}
-static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
-{
- bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
-}
-
/* The following functions operate on all IB devices. netdevice_event and
* addr_event execute ib_enum_all_roce_netdevs through a work.
* ib_enum_all_roce_netdevs iterates through all IB devices.
@@ -575,40 +654,94 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
}
static const struct netdev_event_work_cmd add_cmd = {
- .cb = add_netdev_ips, .filter = is_eth_port_of_netdev};
+ .cb = add_netdev_ips,
+ .filter = is_eth_port_of_netdev_filter
+};
+
static const struct netdev_event_work_cmd add_cmd_upper_ips = {
- .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev};
+ .cb = add_netdev_upper_ips,
+ .filter = is_eth_port_of_netdev_filter
+};
-static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info,
- struct netdev_event_work_cmd *cmds)
+static void
+ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
+ struct netdev_event_work_cmd *cmds)
{
- static const struct netdev_event_work_cmd upper_ips_del_cmd = {
- .cb = del_netdev_upper_ips, .filter = upper_device_filter};
- static const struct netdev_event_work_cmd bonding_default_del_cmd = {
- .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave};
-
- if (changeupper_info->linking == false) {
- cmds[0] = upper_ips_del_cmd;
- cmds[0].ndev = changeupper_info->upper_dev;
- cmds[1] = add_cmd;
- } else {
- cmds[0] = bonding_default_del_cmd;
- cmds[0].ndev = changeupper_info->upper_dev;
- cmds[1] = add_cmd_upper_ips;
- cmds[1].ndev = changeupper_info->upper_dev;
- cmds[1].filter_ndev = changeupper_info->upper_dev;
- }
+ static const struct netdev_event_work_cmd
+ upper_ips_del_cmd = {
+ .cb = del_netdev_upper_ips,
+ .filter = upper_device_filter
+ };
+
+ cmds[0] = upper_ips_del_cmd;
+ cmds[0].ndev = changeupper_info->upper_dev;
+ cmds[1] = add_cmd;
}
+static const struct netdev_event_work_cmd bonding_default_add_cmd = {
+ .cb = add_default_gids,
+ .filter = is_upper_ndev_bond_master_filter
+};
+
+static void
+ndev_event_link(struct net_device *event_ndev,
+ struct netdev_notifier_changeupper_info *changeupper_info,
+ struct netdev_event_work_cmd *cmds)
+{
+ static const struct netdev_event_work_cmd
+ bonding_default_del_cmd = {
+ .cb = del_default_gids,
+ .filter = is_upper_ndev_bond_master_filter
+ };
+ /*
+ * When a lower netdev is linked to its upper bonding
+ * netdev, delete lower slave netdev's default GIDs.
+ */
+ cmds[0] = bonding_default_del_cmd;
+ cmds[0].ndev = event_ndev;
+ cmds[0].filter_ndev = changeupper_info->upper_dev;
+
+ /* Now add bonding upper device default GIDs */
+ cmds[1] = bonding_default_add_cmd;
+ cmds[1].ndev = changeupper_info->upper_dev;
+ cmds[1].filter_ndev = changeupper_info->upper_dev;
+
+ /* Now add bonding upper device IP based GIDs */
+ cmds[2] = add_cmd_upper_ips;
+ cmds[2].ndev = changeupper_info->upper_dev;
+ cmds[2].filter_ndev = changeupper_info->upper_dev;
+}
+
+static void netdevice_event_changeupper(struct net_device *event_ndev,
+ struct netdev_notifier_changeupper_info *changeupper_info,
+ struct netdev_event_work_cmd *cmds)
+{
+ if (changeupper_info->linking)
+ ndev_event_link(event_ndev, changeupper_info, cmds);
+ else
+ ndev_event_unlink(changeupper_info, cmds);
+}
+
+static const struct netdev_event_work_cmd add_default_gid_cmd = {
+ .cb = add_default_gids,
+ .filter = is_ndev_for_default_gid_filter,
+};
+
static int netdevice_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
static const struct netdev_event_work_cmd del_cmd = {
.cb = del_netdev_ips, .filter = pass_all_filter};
- static const struct netdev_event_work_cmd bonding_default_del_cmd_join = {
- .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave};
- static const struct netdev_event_work_cmd default_del_cmd = {
- .cb = del_netdev_default_ips, .filter = pass_all_filter};
+ static const struct netdev_event_work_cmd
+ bonding_default_del_cmd_join = {
+ .cb = del_netdev_default_ips_join,
+ .filter = is_eth_port_inactive_slave_filter
+ };
+ static const struct netdev_event_work_cmd
+ netdev_del_cmd = {
+ .cb = del_netdev_ips,
+ .filter = is_eth_port_of_netdev_filter
+ };
static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
.cb = del_netdev_upper_ips, .filter = upper_device_filter};
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
@@ -621,7 +754,8 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
case NETDEV_REGISTER:
case NETDEV_UP:
cmds[0] = bonding_default_del_cmd_join;
- cmds[1] = add_cmd;
+ cmds[1] = add_default_gid_cmd;
+ cmds[2] = add_cmd;
break;
case NETDEV_UNREGISTER:
@@ -632,19 +766,22 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_CHANGEADDR:
- cmds[0] = default_del_cmd;
- cmds[1] = add_cmd;
+ cmds[0] = netdev_del_cmd;
+ cmds[1] = add_default_gid_cmd;
+ cmds[2] = add_cmd;
break;
case NETDEV_CHANGEUPPER:
- netdevice_event_changeupper(
+ netdevice_event_changeupper(ndev,
container_of(ptr, struct netdev_notifier_changeupper_info, info),
cmds);
break;
case NETDEV_BONDING_FAILOVER:
cmds[0] = bonding_event_ips_del_cmd;
- cmds[1] = bonding_default_del_cmd_join;
+ /* Add default GIDs of the bond device */
+ cmds[1] = bonding_default_add_cmd;
+ /* Add IP based GIDs of the bond device */
cmds[2] = add_cmd_upper_ips;
break;
@@ -660,7 +797,8 @@ static void update_gid_event_work_handler(struct work_struct *_work)
struct update_gid_event_work *work =
container_of(_work, struct update_gid_event_work, work);
- ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev,
+ ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
+ work->gid_attr.ndev,
callback_for_addr_gid_device_scan, work);
dev_put(work->gid_attr.ndev);
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index c8963e91f92a..683e6d11a564 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
}
ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
- if (ret < nents) {
+ if (ret < 0 || ret < nents) {
ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
return -EINVAL;
}
@@ -325,7 +325,7 @@ out_unmap_sg:
EXPORT_SYMBOL(rdma_rw_ctx_init);
/**
- * rdma_rw_ctx_signature init - initialize a RW context with signature offload
+ * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
* @ctx: context to initialize
* @qp: queue pair to operate on
* @port_num: port num to which the connection is bound
@@ -564,10 +564,10 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs);
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
- struct ib_send_wr *first_wr, *bad_wr;
+ struct ib_send_wr *first_wr;
first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
- return ib_post_send(qp, first_wr, &bad_wr);
+ return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a61ec7e33613..7b794a14d6e8 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-static int
-roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec)
+static int roce_resolve_route_from_path(struct sa_path_rec *rec,
+ const struct ib_gid_attr *attr)
{
- struct net_device *resolved_dev;
- struct net_device *ndev;
- struct net_device *idev;
- struct rdma_dev_addr dev_addr = {
- .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
- sa_path_get_ifindex(rec) : 0),
- .net = sa_path_get_ndev(rec) ?
- sa_path_get_ndev(rec) :
- &init_net
- };
+ struct rdma_dev_addr dev_addr = {};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
if (rec->roce.route_resolved)
return 0;
+ if (!attr || !attr->ndev)
+ return -EINVAL;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
+ dev_addr.bound_dev_if = attr->ndev->ifindex;
+ /* TODO: Use net from the ib_gid_attr once it is added to it,
+ * until then, limit itself to init_net.
+ */
+ dev_addr.net = &init_net;
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
@@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- resolved_dev = dev_get_by_index(dev_addr.net,
- dev_addr.bound_dev_if);
- if (!resolved_dev) {
- ret = -ENODEV;
- goto done;
- }
- ndev = ib_get_ndev_from_path(rec);
- rcu_read_lock();
- if ((ndev && ndev != resolved_dev) ||
- (resolved_dev != idev &&
- !rdma_is_upper_dev_rcu(idev, resolved_dev)))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(resolved_dev);
- if (ndev)
- dev_put(ndev);
-done:
- dev_put(idev);
- if (!ret)
- rec->roce.route_resolved = true;
- return ret;
+ rec->roce.route_resolved = true;
+ return 0;
}
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *gid_attr)
{
enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- struct net_device *ndev;
- u16 gid_index;
- int ret;
- ndev = ib_get_ndev_from_path(rec);
- ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
- port_num, ndev, &gid_index);
- if (ndev)
- dev_put(ndev);
- if (ret)
- return ret;
+ if (!gid_attr) {
+ gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
+ port_num, NULL);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+ } else
+ rdma_hold_gid_attr(gid_attr);
- rdma_ah_set_grh(ah_attr, &rec->dgid,
- be32_to_cpu(rec->flow_label),
- gid_index, rec->hop_limit,
- rec->traffic_class);
+ rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ rec->hop_limit, rec->traffic_class,
+ gid_attr);
return 0;
}
+/**
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ * an SA path record.
+ * @device: Device associated ah attributes initialization.
+ * @port_num: Port on the specified device.
+ * @rec: path record entry to use for ah attributes initialization.
+ * @ah_attr: address handle attributes to initialize from path record.
+ * @gid_attr: SGID attribute to consider during initialization.
+ *
+ * When ib_init_ah_attr_from_path() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_destroy_ah_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_path().
+ */
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *gid_attr)
{
int ret = 0;
@@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
rdma_ah_set_static_rate(ah_attr, rec->rate);
if (sa_path_is_roce(rec)) {
- ret = roce_resolve_route_from_path(device, port_num, rec);
+ ret = roce_resolve_route_from_path(rec, gid_attr);
if (ret)
return ret;
@@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || sa_path_is_roce(rec))
- ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+ ret = init_ah_attr_grh_fields(device, port_num,
+ rec, ah_attr, gid_attr);
return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
@@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ARRAY_SIZE(path_rec_table),
mad->data, &rec);
rec.rec_type = SA_PATH_REC_TYPE_IB;
- sa_path_set_ndev(&rec, NULL);
- sa_path_set_ifindex(&rec, 0);
sa_path_set_dmac_zero(&rec);
if (query->conv_pr) {
@@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work)
struct ib_sa_sm_ah *new_ah;
struct ib_port_attr port_attr;
struct rdma_ah_attr ah_attr;
+ bool grh_required;
if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
pr_warn("Couldn't query port\n");
@@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work)
rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
rdma_ah_set_port_num(&ah_attr, port->port_num);
- if (port_attr.grh_required) {
- if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) {
- rdma_ah_set_make_grd(&ah_attr, true);
- } else {
- rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
- rdma_ah_set_subnet_prefix(&ah_attr,
- cpu_to_be64(port_attr.subnet_prefix));
- rdma_ah_set_interface_id(&ah_attr,
- cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
- }
+
+ grh_required = rdma_is_grh_required(port->agent->device,
+ port->port_num);
+
+ /*
+ * The OPA sm_lid of 0xFFFF needs special handling so that it can be
+ * differentiated from a permissive LID of 0xFFFF. We set the
+ * grh_required flag here so the SA can program the DGID in the
+ * address handle appropriately
+ */
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
+ (grh_required ||
+ port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
+ rdma_ah_set_make_grd(&ah_attr, true);
+
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
}
new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 31c7efaf8e7a..7fd14ead7b37 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -42,6 +42,7 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
+#include <rdma/ib_cache.h>
struct ib_port;
@@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = {
NULL
};
-static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
if (!gid_attr->ndev)
return -EINVAL;
@@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
return sprintf(buf, "%s\n", gid_attr->ndev->name);
}
-static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
{
return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
}
-static ssize_t _show_port_gid_attr(struct ib_port *p,
- struct port_attribute *attr,
- char *buf,
- size_t (*print)(struct ib_gid_attr *gid_attr,
- char *buf))
+static ssize_t _show_port_gid_attr(
+ struct ib_port *p, struct port_attribute *attr, char *buf,
+ size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- union ib_gid gid;
- struct ib_gid_attr gid_attr = {};
+ const struct ib_gid_attr *gid_attr;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
- &gid_attr);
- if (ret)
- goto err;
+ gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
- ret = print(&gid_attr, buf);
-
-err:
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
+ ret = print(gid_attr, buf);
+ rdma_put_gid_attr(gid_attr);
return ret;
}
@@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- union ib_gid *pgid;
- union ib_gid gid;
+ const struct ib_gid_attr *gid_attr;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
+ gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ if (IS_ERR(gid_attr)) {
+ const union ib_gid zgid = {};
+
+ /* If reading GID fails, it is likely due to GID entry being
+ * empty (invalid) or reserved GID in the table. User space
+ * expects to read GID table entries as long as the given index
+ * is within GID table size. Administrative/debugging tool
+ * fails to query rest of the GID entries if it hits error
+ * while querying a GID of the given index. To avoid user
+ * space throwing such error on fail to read gid, return zero
+ * GID as before. This maintains backward compatibility.
+ */
+ return sprintf(buf, "%pI6\n", zgid.raw);
+ }
- /* If reading GID fails, it is likely due to GID entry being empty
- * (invalid) or reserved GID in the table.
- * User space expects to read GID table entries as long as it given
- * index is within GID table size.
- * Administrative/debugging tool fails to query rest of the GID entries
- * if it hits error while querying a GID of the given index.
- * To avoid user space throwing such error on fail to read gid, return
- * zero GID as before. This maintains backward compatibility.
- */
- if (ret)
- pgid = &zgid;
- else
- pgid = &gid;
- return sprintf(buf, "%pI6\n", pgid->raw);
+ ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
+ rdma_put_gid_attr(gid_attr);
+ return ret;
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 9eef96dacbd7..faa9e6116b2f 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -207,7 +207,7 @@ error:
}
static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
- struct ib_cm_req_event_param *kreq)
+ const struct ib_cm_req_event_param *kreq)
{
ureq->remote_ca_guid = kreq->remote_ca_guid;
ureq->remote_qkey = kreq->remote_qkey;
@@ -231,7 +231,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
}
static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
- struct ib_cm_rep_event_param *krep)
+ const struct ib_cm_rep_event_param *krep)
{
urep->remote_ca_guid = krep->remote_ca_guid;
urep->remote_qkey = krep->remote_qkey;
@@ -247,14 +247,14 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
}
static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
- struct ib_cm_sidr_rep_event_param *krep)
+ const struct ib_cm_sidr_rep_event_param *krep)
{
urep->status = krep->status;
urep->qkey = krep->qkey;
urep->qpn = krep->qpn;
};
-static int ib_ucm_event_process(struct ib_cm_event *evt,
+static int ib_ucm_event_process(const struct ib_cm_event *evt,
struct ib_ucm_event *uvt)
{
void *info = NULL;
@@ -351,7 +351,7 @@ err1:
}
static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *event)
+ const struct ib_cm_event *event)
{
struct ib_ucm_event *uevent;
struct ib_ucm_context *ctx;
@@ -1000,14 +1000,11 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
const char __user *inbuf,
int in_len, int out_len)
{
- struct ib_cm_sidr_req_param param;
+ struct ib_cm_sidr_req_param param = {};
struct ib_ucm_context *ctx;
struct ib_ucm_sidr_req cmd;
int result;
- param.private_data = NULL;
- param.path = NULL;
-
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 54ab6335c48d..a41792dbae1f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
- unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
@@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
int i;
unsigned long dma_attrs = 0;
struct scatterlist *sg, *sg_list_start;
- int need_release = 0;
unsigned int gup_flags = FOLL_WRITE;
if (dmasync)
@@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem, access);
- if (ret) {
- kfree(umem);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto umem_kfree;
return umem;
}
@@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
- kfree(umem);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto umem_kfree;
}
/*
@@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
npages = ib_umem_num_pages(umem);
- down_write(&current->mm->mmap_sem);
-
- locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ down_write(&current->mm->mmap_sem);
+ current->mm->pinned_vm += npages;
+ if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ up_write(&current->mm->mmap_sem);
ret = -ENOMEM;
- goto out;
+ goto vma;
}
+ up_write(&current->mm->mmap_sem);
cur_base = addr & PAGE_MASK;
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
- goto out;
+ goto vma;
}
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
- goto out;
+ goto vma;
if (!umem->writable)
gup_flags |= FOLL_FORCE;
- need_release = 1;
sg_list_start = umem->sg_head.sgl;
+ down_read(&current->mm->mmap_sem);
while (npages) {
ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list);
-
- if (ret < 0)
- goto out;
+ if (ret < 0) {
+ up_read(&current->mm->mmap_sem);
+ goto umem_release;
+ }
umem->npages += ret;
cur_base += ret * PAGE_SIZE;
@@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
/* preparing for next loop */
sg_list_start = sg;
}
+ up_read(&current->mm->mmap_sem);
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
@@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
DMA_BIDIRECTIONAL,
dma_attrs);
- if (umem->nmap <= 0) {
+ if (!umem->nmap) {
ret = -ENOMEM;
- goto out;
+ goto umem_release;
}
ret = 0;
+ goto out;
-out:
- if (ret < 0) {
- if (need_release)
- __ib_umem_release(context->device, umem, 0);
- kfree(umem);
- } else
- current->mm->pinned_vm = locked;
-
+umem_release:
+ __ib_umem_release(context->device, umem, 0);
+vma:
+ down_write(&current->mm->mmap_sem);
+ current->mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&current->mm->mmap_sem);
+out:
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
-
- return ret < 0 ? ERR_PTR(ret) : umem;
+umem_kfree:
+ if (ret)
+ kfree(umem);
+ return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 182436b92ba9..6ec748eccff7 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -186,6 +186,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
ULLONG_MAX,
ib_umem_notifier_release_trampoline,
+ true,
NULL);
up_read(&context->umem_rwsem);
}
@@ -207,22 +208,31 @@ static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
return 0;
}
-static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+ int ret;
if (!context->invalidate_range)
- return;
+ return 0;
+
+ if (blockable)
+ down_read(&context->umem_rwsem);
+ else if (!down_read_trylock(&context->umem_rwsem))
+ return -EAGAIN;
ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+ ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
- invalidate_range_start_trampoline, NULL);
+ invalidate_range_start_trampoline,
+ blockable, NULL);
up_read(&context->umem_rwsem);
+
+ return ret;
}
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
@@ -242,10 +252,15 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
if (!context->invalidate_range)
return;
+ /*
+ * TODO: we currently bail out if there is any sleepable work to be done
+ * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
+ * here. But this is ugly and fragile.
+ */
down_read(&context->umem_rwsem);
rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
- invalidate_range_end_trampoline, NULL);
+ invalidate_range_end_trampoline, true, NULL);
up_read(&context->umem_rwsem);
ib_ucontext_notifier_end_account(context);
}
@@ -798,6 +813,7 @@ EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
u64 start, u64 last,
umem_call_back cb,
+ bool blockable,
void *cookie)
{
int ret_val = 0;
@@ -809,6 +825,9 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
for (node = rbt_ib_umem_iter_first(root, start, last - 1);
node; node = next) {
+ /* TODO move the blockable decision up to the callback */
+ if (!blockable)
+ return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index bb98c9e4a7fd..c34a6852d691 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.traffic_class = grh->traffic_class;
memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
+ rdma_destroy_ah_attr(&ah_attr);
}
if (queue_packet(file, agent, packet))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c0d40fc3a53a..5df8e548cc14 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -111,7 +111,7 @@ struct ib_uverbs_device {
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
struct list_head uverbs_events_file_list;
- struct uverbs_root_spec *specs_root;
+ struct uverbs_api *uapi;
};
struct ib_uverbs_event_queue {
@@ -130,21 +130,37 @@ struct ib_uverbs_async_event_file {
};
struct ib_uverbs_completion_event_file {
- struct ib_uobject_file uobj_file;
+ struct ib_uobject uobj;
struct ib_uverbs_event_queue ev_queue;
};
struct ib_uverbs_file {
struct kref ref;
- struct mutex mutex;
- struct mutex cleanup_mutex; /* protect cleanup */
struct ib_uverbs_device *device;
+ struct mutex ucontext_lock;
+ /*
+ * ucontext must be accessed via ib_uverbs_get_ucontext() or with
+ * ucontext_lock held
+ */
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
+ /*
+ * To access the uobjects list hw_destroy_rwsem must be held for write
+ * OR hw_destroy_rwsem held for read AND uobjects_lock held.
+ * hw_destroy_rwsem should be held across any destruction of the HW
+ * object of an associated uobject.
+ */
+ struct rw_semaphore hw_destroy_rwsem;
+ spinlock_t uobjects_lock;
+ struct list_head uobjects;
+
+ u64 uverbs_cmd_mask;
+ u64 uverbs_ex_cmd_mask;
+
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
@@ -196,7 +212,6 @@ struct ib_uwq_object {
struct ib_ucq_object {
struct ib_uobject uobject;
- struct ib_uverbs_file *uverbs_file;
struct list_head comp_list;
struct list_head async_list;
u32 comp_events_reported;
@@ -230,7 +245,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
@@ -238,12 +253,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
-extern const struct uverbs_attr_def uverbs_uhw_compat_in;
-extern const struct uverbs_attr_def uverbs_uhw_compat_out;
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs);
struct ib_uverbs_flow_spec {
union {
@@ -292,7 +302,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
- struct ib_device *ib_dev, \
const char __user *buf, int in_len, \
int out_len)
@@ -334,7 +343,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
#define IB_UVERBS_DECLARE_EX_CMD(name) \
int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
- struct ib_device *ib_dev, \
struct ib_udata *ucore, \
struct ib_udata *uhw)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3e90b6a1d9d2..a21d5214afc3 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -48,11 +48,10 @@
#include "core_priv.h"
static struct ib_uverbs_completion_event_file *
-ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
+_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
{
- struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
- fd, context);
- struct ib_uobject_file *uobj_file;
+ struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
+ fd, ufile);
if (IS_ERR(uobj))
return (void *)uobj;
@@ -60,13 +59,13 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
uverbs_uobject_get(uobj);
uobj_put_read(uobj);
- uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
- return container_of(uobj_file, struct ib_uverbs_completion_event_file,
- uobj_file);
+ return container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
}
+#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
+ _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_ucontext *ucontext;
struct file *filp;
struct ib_rdmacg_object cg_obj;
+ struct ib_device *ib_dev;
int ret;
if (out_len < sizeof resp)
@@ -84,7 +84,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- mutex_lock(&file->mutex);
+ mutex_lock(&file->ucontext_lock);
+ ib_dev = srcu_dereference(file->device->ib_dev,
+ &file->device->disassociate_srcu);
+ if (!ib_dev) {
+ ret = -EIO;
+ goto err;
+ }
if (file->ucontext) {
ret = -EINVAL;
@@ -110,12 +116,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->cg_obj = cg_obj;
/* ufile is required when some objects are released */
ucontext->ufile = file;
- uverbs_initialize_ucontext(ucontext);
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
ucontext->closing = 0;
+ ucontext->cleanup_retryable = false;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
ucontext->umem_tree = RB_ROOT_CACHED;
@@ -146,11 +152,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_file;
}
- file->ucontext = ucontext;
-
fd_install(resp.async_fd, filp);
- mutex_unlock(&file->mutex);
+ /*
+ * Make sure that ib_uverbs_get_ucontext() sees the pointer update
+ * only after all writes to setup the ucontext have completed
+ */
+ smp_store_release(&file->ucontext, ucontext);
+
+ mutex_unlock(&file->ucontext_lock);
return in_len;
@@ -169,15 +179,16 @@ err_alloc:
ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
err:
- mutex_unlock(&file->mutex);
+ mutex_unlock(&file->ucontext_lock);
return ret;
}
-static void copy_query_dev_fields(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
+static void copy_query_dev_fields(struct ib_ucontext *ucontext,
struct ib_uverbs_query_device_resp *resp,
struct ib_device_attr *attr)
{
+ struct ib_device *ib_dev = ucontext->device;
+
resp->fw_ver = attr->fw_ver;
resp->node_guid = ib_dev->node_guid;
resp->sys_image_guid = attr->sys_image_guid;
@@ -189,7 +200,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
- resp->max_sge = attr->max_sge;
+ resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
resp->max_cqe = attr->max_cqe;
@@ -221,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
+ struct ib_ucontext *ucontext;
+
+ ucontext = ib_uverbs_get_ucontext(file);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
if (out_len < sizeof resp)
return -ENOSPC;
@@ -235,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
return -EFAULT;
memset(&resp, 0, sizeof resp);
- copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
+ copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
@@ -243,8 +258,28 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
return in_len;
}
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+ u32 res;
+
+ /* All IBA CapabilityMask bits are passed through here, except bit 26,
+ * which is overridden with IP_BASED_GIDS. This is due to a historical
+ * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+ * bits match the IBA definition across all kernel versions.
+ */
+ res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ if (attr->ip_gids)
+ res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ return res;
+}
+
ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -252,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
struct ib_uverbs_query_port_resp resp;
struct ib_port_attr attr;
int ret;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+
+ ucontext = ib_uverbs_get_ucontext(file);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -269,12 +311,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.max_mtu = attr.max_mtu;
resp.active_mtu = attr.active_mtu;
resp.gid_tbl_len = attr.gid_tbl_len;
- resp.port_cap_flags = attr.port_cap_flags;
+ resp.port_cap_flags = make_port_cap_flags(&attr);
resp.max_msg_sz = attr.max_msg_sz;
resp.bad_pkey_cntr = attr.bad_pkey_cntr;
resp.qkey_viol_cntr = attr.qkey_viol_cntr;
resp.pkey_tbl_len = attr.pkey_tbl_len;
+ if (rdma_is_grh_required(ib_dev, cmd.port_num))
+ resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
@@ -300,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -310,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
+ struct ib_device *ib_dev;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -322,11 +367,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
+ pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto err;
@@ -348,9 +393,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
goto err_copy;
}
- uobj_alloc_commit(uobj);
-
- return in_len;
+ return uobj_alloc_commit(uobj, in_len);
err_copy:
ib_dealloc_pd(pd);
@@ -361,25 +404,16 @@ err:
}
ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_dealloc_pd cmd;
- struct ib_uobject *uobj;
- int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
-
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
+ in_len);
}
struct xrcd_table_entry {
@@ -468,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
}
ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -481,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
struct inode *inode = NULL;
int ret = 0;
int new_xrcd = 0;
+ struct ib_device *ib_dev;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -517,15 +551,15 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
- file->ucontext);
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+ &ib_dev);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
if (!xrcd) {
- xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
+ xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
@@ -564,9 +598,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
mutex_unlock(&file->device->xrcd_tree_mutex);
- uobj_alloc_commit(&obj->uobject);
-
- return in_len;
+ return uobj_alloc_commit(&obj->uobject, in_len);
err_copy:
if (inode) {
@@ -591,32 +623,25 @@ err_tree_mutex_unlock:
}
ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_close_xrcd cmd;
- struct ib_uobject *uobj;
- int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
+ in_len);
}
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
struct ib_xrcd *xrcd,
enum rdma_remove_reason why)
{
struct inode *inode;
int ret;
+ struct ib_uverbs_device *dev = uobject->context->ufile->device;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
@@ -624,16 +649,18 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
ret = ib_dealloc_xrcd(xrcd);
- if (why == RDMA_REMOVE_DESTROY && ret)
+ if (ib_is_destroy_retryable(ret, why, uobject)) {
atomic_inc(&xrcd->usecnt);
- else if (inode)
+ return ret;
+ }
+
+ if (inode)
xrcd_table_delete(dev, inode);
return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -644,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
+ struct ib_device *ib_dev;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -663,11 +691,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -711,9 +739,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
uobj_put_obj_read(pd);
- uobj_alloc_commit(uobj);
-
- return in_len;
+ return uobj_alloc_commit(uobj, in_len);
err_copy:
ib_dereg_mr(mr);
@@ -727,7 +753,6 @@ err_free:
}
ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -759,8 +784,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
- file->ucontext);
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -778,7 +802,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ file);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -819,29 +844,19 @@ put_uobjs:
}
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
-
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
+ in_len);
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -852,6 +867,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
struct ib_mw *mw;
struct ib_udata udata;
int ret;
+ struct ib_device *ib_dev;
if (out_len < sizeof(resp))
return -ENOSPC;
@@ -859,11 +875,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -897,9 +913,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
}
uobj_put_obj_read(pd);
- uobj_alloc_commit(uobj);
-
- return in_len;
+ return uobj_alloc_commit(uobj, in_len);
err_copy:
uverbs_dealloc_mw(mw);
@@ -911,28 +925,19 @@ err_free:
}
ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
+ in_len);
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -940,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
struct ib_uverbs_create_comp_channel_resp resp;
struct ib_uobject *uobj;
struct ib_uverbs_completion_event_file *ev_file;
+ struct ib_device *ib_dev;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -947,14 +953,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
resp.fd = uobj->id;
ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
+ uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
@@ -962,12 +968,10 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
return -EFAULT;
}
- uobj_alloc_commit(uobj);
- return in_len;
+ return uobj_alloc_commit(uobj, in_len);
}
static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw,
struct ib_uverbs_ex_create_cq *cmd,
@@ -985,21 +989,23 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
struct ib_cq_init_attr attr = {};
-
- if (!ib_dev->create_cq)
- return ERR_PTR(-EOPNOTSUPP);
+ struct ib_device *ib_dev;
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
- file->ucontext);
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+ &ib_dev);
if (IS_ERR(obj))
return obj;
+ if (!ib_dev->create_cq) {
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
+
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
- file->ucontext);
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
if (IS_ERR(ev_file)) {
ret = PTR_ERR(ev_file);
goto err;
@@ -1007,7 +1013,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
}
obj->uobject.user_handle = cmd->user_handle;
- obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
@@ -1019,7 +1024,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
- cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1047,7 +1052,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (ret)
goto err_cb;
- uobj_alloc_commit(&obj->uobject);
+ ret = uobj_alloc_commit(&obj->uobject, 0);
+ if (ret)
+ return ERR_PTR(ret);
return obj;
err_cb:
@@ -1075,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1106,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
+ obj = create_cq(file, &ucore, &uhw, &cmd_ex,
offsetof(typeof(cmd_ex), comp_channel) +
sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
NULL);
@@ -1129,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
}
int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -1155,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
sizeof(resp.response_length)))
return -ENOSPC;
- obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
+ obj = create_cq(file, ucore, uhw, &cmd,
min(ucore->inlen, sizeof(cmd)),
ib_uverbs_ex_create_cq_cb, NULL);
@@ -1163,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1181,7 +1185,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
@@ -1231,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
}
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1246,7 +1249,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
@@ -1262,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (!ret)
break;
- ret = copy_wc_to_user(ib_dev, data_ptr, &wc);
+ ret = copy_wc_to_user(cq->device, data_ptr, &wc);
if (ret)
goto out_put;
@@ -1283,7 +1286,6 @@ out_put:
}
ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1293,7 +1295,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
@@ -1306,45 +1308,28 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
- struct ib_cq *cq;
struct ib_ucq_object *obj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
- file->ucontext);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- /*
- * Make sure we don't free the memory in remove_commit as we still
- * needs the uobject memory to create the response.
- */
- uverbs_uobject_get(uobj);
- cq = uobj->object;
- obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
-
+ obj = container_of(uobj, struct ib_ucq_object, uobject);
memset(&resp, 0, sizeof(resp));
-
- ret = uobj_remove_commit(uobj);
- if (ret) {
- uverbs_uobject_put(uobj);
- return ret;
- }
-
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- uverbs_uobject_put(uobj);
+ uobj_put_destroy(uobj);
+
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
@@ -1375,12 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file,
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
+ struct ib_device *ib_dev;
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
- file->ucontext);
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ &ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
obj->uxrcd = NULL;
@@ -1390,9 +1376,9 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
- cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd->rwq_ind_tbl_handle, file);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1418,7 +1404,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
- file->ucontext);
+ file);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1437,8 +1423,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
+ cmd->srq_handle, file);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
@@ -1447,8 +1433,9 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
- file->ucontext);
+ rcq = uobj_get_obj_read(
+ cq, UVERBS_OBJECT_CQ,
+ cmd->recv_cq_handle, file);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1458,11 +1445,12 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
- file->ucontext);
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->send_cq_handle, file);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
+ file);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1602,9 +1590,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
- uobj_alloc_commit(&obj->uevent.uobject);
-
- return 0;
+ return uobj_alloc_commit(&obj->uevent.uobject, 0);
err_cb:
ib_destroy_qp(qp);
@@ -1637,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1698,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
}
int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -1735,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_open_qp cmd;
@@ -1747,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_qp_open_attr attr;
int ret;
+ struct ib_device *ib_dev;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1759,13 +1743,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
- file->ucontext);
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ &ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
- file->ucontext);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err_put;
@@ -1809,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
qp->uobject = &obj->uevent.uobject;
uobj_put_read(xrcd_uobj);
-
- uobj_alloc_commit(&obj->uevent.uobject);
-
- return in_len;
+ return uobj_alloc_commit(&obj->uevent.uobject, in_len);
err_destroy:
ib_destroy_qp(qp);
@@ -1846,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
}
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1867,7 +1846,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1968,11 +1947,11 @@ static int modify_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
int ret;
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1984,15 +1963,64 @@ static int modify_qp(struct ib_uverbs_file *file,
goto release_qp;
}
- if ((cmd->base.attr_mask & IB_QP_AV) &&
- !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
- ret = -EINVAL;
- goto release_qp;
+ if ((cmd->base.attr_mask & IB_QP_AV)) {
+ if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
+ if (cmd->base.attr_mask & IB_QP_STATE &&
+ cmd->base.qp_state == IB_QPS_RTR) {
+ /* We are in INIT->RTR TRANSITION (if we are not,
+ * this transition will be rejected in subsequent checks).
+ * In the INIT->RTR transition, we cannot have IB_QP_PORT set,
+ * but the IB_QP_STATE flag is required.
+ *
+ * Since kernel 3.14 (commit dbf727de7440), the uverbs driver,
+ * when IB_QP_AV is set, has required inclusion of a valid
+ * port number in the primary AV. (AVs are created and handled
+ * differently for infiniband and ethernet (RoCE) ports).
+ *
+ * Check the port number included in the primary AV against
+ * the port number in the qp struct, which was set (and saved)
+ * in the RST->INIT transition.
+ */
+ if (cmd->base.dest.port_num != qp->real_qp->port) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+ } else {
+ /* We are in SQD->SQD. (If we are not, this transition will
+ * be rejected later in the verbs layer checks).
+ * Check for both IB_QP_PORT and IB_QP_AV, these can be set
+ * together in the SQD->SQD transition.
+ *
+ * If only IB_QP_AV was set, add in IB_QP_PORT as well (the
+ * verbs layer driver does not track primary port changes
+ * resulting from path migration. Thus, in SQD, if the primary
+ * AV is modified, the primary port should also be modified).
+ *
+ * Note that in this transition, the IB_QP_STATE flag
+ * is not allowed.
+ */
+ if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+ == (IB_QP_AV | IB_QP_PORT)) &&
+ cmd->base.port_num != cmd->base.dest.port_num) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+ if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+ == IB_QP_AV) {
+ cmd->base.attr_mask |= IB_QP_PORT;
+ cmd->base.port_num = cmd->base.dest.port_num;
+ }
+ }
}
if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
(!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
- !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) {
+ !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) ||
+ cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) {
ret = -EINVAL;
goto release_qp;
}
@@ -2049,7 +2077,6 @@ out:
}
ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2076,7 +2103,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
}
int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -2112,7 +2138,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2120,33 +2145,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
struct ib_uqp_object *obj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- memset(&resp, 0, sizeof resp);
-
- uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
- file->ucontext);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
- /*
- * Make sure we don't free the memory in remove_commit as we still
- * needs the uobject memory to create the response.
- */
- uverbs_uobject_get(uobj);
-
- ret = uobj_remove_commit(uobj);
- if (ret) {
- uverbs_uobject_put(uobj);
- return ret;
- }
-
+ memset(&resp, 0, sizeof(resp));
resp.events_reported = obj->uevent.events_reported;
- uverbs_uobject_put(uobj);
+
+ uobj_put_destroy(uobj);
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
return -EFAULT;
@@ -2165,14 +2176,14 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
struct ib_uverbs_send_wr *user_wr;
- struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
+ struct ib_send_wr *wr = NULL, *last, *next;
+ const struct ib_send_wr *bad_wr;
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
@@ -2193,7 +2204,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
goto out;
@@ -2229,8 +2240,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
- file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
+ user_wr->wr.ud.ah, file);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2445,13 +2456,13 @@ err:
}
ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
- struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_recv_wr *wr, *next;
+ const struct ib_recv_wr *bad_wr;
struct ib_qp *qp;
ssize_t ret = -EINVAL;
@@ -2464,7 +2475,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
goto out;
@@ -2494,13 +2505,13 @@ out:
}
ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
- struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_recv_wr *wr, *next;
+ const struct ib_recv_wr *bad_wr;
struct ib_srq *srq;
ssize_t ret = -EINVAL;
@@ -2513,12 +2524,13 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (!srq)
goto out;
resp.bad_wr = 0;
- ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+ ret = srq->device->post_srq_recv ?
+ srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
uobj_put_obj_read(srq);
@@ -2543,7 +2555,6 @@ out:
}
ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2552,9 +2563,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct rdma_ah_attr attr;
+ struct rdma_ah_attr attr = {};
int ret;
struct ib_udata udata;
+ struct ib_device *ib_dev;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2562,19 +2574,21 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
- return -EINVAL;
-
ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
u64_to_user_ptr(cmd.response) + sizeof(resp),
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2616,9 +2630,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
}
uobj_put_obj_read(pd);
- uobj_alloc_commit(uobj);
-
- return in_len;
+ return uobj_alloc_commit(uobj, in_len);
err_copy:
rdma_destroy_ah(ah);
@@ -2632,27 +2644,18 @@ err:
}
ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_uobject *uobj;
- int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
+ in_len);
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2665,7 +2668,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
return -EINVAL;
@@ -2702,7 +2705,6 @@ out_put:
}
ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2716,7 +2718,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
return -EINVAL;
@@ -2761,29 +2763,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
resources = kzalloc(sizeof(*resources), GFP_KERNEL);
if (!resources)
- goto err_res;
+ return NULL;
+
+ if (!num_specs)
+ goto out;
resources->counters =
kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
-
- if (!resources->counters)
- goto err_cnt;
-
resources->collection =
kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
- if (!resources->collection)
- goto err_collection;
+ if (!resources->counters || !resources->collection)
+ goto err;
+out:
resources->max = num_specs;
-
return resources;
-err_collection:
+err:
kfree(resources->counters);
-err_cnt:
kfree(resources);
-err_res:
+
return NULL;
}
@@ -2791,6 +2791,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{
unsigned int i;
+ if (!uflow_res)
+ return;
+
for (i = 0; i < uflow_res->collection_num; i++)
atomic_dec(&uflow_res->collection[i]->usecnt);
@@ -2826,7 +2829,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
uflow_res->num++;
}
-static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -2855,7 +2858,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
ib_spec->action.act = uobj_get_obj_read(flow_action,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
- ucontext);
+ ufile);
if (!ib_spec->action.act)
return -EINVAL;
ib_spec->action.size =
@@ -2873,7 +2876,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
uobj_get_obj_read(counters,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
- ucontext);
+ ufile);
if (!ib_spec->flow_count.counters)
return -EINVAL;
ib_spec->flow_count.size =
@@ -3042,9 +3045,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
void *kern_spec_mask;
void *kern_spec_val;
- if (kern_spec->reserved)
- return -EINVAL;
-
kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
kern_spec_val = (void *)kern_spec +
@@ -3057,7 +3057,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
kern_filter_sz, ib_spec);
}
-static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -3066,14 +3066,13 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+ return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3087,6 +3086,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_wq_init_attr wq_init_attr = {};
size_t required_cmd_sz;
size_t required_resp_len;
+ struct ib_device *ib_dev;
required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
@@ -3109,18 +3109,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
- file->ucontext);
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+ &ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3174,8 +3174,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
uobj_put_obj_read(pd);
uobj_put_obj_read(cq);
- uobj_alloc_commit(&obj->uevent.uobject);
- return 0;
+ return uobj_alloc_commit(&obj->uevent.uobject, 0);
err_copy:
ib_destroy_wq(wq);
@@ -3190,7 +3189,6 @@ err_uobj:
}
int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3224,29 +3222,19 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
- file->ucontext);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- /*
- * Make sure we don't free the memory in remove_commit as we still
- * needs the uobject memory to create the response.
- */
- uverbs_uobject_get(uobj);
-
- ret = uobj_remove_commit(uobj);
resp.events_reported = obj->uevent.events_reported;
- uverbs_uobject_put(uobj);
- if (ret)
- return ret;
+
+ uobj_put_destroy(uobj);
return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3275,7 +3263,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
if (!wq)
return -EINVAL;
@@ -3296,7 +3284,6 @@ out:
}
int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3314,6 +3301,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
u32 expected_in_size;
size_t required_cmd_sz_header;
size_t required_resp_len;
+ struct ib_device *ib_dev;
required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
@@ -3369,8 +3357,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
- file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
+ wqs_handles[num_read_wqs], file);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3379,7 +3367,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3423,8 +3411,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (j = 0; j < num_read_wqs; j++)
uobj_put_obj_read(wqs[j]);
- uobj_alloc_commit(uobj);
- return 0;
+ return uobj_alloc_commit(uobj, 0);
err_copy:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3440,12 +3427,10 @@ err_free:
}
int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_uobject *uobj;
int ret;
size_t required_cmd_sz;
@@ -3466,16 +3451,11 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- return uobj_remove_commit(uobj);
+ return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd.ind_tbl_handle, file, 0);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3488,10 +3468,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
struct ib_uflow_resources *uflow_res;
+ struct ib_uverbs_flow_spec_hdr *kern_spec;
int err = 0;
- void *kern_spec;
void *ib_spec;
int i;
+ struct ib_device *ib_dev;
if (ucore->inlen < sizeof(cmd))
return -EINVAL;
@@ -3538,8 +3519,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (!kern_flow_attr)
return -ENOMEM;
- memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
- err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+ *kern_flow_attr = cmd.flow_attr;
+ err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
cmd.flow_attr.size);
if (err)
goto err_free_attr;
@@ -3547,18 +3528,28 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp) {
err = -EINVAL;
goto err_uobj;
}
+ if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ if (!qp->device->create_flow) {
+ err = -EOPNOTSUPP;
+ goto err_put;
+ }
+
flow_attr = kzalloc(struct_size(flow_attr, flows,
cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
@@ -3578,21 +3569,22 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
flow_attr->flags = kern_flow_attr->flags;
flow_attr->size = sizeof(*flow_attr);
- kern_spec = kern_flow_attr + 1;
+ kern_spec = kern_flow_attr->flow_specs;
ib_spec = flow_attr + 1;
for (i = 0; i < flow_attr->num_of_specs &&
- cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
- cmd.flow_attr.size >=
- ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
- uflow_res);
+ cmd.flow_attr.size >= sizeof(*kern_spec) &&
+ cmd.flow_attr.size >= kern_spec->size;
+ i++) {
+ err = kern_spec_to_ib_spec(
+ file, (struct ib_uverbs_flow_spec *)kern_spec,
+ ib_spec, uflow_res);
if (err)
goto err_free;
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
- cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
- kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
+ cmd.flow_attr.size -= kern_spec->size;
+ kern_spec = ((void *)kern_spec) + kern_spec->size;
ib_spec += ((union ib_flow_spec *) ib_spec)->size;
}
if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
@@ -3611,6 +3603,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
}
atomic_inc(&qp->usecnt);
flow_id->qp = qp;
+ flow_id->device = qp->device;
flow_id->uobject = uobj;
uobj->object = flow_id;
uflow = container_of(uobj, typeof(*uflow), uobject);
@@ -3625,13 +3618,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_copy;
uobj_put_obj_read(qp);
- uobj_alloc_commit(uobj);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return 0;
+ return uobj_alloc_commit(uobj, 0);
err_copy:
- ib_destroy_flow(flow_id);
+ if (!qp->device->destroy_flow(flow_id))
+ atomic_dec(&qp->usecnt);
err_free:
ib_uverbs_flow_resources_free(uflow_res);
err_free_flow_attr:
@@ -3647,12 +3640,10 @@ err_free_attr:
}
int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_uobject *uobj;
int ret;
if (ucore->inlen < sizeof(cmd))
@@ -3665,17 +3656,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
+ 0);
}
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
@@ -3686,9 +3671,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
+ struct ib_device *ib_dev;
- obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
- file->ucontext);
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+ &ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3697,7 +3683,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
- file->ucontext);
+ file);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
@@ -3714,15 +3700,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (ib_srq_has_cq(cmd->srq_type)) {
- attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
- file->ucontext);
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->cq_handle, file);
if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3787,9 +3773,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
uobj_put_obj_read(attr.ext.cq);
uobj_put_obj_read(pd);
- uobj_alloc_commit(&obj->uevent.uobject);
-
- return 0;
+ return uobj_alloc_commit(&obj->uevent.uobject, 0);
err_copy:
ib_destroy_srq(srq);
@@ -3813,7 +3797,6 @@ err:
}
ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -3843,7 +3826,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
+ ret = __uverbs_create_xsrq(file, &xcmd, &udata);
if (ret)
return ret;
@@ -3851,7 +3834,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_create_xsrq cmd;
@@ -3870,7 +3852,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
+ ret = __uverbs_create_xsrq(file, &cmd, &udata);
if (ret)
return ret;
@@ -3878,7 +3860,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -3894,7 +3875,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (!srq)
return -EINVAL;
@@ -3909,7 +3890,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -3925,7 +3905,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (!srq)
return -EINVAL;
@@ -3949,7 +3929,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -3957,32 +3936,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
struct ib_uevent_object *obj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
- file->ucontext);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
obj = container_of(uobj, struct ib_uevent_object, uobject);
- /*
- * Make sure we don't free the memory in remove_commit as we still
- * needs the uobject memory to create the response.
- */
- uverbs_uobject_get(uobj);
-
memset(&resp, 0, sizeof(resp));
-
- ret = uobj_remove_commit(uobj);
- if (ret) {
- uverbs_uobject_put(uobj);
- return ret;
- }
resp.events_reported = obj->events_reported;
- uverbs_uobject_put(uobj);
+
+ uobj_put_destroy(uobj);
+
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
return -EFAULT;
@@ -3990,15 +3957,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
}
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_query_device_resp resp = { {0} };
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr = {0};
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
int err;
+ ucontext = ib_uverbs_get_ucontext(file);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
if (!ib_dev->query_device)
return -EOPNOTSUPP;
@@ -4024,7 +3997,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
if (err)
return err;
- copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
+ copy_query_dev_fields(ucontext, &resp.base, &attr);
if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
goto end;
@@ -4111,7 +4084,6 @@ end:
}
int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -4141,7 +4113,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 8d32c4ae368c..1a6b229e3db3 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,103 @@
#include "rdma_core.h"
#include "uverbs.h"
+struct bundle_alloc_head {
+ struct bundle_alloc_head *next;
+ u8 data[];
+};
+
+struct bundle_priv {
+ /* Must be first */
+ struct bundle_alloc_head alloc_head;
+ struct bundle_alloc_head *allocated_mem;
+ size_t internal_avail;
+ size_t internal_used;
+
+ struct radix_tree_root *radix;
+ const struct uverbs_api_ioctl_method *method_elm;
+ void __rcu **radix_slots;
+ unsigned long radix_slots_len;
+ u32 method_key;
+
+ struct ib_uverbs_attr __user *user_attrs;
+ struct ib_uverbs_attr *uattrs;
+
+ DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
+
+ /*
+ * Must be last. bundle ends in a flex array which overlaps
+ * internal_buffer.
+ */
+ struct uverbs_attr_bundle bundle;
+ u64 internal_buffer[32];
+};
+
+/*
+ * Each method has an absolute minimum amount of memory it needs to allocate,
+ * precompute that amount and determine if the onstack memory can be used or
+ * if allocation is needed.
+ */
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+ unsigned int num_attrs)
+{
+ struct bundle_priv *pbundle;
+ size_t bundle_size =
+ offsetof(struct bundle_priv, internal_buffer) +
+ sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
+ sizeof(*pbundle->uattrs) * num_attrs;
+
+ method_elm->use_stack = bundle_size <= sizeof(*pbundle);
+ method_elm->bundle_size =
+ ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
+
+ /* Do not want order-2 allocations for this. */
+ WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
+}
+
+/**
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * @bundle: The bundle
+ * @size: Number of bytes to allocate
+ * @flags: Allocator flags
+ *
+ * The bundle allocator is intended for allocations that are connected with
+ * processing the system call related to the bundle. The allocated memory is
+ * always freed once the system call completes, and cannot be freed any other
+ * way.
+ *
+ * This tries to use a small pool of pre-allocated memory for performance.
+ */
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+ gfp_t flags)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+ size_t new_used;
+ void *res;
+
+ if (check_add_overflow(size, pbundle->internal_used, &new_used))
+ return ERR_PTR(-EOVERFLOW);
+
+ if (new_used > pbundle->internal_avail) {
+ struct bundle_alloc_head *buf;
+
+ buf = kvmalloc(struct_size(buf, data, size), flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ buf->next = pbundle->allocated_mem;
+ pbundle->allocated_mem = buf;
+ return buf->data;
+ }
+
+ res = (void *)pbundle->internal_buffer + pbundle->internal_used;
+ pbundle->internal_used =
+ ALIGN(new_used, sizeof(*pbundle->internal_buffer));
+ if (flags & __GFP_ZERO)
+ memset(res, 0, size);
+ return res;
+}
+EXPORT_SYMBOL(_uverbs_alloc);
+
static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
u16 len)
{
@@ -46,45 +143,24 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
0, uattr->len - len);
}
-static int uverbs_process_attr(struct ib_device *ibdev,
- struct ib_ucontext *ucontext,
- const struct ib_uverbs_attr *uattr,
- u16 attr_id,
- const struct uverbs_attr_spec_hash *attr_spec_bucket,
- struct uverbs_attr_bundle_hash *attr_bundle_h,
- struct ib_uverbs_attr __user *uattr_ptr)
+static int uverbs_process_attr(struct bundle_priv *pbundle,
+ const struct uverbs_api_attr *attr_uapi,
+ struct ib_uverbs_attr *uattr, u32 attr_bkey)
{
- const struct uverbs_attr_spec *spec;
- const struct uverbs_attr_spec *val_spec;
- struct uverbs_attr *e;
- const struct uverbs_object_spec *object;
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
+ const struct uverbs_attr_spec *val_spec = spec;
struct uverbs_obj_attr *o_attr;
- struct uverbs_attr *elements = attr_bundle_h->attrs;
-
- if (attr_id >= attr_spec_bucket->num_attrs) {
- if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
- return -EINVAL;
- else
- return 0;
- }
-
- if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
- return -EINVAL;
-
- spec = &attr_spec_bucket->attrs[attr_id];
- val_spec = spec;
- e = &elements[attr_id];
- e->uattr = uattr_ptr;
switch (spec->type) {
case UVERBS_ATTR_TYPE_ENUM_IN:
- if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+ if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
return -EOPNOTSUPP;
if (uattr->attr_data.enum_data.reserved)
return -EINVAL;
- val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+ val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
/* Currently we only support PTR_IN based enums */
if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
@@ -98,64 +174,75 @@ static int uverbs_process_attr(struct ib_device *ibdev,
* longer struct will fail here if used with an old kernel and
* non-zero content, making ABI compat/discovery simpler.
*/
- if (uattr->len > val_spec->ptr.len &&
- val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
- !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+ if (uattr->len > val_spec->u.ptr.len &&
+ val_spec->zero_trailing &&
+ !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
return -EOPNOTSUPP;
/* fall through */
case UVERBS_ATTR_TYPE_PTR_OUT:
- if (uattr->len < val_spec->ptr.min_len ||
- (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
- uattr->len > val_spec->ptr.len))
+ if (uattr->len < val_spec->u.ptr.min_len ||
+ (!val_spec->zero_trailing &&
+ uattr->len > val_spec->u.ptr.len))
return -EINVAL;
if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
uattr->attr_data.reserved)
return -EINVAL;
- e->ptr_attr.data = uattr->data;
+ e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
e->ptr_attr.len = uattr->len;
- e->ptr_attr.flags = uattr->flags;
+
+ if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
+ void *p;
+
+ p = uverbs_alloc(&pbundle->bundle, uattr->len);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ e->ptr_attr.ptr = p;
+
+ if (copy_from_user(p, u64_to_user_ptr(uattr->data),
+ uattr->len))
+ return -EFAULT;
+ } else {
+ e->ptr_attr.data = uattr->data;
+ }
break;
case UVERBS_ATTR_TYPE_IDR:
- if (uattr->data >> 32)
- return -EINVAL;
- /* fall through */
case UVERBS_ATTR_TYPE_FD:
if (uattr->attr_data.reserved)
return -EINVAL;
- if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
+ if (uattr->len != 0)
return -EINVAL;
o_attr = &e->obj_attr;
- object = uverbs_get_object(ibdev, spec->obj.obj_type);
- if (!object)
- return -EINVAL;
- o_attr->type = object->type_attrs;
-
- o_attr->id = (int)uattr->data;
- o_attr->uobject = uverbs_get_uobject_from_context(
- o_attr->type,
- ucontext,
- spec->obj.access,
- o_attr->id);
+ o_attr->attr_elm = attr_uapi;
+ /*
+ * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
+ * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
+ * here without caring about truncation as we know that the
+ * IDR implementation today rejects negative IDs
+ */
+ o_attr->uobject = uverbs_get_uobject_from_file(
+ spec->u.obj.obj_type,
+ pbundle->bundle.ufile,
+ spec->u.obj.access,
+ uattr->data_s64);
if (IS_ERR(o_attr->uobject))
return PTR_ERR(o_attr->uobject);
+ __set_bit(attr_bkey, pbundle->uobj_finalize);
- if (spec->obj.access == UVERBS_ACCESS_NEW) {
- u64 id = o_attr->uobject->id;
+ if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
+ unsigned int uattr_idx = uattr - pbundle->uattrs;
+ s64 id = o_attr->uobject->id;
/* Copy the allocated id to the user-space */
- if (put_user(id, &e->uattr->data)) {
- uverbs_finalize_object(o_attr->uobject,
- UVERBS_ACCESS_NEW,
- false);
+ if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
return -EFAULT;
- }
}
break;
@@ -163,220 +250,225 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EOPNOTSUPP;
}
- set_bit(attr_id, attr_bundle_h->valid_bitmap);
return 0;
}
-static int uverbs_uattrs_process(struct ib_device *ibdev,
- struct ib_ucontext *ucontext,
- const struct ib_uverbs_attr *uattrs,
- size_t num_uattrs,
- const struct uverbs_method_spec *method,
- struct uverbs_attr_bundle *attr_bundle,
- struct ib_uverbs_attr __user *uattr_ptr)
+/*
+ * We search the radix tree with the method prefix and now we want to fast
+ * search the suffix bits to get a particular attribute pointer. It is not
+ * totally clear to me if this breaks the radix tree encapsulation or not, but
+ * it uses the iter data to determine if the method iter points at the same
+ * chunk that will store the attribute, if so it just derefs it directly. By
+ * construction in most kernel configs the method and attrs will all fit in a
+ * single radix chunk, so in most cases this will have no search. Other cases
+ * this falls back to a full search.
+ */
+static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle,
+ u32 attr_key)
{
- size_t i;
- int ret = 0;
- int num_given_buckets = 0;
-
- for (i = 0; i < num_uattrs; i++) {
- const struct ib_uverbs_attr *uattr = &uattrs[i];
- u16 attr_id = uattr->attr_id;
- struct uverbs_attr_spec_hash *attr_spec_bucket;
-
- ret = uverbs_ns_idx(&attr_id, method->num_buckets);
- if (ret < 0) {
- if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
- uverbs_finalize_objects(attr_bundle,
- method->attr_buckets,
- num_given_buckets,
- false);
- return ret;
- }
- continue;
- }
+ void __rcu **slot;
- /*
- * ret is the found ns, so increase num_given_buckets if
- * necessary.
- */
- if (ret >= num_given_buckets)
- num_given_buckets = ret + 1;
-
- attr_spec_bucket = method->attr_buckets[ret];
- ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id,
- attr_spec_bucket, &attr_bundle->hash[ret],
- uattr_ptr++);
- if (ret) {
- uverbs_finalize_objects(attr_bundle,
- method->attr_buckets,
- num_given_buckets,
- false);
- return ret;
- }
+ if (likely(attr_key < pbundle->radix_slots_len)) {
+ void *entry;
+
+ slot = pbundle->radix_slots + attr_key;
+ entry = rcu_dereference_raw(*slot);
+ if (likely(!radix_tree_is_internal_node(entry) && entry))
+ return slot;
}
- return num_given_buckets;
+ return radix_tree_lookup_slot(pbundle->radix,
+ pbundle->method_key | attr_key);
}
-static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec,
- struct uverbs_attr_bundle *attr_bundle)
+static int uverbs_set_attr(struct bundle_priv *pbundle,
+ struct ib_uverbs_attr *uattr)
{
- unsigned int i;
-
- for (i = 0; i < attr_bundle->num_buckets; i++) {
- struct uverbs_attr_spec_hash *attr_spec_bucket =
- method_spec->attr_buckets[i];
+ u32 attr_key = uapi_key_attr(uattr->attr_id);
+ u32 attr_bkey = uapi_bkey_attr(attr_key);
+ const struct uverbs_api_attr *attr;
+ void __rcu **slot;
+ int ret;
- if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask,
- attr_bundle->hash[i].valid_bitmap,
- attr_spec_bucket->num_attrs))
- return -EINVAL;
+ slot = uapi_get_attr_for_method(pbundle, attr_key);
+ if (!slot) {
+ /*
+ * Kernel does not support the attribute but user-space says it
+ * is mandatory
+ */
+ if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
+ return -EPROTONOSUPPORT;
+ return 0;
}
+ attr = srcu_dereference(
+ *slot, &pbundle->bundle.ufile->device->disassociate_srcu);
- for (; i < method_spec->num_buckets; i++) {
- struct uverbs_attr_spec_hash *attr_spec_bucket =
- method_spec->attr_buckets[i];
+ /* Reject duplicate attributes from user-space */
+ if (test_bit(attr_bkey, pbundle->bundle.attr_present))
+ return -EINVAL;
- if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask,
- attr_spec_bucket->num_attrs))
- return -EINVAL;
- }
+ ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
+ if (ret)
+ return ret;
+
+ __set_bit(attr_bkey, pbundle->bundle.attr_present);
return 0;
}
-static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
- const struct ib_uverbs_attr *uattrs,
- size_t num_uattrs,
- struct ib_device *ibdev,
- struct ib_uverbs_file *ufile,
- const struct uverbs_method_spec *method_spec,
- struct uverbs_attr_bundle *attr_bundle)
+static int ib_uverbs_run_method(struct bundle_priv *pbundle,
+ unsigned int num_attrs)
{
+ int (*handler)(struct ib_uverbs_file *ufile,
+ struct uverbs_attr_bundle *ctx);
+ size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
+ unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
+ unsigned int i;
int ret;
- int finalize_ret;
- int num_given_buckets;
- num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs,
- num_uattrs, method_spec,
- attr_bundle, uattr_ptr);
- if (num_given_buckets <= 0)
+ /* See uverbs_disassociate_api() */
+ handler = srcu_dereference(
+ pbundle->method_elm->handler,
+ &pbundle->bundle.ufile->device->disassociate_srcu);
+ if (!handler)
+ return -EIO;
+
+ pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
+ if (IS_ERR(pbundle->uattrs))
+ return PTR_ERR(pbundle->uattrs);
+ if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
+ return -EFAULT;
+
+ for (i = 0; i != num_attrs; i++) {
+ ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ /* User space did not provide all the mandatory attributes */
+ if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
+ pbundle->bundle.attr_present,
+ pbundle->method_elm->key_bitmap_len)))
return -EINVAL;
- attr_bundle->num_buckets = num_given_buckets;
- ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle);
- if (ret)
- goto cleanup;
+ if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
+ struct uverbs_obj_attr *destroy_attr =
+ &pbundle->bundle.attrs[destroy_bkey].obj_attr;
- ret = method_spec->handler(ibdev, ufile, attr_bundle);
-cleanup:
- finalize_ret = uverbs_finalize_objects(attr_bundle,
- method_spec->attr_buckets,
- attr_bundle->num_buckets,
- !ret);
+ ret = uobj_destroy(destroy_attr->uobject);
+ if (ret)
+ return ret;
+ __clear_bit(destroy_bkey, pbundle->uobj_finalize);
- return ret ? ret : finalize_ret;
-}
+ ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ uobj_put_destroy(destroy_attr->uobject);
+ } else {
+ ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ }
-#define UVERBS_OPTIMIZE_USING_STACK_SZ 256
-static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct ib_uverbs_ioctl_hdr *hdr,
- void __user *buf)
-{
- const struct uverbs_object_spec *object_spec;
- const struct uverbs_method_spec *method_spec;
- long err = 0;
- unsigned int i;
- struct {
- struct ib_uverbs_attr *uattrs;
- struct uverbs_attr_bundle *uverbs_attr_bundle;
- } *ctx = NULL;
- struct uverbs_attr *curr_attr;
- unsigned long *curr_bitmap;
- size_t ctx_size;
- uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
-
- if (hdr->driver_id != ib_dev->driver_id)
+ /*
+ * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
+ * not invoke the method because the request is not supported. No
+ * other cases should return this code.
+ */
+ if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
return -EINVAL;
- object_spec = uverbs_get_object(ib_dev, hdr->object_id);
- if (!object_spec)
- return -EPROTONOSUPPORT;
+ return ret;
+}
- method_spec = uverbs_get_method(object_spec, hdr->method_id);
- if (!method_spec)
- return -EPROTONOSUPPORT;
+static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
+{
+ unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
+ struct bundle_alloc_head *memblock;
+ unsigned int i;
+ int ret = 0;
- if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
- return -EINVAL;
+ i = -1;
+ while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
+ i + 1)) < key_bitmap_len) {
+ struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+ int current_ret;
+
+ current_ret = uverbs_finalize_object(
+ attr->obj_attr.uobject,
+ attr->obj_attr.attr_elm->spec.u.obj.access, commit);
+ if (!ret)
+ ret = current_ret;
+ }
- ctx_size = sizeof(*ctx) +
- sizeof(struct uverbs_attr_bundle) +
- sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
- sizeof(*ctx->uattrs) * hdr->num_attrs +
- sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) *
- method_spec->num_child_attrs +
- sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) *
- (method_spec->num_child_attrs / BITS_PER_LONG +
- method_spec->num_buckets);
-
- if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ)
- ctx = (void *)data;
- if (!ctx)
- ctx = kmalloc(ctx_size, GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx);
- ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) +
- (sizeof(ctx->uverbs_attr_bundle->hash[0]) *
- method_spec->num_buckets);
- curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
- curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
+ for (memblock = pbundle->allocated_mem; memblock;) {
+ struct bundle_alloc_head *tmp = memblock;
- /*
- * We just fill the pointers and num_attrs here. The data itself will be
- * filled at a later stage (uverbs_process_attr)
- */
- for (i = 0; i < method_spec->num_buckets; i++) {
- unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
-
- ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr;
- curr_attr += curr_num_attrs;
- ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs;
- ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap;
- bitmap_zero(curr_bitmap, curr_num_attrs);
- curr_bitmap += BITS_TO_LONGS(curr_num_attrs);
+ memblock = memblock->next;
+ kvfree(tmp);
}
- err = copy_from_user(ctx->uattrs, buf,
- sizeof(*ctx->uattrs) * hdr->num_attrs);
- if (err) {
- err = -EFAULT;
- goto out;
- }
+ return ret;
+}
- err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
- file, method_spec, ctx->uverbs_attr_bundle);
+static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
+ struct ib_uverbs_ioctl_hdr *hdr,
+ struct ib_uverbs_attr __user *user_attrs)
+{
+ const struct uverbs_api_ioctl_method *method_elm;
+ struct uverbs_api *uapi = ufile->device->uapi;
+ struct radix_tree_iter attrs_iter;
+ struct bundle_priv *pbundle;
+ struct bundle_priv onstack;
+ void __rcu **slot;
+ int destroy_ret;
+ int ret;
- /*
- * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
- * not invoke the method because the request is not supported. No
- * other cases should return this code.
- */
- if (unlikely(err == -EPROTONOSUPPORT)) {
- WARN_ON_ONCE(err == -EPROTONOSUPPORT);
- err = -EINVAL;
+ if (unlikely(hdr->driver_id != uapi->driver_id))
+ return -EINVAL;
+
+ slot = radix_tree_iter_lookup(
+ &uapi->radix, &attrs_iter,
+ uapi_key_obj(hdr->object_id) |
+ uapi_key_ioctl_method(hdr->method_id));
+ if (unlikely(!slot))
+ return -EPROTONOSUPPORT;
+ method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
+
+ if (!method_elm->use_stack) {
+ pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
+ if (!pbundle)
+ return -ENOMEM;
+ pbundle->internal_avail =
+ method_elm->bundle_size -
+ offsetof(struct bundle_priv, internal_buffer);
+ pbundle->alloc_head.next = NULL;
+ pbundle->allocated_mem = &pbundle->alloc_head;
+ } else {
+ pbundle = &onstack;
+ pbundle->internal_avail = sizeof(pbundle->internal_buffer);
+ pbundle->allocated_mem = NULL;
}
-out:
- if (ctx != (void *)data)
- kfree(ctx);
- return err;
-}
-#define IB_UVERBS_MAX_CMD_SZ 4096
+ /* Space for the pbundle->bundle.attrs flex array */
+ pbundle->method_elm = method_elm;
+ pbundle->method_key = attrs_iter.index;
+ pbundle->bundle.ufile = ufile;
+ pbundle->radix = &uapi->radix;
+ pbundle->radix_slots = slot;
+ pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
+ pbundle->user_attrs = user_attrs;
+
+ pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
+ sizeof(*pbundle->bundle.attrs),
+ sizeof(*pbundle->internal_buffer));
+ memset(pbundle->bundle.attr_present, 0,
+ sizeof(pbundle->bundle.attr_present));
+ memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+
+ ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
+ destroy_ret = bundle_destroy(pbundle, ret == 0);
+ if (unlikely(destroy_ret && !ret))
+ return destroy_ret;
+
+ return ret;
+}
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
@@ -384,39 +476,138 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct ib_uverbs_ioctl_hdr __user *user_hdr =
(struct ib_uverbs_ioctl_hdr __user *)arg;
struct ib_uverbs_ioctl_hdr hdr;
- struct ib_device *ib_dev;
int srcu_key;
- long err;
+ int err;
+
+ if (unlikely(cmd != RDMA_VERBS_IOCTL))
+ return -ENOIOCTLCMD;
+
+ err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
+ if (err)
+ return -EFAULT;
+
+ if (hdr.length > PAGE_SIZE ||
+ hdr.length != struct_size(&hdr, attrs, hdr.num_attrs))
+ return -EINVAL;
+
+ if (hdr.reserved1 || hdr.reserved2)
+ return -EPROTONOSUPPORT;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- err = -EIO;
- goto out;
+ err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return err;
+}
+
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ const struct uverbs_attr *attr;
+ u64 flags;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ /* Missing attribute means 0 flags */
+ if (IS_ERR(attr)) {
+ *to = 0;
+ return 0;
}
- if (cmd == RDMA_VERBS_IOCTL) {
- err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
+ /*
+ * New userspace code should use 8 bytes to pass flags, but we
+ * transparently support old userspaces that were using 4 bytes as
+ * well.
+ */
+ if (attr->ptr_attr.len == 8)
+ flags = attr->ptr_attr.data;
+ else if (attr->ptr_attr.len == 4)
+ flags = *(u32 *)&attr->ptr_attr.data;
+ else
+ return -EINVAL;
- if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ ||
- hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) {
- err = -EINVAL;
- goto out;
- }
+ if (flags & ~allowed_bits)
+ return -EINVAL;
- if (hdr.reserved1 || hdr.reserved2) {
- err = -EPROTONOSUPPORT;
- goto out;
- }
+ *to = flags;
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags64);
- err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr,
- (__user void *)arg + sizeof(hdr));
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ u64 flags;
+ int ret;
+
+ ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
+ if (ret)
+ return ret;
+
+ if (flags > U32_MAX)
+ return -EINVAL;
+ *to = flags;
+
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags32);
+
+/*
+ * This is for ease of conversion. The purpose is to convert all drivers to
+ * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and
+ * attr == 1 is output.
+ */
+void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+ const struct uverbs_attr *uhw_in =
+ uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN);
+ const struct uverbs_attr *uhw_out =
+ uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
+
+ if (!IS_ERR(uhw_in)) {
+ udata->inlen = uhw_in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(uhw_in))
+ udata->inbuf =
+ &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx]
+ .data;
+ else
+ udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
} else {
- err = -ENOIOCTLCMD;
+ udata->inbuf = NULL;
+ udata->inlen = 0;
}
-out:
- srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- return err;
+ if (!IS_ERR(uhw_out)) {
+ udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
+ udata->outlen = uhw_out->ptr_attr.len;
+ } else {
+ udata->outbuf = NULL;
+ udata->outlen = 0;
+ }
+}
+
+int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
+ const void *from, size_t size)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+ u16 flags;
+ size_t min_size;
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ min_size = min_t(size_t, attr->ptr_attr.len, size);
+ if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+ return -EFAULT;
+
+ flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
+ UVERBS_ATTR_F_VALID_OUTPUT;
+ if (put_user(flags,
+ &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
+ return -EFAULT;
+
+ return 0;
}
+EXPORT_SYMBOL(uverbs_copy_to);
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
deleted file mode 100644
index 6ceb672c4d46..000000000000
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ /dev/null
@@ -1,664 +0,0 @@
-/*
- * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/rdma_user_ioctl.h>
-#include <linux/bitops.h>
-#include "uverbs.h"
-
-#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT)
-#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT)
-#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK)
-
-#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \
- buckets_offset) \
- for (tmpj = 0, \
- elem = (*(const void ***)((hashes)[tmpi] + \
- (buckets_offset)))[0]; \
- tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \
- tmpj++) \
- if ((elem = ((*(const void ***)(hashes[tmpi] + \
- (buckets_offset)))[tmpj])))
-
-/*
- * Iterate all elements of a few @hashes. The number of given hashes is
- * indicated by @num_hashes. The offset of the number of buckets in the hash is
- * represented by @num_buckets_offset, while the offset of the buckets array in
- * the hash structure is represented by @buckets_offset. tmpi and tmpj are two
- * short (or int) based indices that are given by the user. tmpi iterates over
- * the different hashes. @elem points the current element in the hashes[tmpi]
- * bucket we are looping on. To be honest, @hashes representation isn't exactly
- * a hash, but more a collection of elements. These elements' ids are treated
- * in a hash like manner, where the first upper bits are the bucket number.
- * These elements are later mapped into a perfect-hash.
- */
-#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \
- num_buckets_offset, buckets_offset) \
- for (tmpi = 0; tmpi < (num_hashes); tmpi++) \
- _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\
- buckets_offset)
-
-#define get_elements_iterators_entry_above(iters, num_elements, elements, \
- num_objects_fld, objects_fld, bucket,\
- min_id) \
- get_elements_above_id((const void **)iters, num_elements, \
- (const void **)(elements), \
- offsetof(typeof(**elements), \
- num_objects_fld), \
- offsetof(typeof(**elements), objects_fld),\
- offsetof(typeof(***(*elements)->objects_fld), id),\
- bucket, min_id)
-
-#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \
- get_elements_iterators_entry_above(iters, num_trees, trees, \
- num_objects, objects, bucket, min_id)
-
-#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\
- get_elements_iterators_entry_above(method_iters, num_iters, iters, \
- num_methods, methods, bucket, min_id)
-
-#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\
- get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \
- num_attrs, attrs, bucket, min_id)
-
-/*
- * get_elements_above_id get a few hashes represented by @elements and
- * @num_elements. The hashes fields are described by @num_offset, @data_offset
- * and @id_offset in the same way as required by for_each_element. The function
- * returns an array of @iters, represents an array of elements in the hashes
- * buckets, which their ids are the smallest ids in all hashes but are all
- * larger than the id given by min_id. Elements are only added to the iters
- * array if their id belongs to the bucket @bucket. The number of elements in
- * the returned array is returned by the function. @min_id is also updated to
- * reflect the new min_id of all elements in iters.
- */
-static size_t get_elements_above_id(const void **iters,
- unsigned int num_elements,
- const void **elements,
- size_t num_offset,
- size_t data_offset,
- size_t id_offset,
- u16 bucket,
- short *min_id)
-{
- size_t num_iters = 0;
- short min = SHRT_MAX;
- const void *elem;
- int i, j, last_stored = -1;
- unsigned int equal_min = 0;
-
- for_each_element(elem, i, j, elements, num_elements, num_offset,
- data_offset) {
- u16 id = *(u16 *)(elem + id_offset);
-
- if (GET_NS_ID(id) != bucket)
- continue;
-
- if (GET_ID(id) < *min_id ||
- (min != SHRT_MAX && GET_ID(id) > min))
- continue;
-
- /*
- * We first iterate all hashes represented by @elements. When
- * we do, we try to find an element @elem in the bucket @bucket
- * which its id is min. Since we can't ensure the user sorted
- * the elements in increasing order, we override this hash's
- * minimal id element we found, if a new element with a smaller
- * id was just found.
- */
- iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
- last_stored = i;
- if (min == GET_ID(id))
- equal_min++;
- else
- equal_min = 1;
- min = GET_ID(id);
- }
-
- /*
- * We only insert to our iters array an element, if its id is smaller
- * than all previous ids. Therefore, the final iters array is sorted so
- * that smaller ids are in the end of the array.
- * Therefore, we need to clean the beginning of the array to make sure
- * all ids of final elements are equal to min.
- */
- memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
-
- *min_id = min;
- return equal_min;
-}
-
-#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
- objects_fld, bucket) \
- find_max_element_id(num_elements, (const void **)(elements), \
- offsetof(typeof(**elements), num_objects_fld), \
- offsetof(typeof(**elements), objects_fld), \
- offsetof(typeof(***(*elements)->objects_fld), id),\
- bucket)
-
-static short find_max_element_ns_id(unsigned int num_elements,
- const void **elements,
- size_t num_offset,
- size_t data_offset,
- size_t id_offset)
-{
- short max_ns = SHRT_MIN;
- const void *elem;
- int i, j;
-
- for_each_element(elem, i, j, elements, num_elements, num_offset,
- data_offset) {
- u16 id = *(u16 *)(elem + id_offset);
-
- if (GET_NS_ID(id) > max_ns)
- max_ns = GET_NS_ID(id);
- }
-
- return max_ns;
-}
-
-static short find_max_element_id(unsigned int num_elements,
- const void **elements,
- size_t num_offset,
- size_t data_offset,
- size_t id_offset,
- u16 bucket)
-{
- short max_id = SHRT_MIN;
- const void *elem;
- int i, j;
-
- for_each_element(elem, i, j, elements, num_elements, num_offset,
- data_offset) {
- u16 id = *(u16 *)(elem + id_offset);
-
- if (GET_NS_ID(id) == bucket &&
- GET_ID(id) > max_id)
- max_id = GET_ID(id);
- }
- return max_id;
-}
-
-#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
- objects_fld, bucket) \
- find_max_element_id(num_elements, (const void **)(elements), \
- offsetof(typeof(**elements), num_objects_fld), \
- offsetof(typeof(**elements), objects_fld), \
- offsetof(typeof(***(*elements)->objects_fld), id),\
- bucket)
-
-#define find_max_element_ns_entry_id(num_elements, elements, \
- num_objects_fld, objects_fld) \
- find_max_element_ns_id(num_elements, (const void **)(elements), \
- offsetof(typeof(**elements), num_objects_fld),\
- offsetof(typeof(**elements), objects_fld), \
- offsetof(typeof(***(*elements)->objects_fld), id))
-
-/*
- * find_max_xxxx_ns_id gets a few elements. Each element is described by an id
- * which its upper bits represents a namespace. It finds the max namespace. This
- * could be used in order to know how many buckets do we need to allocate. If no
- * elements exist, SHRT_MIN is returned. Namespace represents here different
- * buckets. The common example is "common bucket" and "driver bucket".
- *
- * find_max_xxxx_id gets a few elements and a bucket. Each element is described
- * by an id which its upper bits represent a namespace. It returns the max id
- * which is contained in the same namespace defined in @bucket. This could be
- * used in order to know how many elements do we need to allocate in the bucket.
- * If no elements exist, SHRT_MIN is returned.
- */
-
-#define find_max_object_id(num_trees, trees, bucket) \
- find_max_element_entry_id(num_trees, trees, num_objects,\
- objects, bucket)
-#define find_max_object_ns_id(num_trees, trees) \
- find_max_element_ns_entry_id(num_trees, trees, \
- num_objects, objects)
-
-#define find_max_method_id(num_iters, iters, bucket) \
- find_max_element_entry_id(num_iters, iters, num_methods,\
- methods, bucket)
-#define find_max_method_ns_id(num_iters, iters) \
- find_max_element_ns_entry_id(num_iters, iters, \
- num_methods, methods)
-
-#define find_max_attr_id(num_iters, iters, bucket) \
- find_max_element_entry_id(num_iters, iters, num_attrs, \
- attrs, bucket)
-#define find_max_attr_ns_id(num_iters, iters) \
- find_max_element_ns_entry_id(num_iters, iters, \
- num_attrs, attrs)
-
-static void free_method(struct uverbs_method_spec *method)
-{
- unsigned int i;
-
- if (!method)
- return;
-
- for (i = 0; i < method->num_buckets; i++)
- kfree(method->attr_buckets[i]);
-
- kfree(method);
-}
-
-#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \
- (attr)->type == UVERBS_ATTR_TYPE_FD)
-
-/*
- * This function gets array of size @num_method_defs which contains pointers to
- * method definitions @method_defs. The function allocates an
- * uverbs_method_spec structure and initializes its number of buckets and the
- * elements in buckets to the correct attributes. While doing that, it
- * validates that there aren't conflicts between attributes of different
- * method_defs.
- */
-static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs,
- size_t num_method_defs)
-{
- int bucket_idx;
- int max_attr_buckets = 0;
- size_t num_attr_buckets = 0;
- int res = 0;
- struct uverbs_method_spec *method = NULL;
- const struct uverbs_attr_def **attr_defs;
- unsigned int num_of_singularities = 0;
-
- max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs);
- if (max_attr_buckets >= 0)
- num_attr_buckets = max_attr_buckets + 1;
-
- method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets),
- GFP_KERNEL);
- if (!method)
- return ERR_PTR(-ENOMEM);
-
- method->num_buckets = num_attr_buckets;
- attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL);
- if (!attr_defs) {
- res = -ENOMEM;
- goto free_method;
- }
- for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) {
- short min_id = SHRT_MIN;
- int attr_max_bucket = 0;
- struct uverbs_attr_spec_hash *hash = NULL;
-
- attr_max_bucket = find_max_attr_id(num_method_defs, method_defs,
- bucket_idx);
- if (attr_max_bucket < 0)
- continue;
-
- hash = kzalloc(sizeof(*hash) +
- ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
- sizeof(long)) +
- BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
- GFP_KERNEL);
- if (!hash) {
- res = -ENOMEM;
- goto free;
- }
- hash->num_attrs = attr_max_bucket + 1;
- method->num_child_attrs += hash->num_attrs;
- hash->mandatory_attrs_bitmask = (void *)(hash + 1) +
- ALIGN(sizeof(*hash->attrs) *
- (attr_max_bucket + 1),
- sizeof(long));
-
- method->attr_buckets[bucket_idx] = hash;
-
- do {
- size_t num_attr_defs;
- struct uverbs_attr_spec *attr;
- bool attr_obj_with_special_access;
-
- num_attr_defs =
- get_attrs_above_id(attr_defs,
- num_method_defs,
- method_defs,
- bucket_idx,
- &min_id);
- /* Last attr in bucket */
- if (!num_attr_defs)
- break;
-
- if (num_attr_defs > 1) {
- /*
- * We don't allow two attribute definitions for
- * the same attribute. This is usually a
- * programmer error. If required, it's better to
- * just add a new attribute to capture the new
- * semantics.
- */
- res = -EEXIST;
- goto free;
- }
-
- attr = &hash->attrs[min_id];
- memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
-
- attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
- (attr->obj.access == UVERBS_ACCESS_NEW ||
- attr->obj.access == UVERBS_ACCESS_DESTROY);
- num_of_singularities += !!attr_obj_with_special_access;
- if (WARN(num_of_singularities > 1,
- "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
- min_id) ||
- WARN(attr_obj_with_special_access &&
- !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
- "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
- min_id) ||
- WARN(IS_ATTR_OBJECT(attr) &&
- attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
- min_id)) {
- res = -EINVAL;
- goto free;
- }
-
- if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY)
- set_bit(min_id, hash->mandatory_attrs_bitmask);
- min_id++;
-
- } while (1);
- }
- kfree(attr_defs);
- return method;
-
-free:
- kfree(attr_defs);
-free_method:
- free_method(method);
- return ERR_PTR(res);
-}
-
-static void free_object(struct uverbs_object_spec *object)
-{
- unsigned int i, j;
-
- if (!object)
- return;
-
- for (i = 0; i < object->num_buckets; i++) {
- struct uverbs_method_spec_hash *method_buckets =
- object->method_buckets[i];
-
- if (!method_buckets)
- continue;
-
- for (j = 0; j < method_buckets->num_methods; j++)
- free_method(method_buckets->methods[j]);
-
- kfree(method_buckets);
- }
-
- kfree(object);
-}
-
-/*
- * This function gets array of size @num_object_defs which contains pointers to
- * object definitions @object_defs. The function allocated an
- * uverbs_object_spec structure and initialize its number of buckets and the
- * elements in buckets to the correct methods. While doing that, it
- * sorts out the correct relationship between conflicts in the same method.
- */
-static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs,
- size_t num_object_defs)
-{
- u16 bucket_idx;
- int max_method_buckets = 0;
- u16 num_method_buckets = 0;
- int res = 0;
- struct uverbs_object_spec *object = NULL;
- const struct uverbs_method_def **method_defs;
-
- max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs);
- if (max_method_buckets >= 0)
- num_method_buckets = max_method_buckets + 1;
-
- object = kzalloc(struct_size(object, method_buckets,
- num_method_buckets),
- GFP_KERNEL);
- if (!object)
- return ERR_PTR(-ENOMEM);
-
- object->num_buckets = num_method_buckets;
- method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL);
- if (!method_defs) {
- res = -ENOMEM;
- goto free_object;
- }
-
- for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) {
- short min_id = SHRT_MIN;
- int methods_max_bucket = 0;
- struct uverbs_method_spec_hash *hash = NULL;
-
- methods_max_bucket = find_max_method_id(num_object_defs, object_defs,
- bucket_idx);
- if (methods_max_bucket < 0)
- continue;
-
- hash = kzalloc(struct_size(hash, methods,
- methods_max_bucket + 1),
- GFP_KERNEL);
- if (!hash) {
- res = -ENOMEM;
- goto free;
- }
-
- hash->num_methods = methods_max_bucket + 1;
- object->method_buckets[bucket_idx] = hash;
-
- do {
- size_t num_method_defs;
- struct uverbs_method_spec *method;
- int i;
-
- num_method_defs =
- get_methods_above_id(method_defs,
- num_object_defs,
- object_defs,
- bucket_idx,
- &min_id);
- /* Last method in bucket */
- if (!num_method_defs)
- break;
-
- method = build_method_with_attrs(method_defs,
- num_method_defs);
- if (IS_ERR(method)) {
- res = PTR_ERR(method);
- goto free;
- }
-
- /*
- * The last tree which is given as an argument to the
- * merge overrides previous method handler.
- * Therefore, we iterate backwards and search for the
- * first handler which != NULL. This also defines the
- * set of flags used for this handler.
- */
- for (i = num_method_defs - 1;
- i >= 0 && !method_defs[i]->handler; i--)
- ;
- hash->methods[min_id++] = method;
- /* NULL handler isn't allowed */
- if (WARN(i < 0,
- "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n",
- min_id)) {
- res = -EINVAL;
- goto free;
- }
- method->handler = method_defs[i]->handler;
- method->flags = method_defs[i]->flags;
-
- } while (1);
- }
- kfree(method_defs);
- return object;
-
-free:
- kfree(method_defs);
-free_object:
- free_object(object);
- return ERR_PTR(res);
-}
-
-void uverbs_free_spec_tree(struct uverbs_root_spec *root)
-{
- unsigned int i, j;
-
- if (!root)
- return;
-
- for (i = 0; i < root->num_buckets; i++) {
- struct uverbs_object_spec_hash *object_hash =
- root->object_buckets[i];
-
- if (!object_hash)
- continue;
-
- for (j = 0; j < object_hash->num_objects; j++)
- free_object(object_hash->objects[j]);
-
- kfree(object_hash);
- }
-
- kfree(root);
-}
-EXPORT_SYMBOL(uverbs_free_spec_tree);
-
-struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
- const struct uverbs_object_tree_def **trees)
-{
- u16 bucket_idx;
- short max_object_buckets = 0;
- size_t num_objects_buckets = 0;
- struct uverbs_root_spec *root_spec = NULL;
- const struct uverbs_object_def **object_defs;
- int i;
- int res = 0;
-
- max_object_buckets = find_max_object_ns_id(num_trees, trees);
- /*
- * Devices which don't want to support ib_uverbs, should just allocate
- * an empty parsing tree. Every user-space command won't hit any valid
- * entry in the parsing tree and thus will fail.
- */
- if (max_object_buckets >= 0)
- num_objects_buckets = max_object_buckets + 1;
-
- root_spec = kzalloc(struct_size(root_spec, object_buckets,
- num_objects_buckets),
- GFP_KERNEL);
- if (!root_spec)
- return ERR_PTR(-ENOMEM);
- root_spec->num_buckets = num_objects_buckets;
-
- object_defs = kcalloc(num_trees, sizeof(*object_defs),
- GFP_KERNEL);
- if (!object_defs) {
- res = -ENOMEM;
- goto free_root;
- }
-
- for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) {
- short min_id = SHRT_MIN;
- short objects_max_bucket;
- struct uverbs_object_spec_hash *hash = NULL;
-
- objects_max_bucket = find_max_object_id(num_trees, trees,
- bucket_idx);
- if (objects_max_bucket < 0)
- continue;
-
- hash = kzalloc(struct_size(hash, objects,
- objects_max_bucket + 1),
- GFP_KERNEL);
- if (!hash) {
- res = -ENOMEM;
- goto free;
- }
- hash->num_objects = objects_max_bucket + 1;
- root_spec->object_buckets[bucket_idx] = hash;
-
- do {
- size_t num_object_defs;
- struct uverbs_object_spec *object;
-
- num_object_defs = get_objects_above_id(object_defs,
- num_trees,
- trees,
- bucket_idx,
- &min_id);
- /* Last object in bucket */
- if (!num_object_defs)
- break;
-
- object = build_object_with_methods(object_defs,
- num_object_defs);
- if (IS_ERR(object)) {
- res = PTR_ERR(object);
- goto free;
- }
-
- /*
- * The last tree which is given as an argument to the
- * merge overrides previous object's type_attrs.
- * Therefore, we iterate backwards and search for the
- * first type_attrs which != NULL.
- */
- for (i = num_object_defs - 1;
- i >= 0 && !object_defs[i]->type_attrs; i--)
- ;
- /*
- * NULL is a valid type_attrs. It means an object we
- * can't instantiate (like DEVICE).
- */
- object->type_attrs = i < 0 ? NULL :
- object_defs[i]->type_attrs;
-
- hash->objects[min_id++] = object;
- } while (1);
- }
-
- kfree(object_defs);
- return root_spec;
-
-free:
- kfree(object_defs);
-free_root:
- uverbs_free_spec_tree(root_spec);
- return ERR_PTR(res);
-}
-EXPORT_SYMBOL(uverbs_alloc_spec_tree);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2094d136513d..823beca448e1 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -41,8 +41,6 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/task.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
@@ -77,7 +75,6 @@ static struct class *uverbs_class;
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len) = {
[IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
@@ -118,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
};
static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw) = {
[IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
@@ -138,6 +134,30 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+/*
+ * Must be called with the ufile->device->disassociate_srcu held, and the lock
+ * must be held until use of the ucontext is finished.
+ */
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+{
+ /*
+ * We do not hold the hw_destroy_rwsem lock for this flow, instead
+ * srcu is used. It does not matter if someone races this with
+ * get_context, we get NULL or valid ucontext.
+ */
+ struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
+
+ if (!srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu))
+ return ERR_PTR(-EIO);
+
+ if (!ucontext)
+ return ERR_PTR(-EINVAL);
+
+ return ucontext;
+}
+EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+
int uverbs_dealloc_mw(struct ib_mw *mw)
{
struct ib_pd *pd = mw->pd;
@@ -154,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
struct ib_uverbs_device *dev =
container_of(kobj, struct ib_uverbs_device, kobj);
+ uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
kfree(dev);
}
@@ -184,7 +205,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
}
spin_unlock_irq(&ev_file->ev_queue.lock);
- uverbs_uobject_put(&ev_file->uobj_file.uobj);
+ uverbs_uobject_put(&ev_file->uobj);
}
spin_lock_irq(&file->async_file->ev_queue.lock);
@@ -220,20 +241,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
}
-static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context,
- bool device_removed)
-{
- context->closing = 1;
- uverbs_cleanup_ucontext(context, device_removed);
- put_pid(context->tgid);
-
- ib_rdmacg_uncharge(&context->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_HANDLE);
-
- return context->device->dealloc_ucontext(context);
-}
-
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
complete(&dev->comp);
@@ -246,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref)
struct ib_device *ib_dev;
int srcu_key;
+ release_ufile_idr_uobject(file);
+
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
@@ -338,7 +347,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
filp->private_data;
return ib_uverbs_event_read(&comp_ev_file->ev_queue,
- comp_ev_file->uobj_file.ufile, filp,
+ comp_ev_file->uobj.ufile, filp,
buf, count, pos,
sizeof(struct ib_uverbs_comp_event_desc));
}
@@ -420,7 +429,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
- struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uobject *uobj = filp->private_data;
+ struct ib_uverbs_completion_event_file *file = container_of(
+ uobj, struct ib_uverbs_completion_event_file, uobj);
struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&file->ev_queue.lock);
@@ -528,7 +539,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
struct ib_ucq_object, uobject);
- ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
+ ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
}
@@ -637,13 +648,13 @@ err_put_refs:
return filp;
}
-static bool verify_command_mask(struct ib_device *ib_dev,
- u32 command, bool extended)
+static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
+ bool extended)
{
if (!extended)
- return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
+ return ufile->uverbs_cmd_mask & BIT_ULL(command);
- return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
+ return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
}
static bool verify_command_idx(u32 command, bool extended)
@@ -713,7 +724,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_ex_cmd_hdr ex_hdr;
- struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
bool extended;
int srcu_key;
@@ -748,24 +758,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
return ret;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
- goto out;
- }
-
- /*
- * Must be after the ib_dev check, as once the RCU clears ib_dev ==
- * NULL means ucontext == NULL
- */
- if (!file->ucontext &&
- (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) {
- ret = -EINVAL;
- goto out;
- }
- if (!verify_command_mask(ib_dev, command, extended)) {
+ if (!verify_command_mask(file, command, extended)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -773,7 +767,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
buf += sizeof(hdr);
if (!extended) {
- ret = uverbs_cmd_table[command](file, ib_dev, buf,
+ ret = uverbs_cmd_table[command](file, buf,
hdr.in_words * 4,
hdr.out_words * 4);
} else {
@@ -792,7 +786,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
ex_hdr.provider_in_words * 8,
ex_hdr.provider_out_words * 8);
- ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
+ ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
ret = (ret) ? : count;
}
@@ -804,22 +798,18 @@ out:
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev;
+ struct ib_ucontext *ucontext;
int ret = 0;
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
+ ucontext = ib_uverbs_get_ucontext(file);
+ if (IS_ERR(ucontext)) {
+ ret = PTR_ERR(ucontext);
goto out;
}
- if (!file->ucontext)
- ret = -ENODEV;
- else
- ret = ib_dev->mmap(file->ucontext, vma);
+ ret = ucontext->device->mmap(ucontext, vma);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
@@ -879,13 +869,12 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
- spin_lock_init(&file->idr_lock);
- idr_init(&file->idr);
- file->ucontext = NULL;
- file->async_file = NULL;
kref_init(&file->ref);
- mutex_init(&file->mutex);
- mutex_init(&file->cleanup_mutex);
+ mutex_init(&file->ucontext_lock);
+
+ spin_lock_init(&file->uobjects_lock);
+ INIT_LIST_HEAD(&file->uobjects);
+ init_rwsem(&file->hw_destroy_rwsem);
filp->private_data = file;
kobject_get(&dev->kobj);
@@ -893,6 +882,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
+ file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
+ file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
+
+ setup_ufile_idr_uobject(file);
+
return nonseekable_open(inode, filp);
err_module:
@@ -911,13 +905,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
- mutex_lock(&file->cleanup_mutex);
- if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
- file->ucontext = NULL;
- }
- mutex_unlock(&file->cleanup_mutex);
- idr_destroy(&file->idr);
+ uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@@ -1006,6 +994,19 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
+static int ib_uverbs_create_uapi(struct ib_device *device,
+ struct ib_uverbs_device *uverbs_dev)
+{
+ struct uverbs_api *uapi;
+
+ uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+ if (IS_ERR(uapi))
+ return PTR_ERR(uapi);
+
+ uverbs_dev->uapi = uapi;
+ return 0;
+}
+
static void ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
@@ -1048,6 +1049,9 @@ static void ib_uverbs_add_one(struct ib_device *device)
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
+ if (ib_uverbs_create_uapi(device, uverbs_dev))
+ goto err;
+
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
@@ -1067,18 +1071,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
- if (!device->specs_root) {
- const struct uverbs_object_tree_def *default_root[] = {
- uverbs_default_get_objects()};
-
- uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
- default_root);
- if (IS_ERR(uverbs_dev->specs_root))
- goto err_class;
-
- device->specs_root = uverbs_dev->specs_root;
- }
-
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
@@ -1098,44 +1090,6 @@ err:
return;
}
-static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
-{
- struct ib_device *ib_dev = ibcontext->device;
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the
- * task struct.
- */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- down_write(&owning_mm->mmap_sem);
- ib_dev->disassociate_ucontext(ibcontext);
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
-}
-
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
@@ -1144,46 +1098,31 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_event event;
/* Pending running commands to terminate */
- synchronize_srcu(&uverbs_dev->disassociate_srcu);
+ uverbs_disassociate_api_pre(uverbs_dev);
event.event = IB_EVENT_DEVICE_FATAL;
event.element.port_num = 0;
event.device = ib_dev;
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
- struct ib_ucontext *ucontext;
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
file->is_closed = 1;
list_del(&file->list);
kref_get(&file->ref);
- mutex_unlock(&uverbs_dev->lists_mutex);
-
-
- mutex_lock(&file->cleanup_mutex);
- ucontext = file->ucontext;
- file->ucontext = NULL;
- mutex_unlock(&file->cleanup_mutex);
- /* At this point ib_uverbs_close cannot be running
- * ib_uverbs_cleanup_ucontext
+ /* We must release the mutex before going ahead and calling
+ * uverbs_cleanup_ufile, as it might end up indirectly calling
+ * uverbs_close, for example due to freeing the resources (e.g.
+ * mmput).
*/
- if (ucontext) {
- /* We must release the mutex before going ahead and
- * calling disassociate_ucontext. disassociate_ucontext
- * might end up indirectly calling uverbs_close,
- * for example due to freeing the resources
- * (e.g mmput).
- */
- ib_uverbs_event_handler(&file->event_handler, &event);
- ib_uverbs_disassociate_ucontext(ucontext);
- mutex_lock(&file->cleanup_mutex);
- ib_uverbs_cleanup_ucontext(file, ucontext, true);
- mutex_unlock(&file->cleanup_mutex);
- }
+ mutex_unlock(&uverbs_dev->lists_mutex);
- mutex_lock(&uverbs_dev->lists_mutex);
+ ib_uverbs_event_handler(&file->event_handler, &event);
+ uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
kref_put(&file->ref, ib_uverbs_release_file);
+
+ mutex_lock(&uverbs_dev->lists_mutex);
}
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
@@ -1205,6 +1144,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
+
+ uverbs_disassociate_api(uverbs_dev->uapi);
}
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
@@ -1232,7 +1173,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
* cdev was deleted, however active clients can still issue
* commands and close their open files.
*/
- rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
ib_uverbs_free_hw_resources(uverbs_dev, device);
wait_clients = 0;
}
@@ -1241,10 +1181,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
- if (uverbs_dev->specs_root) {
- uverbs_free_spec_tree(uverbs_dev->specs_root);
- device->specs_root = NULL;
- }
kobject_put(&uverbs_dev->kobj);
}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index bb372b4713a4..b8d715c68ca4 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
/* TODO: No need to set this */
sa_path_set_dmac_zero(dst);
- sa_path_set_ndev(dst, NULL);
- sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index b570acbd94af..203cc96ac6f5 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,14 +48,18 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
static int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- int ret;
struct ib_flow *flow = (struct ib_flow *)uobject->object;
struct ib_uflow_object *uflow =
container_of(uobject, struct ib_uflow_object, uobject);
+ struct ib_qp *qp = flow->qp;
+ int ret;
- ret = ib_destroy_flow(flow);
- if (!ret)
+ ret = flow->device->destroy_flow(flow);
+ if (!ret) {
+ if (qp)
+ atomic_dec(&qp->usecnt);
ib_uverbs_flow_resources_free(uflow->resources);
+ }
return ret;
}
@@ -74,6 +78,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
container_of(uobject, struct ib_uqp_object, uevent.uobject);
int ret;
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not uobjects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
if (why == RDMA_REMOVE_DESTROY) {
if (!list_empty(&uqp->mcast_list))
return -EBUSY;
@@ -82,7 +93,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
}
ret = ib_destroy_qp(qp);
- if (ret && why == RDMA_REMOVE_DESTROY)
+ if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
if (uqp->uxrcd)
@@ -100,8 +111,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- kfree(ind_tbl);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ kfree(ind_tbl);
return ret;
}
@@ -114,8 +127,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_wq(wq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
return ret;
}
@@ -129,8 +144,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_srq(srq);
-
- if (ret && why == RDMA_REMOVE_DESTROY)
+ if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
if (srq_type == IB_SRQT_XRC) {
@@ -152,12 +166,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
+ ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
+ if (ret)
+ return ret;
+
mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
- if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
- ret = -EBUSY;
- else
- ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
- xrcd, why);
+ ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why);
mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
return ret;
@@ -167,20 +181,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_pd *pd = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
+ if (ret)
+ return ret;
ib_dealloc_pd((struct ib_pd *)uobject->object);
return 0;
}
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
struct ib_uverbs_completion_event_file *comp_event_file =
- container_of(uobj_file, struct ib_uverbs_completion_event_file,
- uobj_file);
+ container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
spin_lock_irq(&event_queue->lock);
@@ -194,119 +210,77 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
return 0;
};
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
+int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
return 0;
}
+EXPORT_SYMBOL(uverbs_destroy_def_handler);
-/*
- * This spec is used in order to pass information to the hardware driver in a
- * legacy way. Every verb that could get driver specific data should get this
- * spec.
- */
-const struct uverbs_attr_def uverbs_uhw_compat_in =
- UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
-const struct uverbs_attr_def uverbs_uhw_compat_out =
- UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
-
-void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
-{
- /*
- * This is for ease of conversion. The purpose is to convert all drivers
- * to use uverbs_attr_bundle instead of ib_udata.
- * Assume attr == 0 is input and attr == 1 is output.
- */
- const struct uverbs_attr *uhw_in =
- uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
- const struct uverbs_attr *uhw_out =
- uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
-
- if (!IS_ERR(uhw_in)) {
- udata->inlen = uhw_in->ptr_attr.len;
- if (uverbs_attr_ptr_is_inline(uhw_in))
- udata->inbuf = &uhw_in->uattr->data;
- else
- udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
- } else {
- udata->inbuf = NULL;
- udata->inlen = 0;
- }
-
- if (!IS_ERR(uhw_out)) {
- udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
- udata->outlen = uhw_out->ptr_attr.len;
- } else {
- udata->outbuf = NULL;
- udata->outlen = 0;
- }
-}
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
- &UVERBS_TYPE_ALLOC_FD(0,
- sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
- &uverbs_event_fops,
- "[infinibandevent]", O_RDONLY));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_hot_unplug_completion_event_file,
+ &uverbs_event_fops,
+ "[infinibandevent]",
+ O_RDONLY));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
- uverbs_free_qp));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
- uverbs_free_srq));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
- 0, uverbs_free_flow));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ uverbs_free_flow));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
- uverbs_free_wq));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
- uverbs_free_xrcd));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
+ uverbs_free_xrcd));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
- /* 2 is used in order to free the PD after MRs */
- &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
-
-static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
- &UVERBS_OBJECT(UVERBS_OBJECT_PD),
- &UVERBS_OBJECT(UVERBS_OBJECT_MR),
- &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
- &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_QP),
- &UVERBS_OBJECT(UVERBS_OBJECT_AH),
- &UVERBS_OBJECT(UVERBS_OBJECT_MW),
- &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
- &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
- &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM),
- &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
+
+DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+ &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+ &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+ &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
return &uverbs_default_objects;
}
-EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 03b182a684a6..a0ffdcf9a51c 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -38,20 +38,22 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_counters *counters = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY &&
- atomic_read(&counters->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+ if (ret)
+ return ret;
return counters->device->destroy_counters(counters);
}
-static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+ struct ib_device *ib_dev = uobj->context->device;
struct ib_counters *counters;
- struct ib_uobject *uobj;
int ret;
/*
@@ -62,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
if (!ib_dev->create_counters)
return -EOPNOTSUPP;
- uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
counters = ib_dev->create_counters(ib_dev, attrs);
if (IS_ERR(counters)) {
ret = PTR_ERR(counters);
@@ -80,9 +81,8 @@ err_create_counters:
return ret;
}
-static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
struct ib_counters_read_attr read_attr = {};
const struct uverbs_attr *uattr;
@@ -90,68 +90,62 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
int ret;
- if (!ib_dev->read_counters)
+ if (!counters->device->read_counters)
return -EOPNOTSUPP;
if (!atomic_read(&counters->usecnt))
return -EINVAL;
- ret = uverbs_copy_from(&read_attr.flags, attrs,
- UVERBS_ATTR_READ_COUNTERS_FLAGS);
+ ret = uverbs_get_flags32(&read_attr.flags, attrs,
+ UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
if (ret)
return ret;
uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
- read_attr.counters_buff = kcalloc(read_attr.ncounters,
- sizeof(u64), GFP_KERNEL);
- if (!read_attr.counters_buff)
- return -ENOMEM;
-
- ret = ib_dev->read_counters(counters,
- &read_attr,
- attrs);
- if (ret)
- goto err_read;
+ read_attr.counters_buff = uverbs_zalloc(
+ attrs, array_size(read_attr.ncounters, sizeof(u64)));
+ if (IS_ERR(read_attr.counters_buff))
+ return PTR_ERR(read_attr.counters_buff);
- ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
- read_attr.counters_buff,
- read_attr.ncounters * sizeof(u64));
+ ret = counters->device->read_counters(counters, &read_attr, attrs);
+ if (ret)
+ return ret;
-err_read:
- kfree(read_attr.counters_buff);
- return ret;
+ return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+ read_attr.counters_buff,
+ read_attr.ncounters * sizeof(u64));
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
- UVERBS_OBJECT_COUNTERS,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
- uverbs_destroy_def_handler,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
- UVERBS_OBJECT_COUNTERS,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
- UVERBS_OBJECT_COUNTERS,
- UVERBS_ACCESS_READ,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
- UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
- UVERBS_ATTR_TYPE(__u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_COUNTERS_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_READ,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ enum ib_uverbs_read_counters_flags));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
-
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 3d293d01afea..5b5f2052cd52 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -44,21 +44,26 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_cq(cq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
- container_of(ev_queue,
- struct ib_uverbs_completion_event_file,
- ev_queue) : NULL,
- ucq);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_ucq(
+ uobject->context->ufile,
+ ev_queue ? container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) :
+ NULL,
+ ucq);
return ret;
}
-static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
- struct ib_ucontext *ucontext = file->ucontext;
- struct ib_ucq_object *obj;
+ struct ib_ucq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
+ typeof(*obj), uobject);
+ struct ib_device *ib_dev = obj->uobject.context->device;
struct ib_udata uhw;
int ret;
u64 user_handle;
@@ -67,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_uobject *ev_file_uobj;
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+ if (!ib_dev->create_cq || !ib_dev->destroy_cq)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -81,28 +86,26 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
if (ret)
return ret;
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
- UVERBS_ATTR_CREATE_CQ_FLAGS)))
- return -EFAULT;
+ ret = uverbs_get_flags32(&attr.flags, attrs,
+ UVERBS_ATTR_CREATE_CQ_FLAGS,
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION |
+ IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN);
+ if (ret)
+ return ret;
ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
if (!IS_ERR(ev_file_uobj)) {
ev_file = container_of(ev_file_uobj,
struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
+ uobj);
uverbs_uobject_get(ev_file_uobj);
}
- if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+ if (attr.comp_vector >= file->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
}
- obj = container_of(uverbs_attr_get_uobject(attrs,
- UVERBS_ATTR_CREATE_CQ_HANDLE),
- typeof(*obj), uobject);
- obj->uverbs_file = ucontext->ufile;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
@@ -111,7 +114,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
/* Temporary, only until drivers get the new uverbs_attr_bundle */
create_udata(attrs, &uhw);
- cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_event_file;
@@ -143,69 +146,64 @@ err_event_file:
return ret;
};
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
+ enum ib_uverbs_ex_create_cq_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
- UVERBS_OBJECT_COMP_CHANNEL,
- UVERBS_ACCESS_READ),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
-
-static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
- struct ib_uverbs_destroy_cq_resp resp;
- struct ib_ucq_object *obj;
- int ret;
-
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- obj = container_of(uobj, struct ib_ucq_object, uobject);
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
- return -EOPNOTSUPP;
-
- ret = rdma_explicit_destroy(uobj);
- if (ret)
- return ret;
-
- resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
+ struct ib_ucq_object *obj =
+ container_of(uobj, struct ib_ucq_object, uobject);
+ struct ib_uverbs_destroy_cq_resp resp = {
+ .comp_events_reported = obj->comp_events_reported,
+ .async_events_reported = obj->async_events_reported
+ };
return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
sizeof(resp));
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
- uverbs_free_cq),
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_CQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
+
#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
- &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
#endif
- );
-
+);
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 8b681575b615..edc3ff7733d4 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -37,20 +37,24 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_dm *dm = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
+ if (ret)
+ return ret;
return dm->device->dealloc_dm(dm);
}
-static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int
+UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
{
- struct ib_ucontext *ucontext = file->ucontext;
struct ib_dm_alloc_attr attr = {};
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)
+ ->obj_attr.uobject;
+ struct ib_device *ib_dev = uobj->context->device;
struct ib_dm *dm;
int ret;
@@ -67,9 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
if (ret)
return ret;
- uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
-
- dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+ dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
if (IS_ERR(dm))
return PTR_ERR(dm);
@@ -83,26 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
return 0;
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
- UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
- uverbs_destroy_def_handler,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
- UVERBS_OBJECT_DM,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DM_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
- /* 1 is used in order to free the DM after MRs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
&UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
&UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index a7be51cf2e42..d8cfafe23bd9 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_flow_action *action = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY &&
- atomic_read(&action->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
+ if (ret)
+ return ret;
return action->device->destroy_flow_action(action);
}
@@ -303,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
return 0;
}
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
+ struct ib_device *ib_dev = uobj->context->device;
int ret;
- struct ib_uobject *uobj;
struct ib_flow_action *action;
struct ib_flow_action_esp_attr esp_attr = {};
@@ -320,7 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
return ret;
/* No need to check as this attribute is marked as MANDATORY */
- uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -334,102 +335,109 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
return 0;
}
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
+ struct ib_flow_action *action = uobj->object;
int ret;
- struct ib_uobject *uobj;
- struct ib_flow_action *action;
struct ib_flow_action_esp_attr esp_attr = {};
- if (!ib_dev->modify_flow_action_esp)
+ if (!action->device->modify_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+ ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
+ true);
if (ret)
return ret;
- uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
- action = uobj->object;
-
if (action->type != IB_FLOW_ACTION_ESP)
return -EINVAL;
- return ib_dev->modify_flow_action_esp(action,
- &esp_attr.hdr,
- attrs);
+ return action->device->modify_flow_action_esp(action, &esp_attr.hdr,
+ attrs);
}
static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- { .ptr = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
- .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- } },
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
+ aes_key),
},
};
static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- { .ptr = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- /* No need to specify any data */
- .len = 0,
- } }
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
},
[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- { .ptr = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
- .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- } }
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
+ size),
},
};
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
- UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
-
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_WRITE,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
-
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
- uverbs_destroy_def_handler,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
-
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+ hard_limit_pkts),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+ hard_limit_pkts),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 68f7cadf088f..cf02e774303e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
return ib_dereg_mr((struct ib_mr *)uobject->object);
}
-static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
+ struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
struct ib_dm_mr_attr attr = {};
- struct ib_uobject *uobj;
- struct ib_dm *dm;
- struct ib_pd *pd;
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+ struct ib_dm *dm =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
struct ib_mr *mr;
int ret;
@@ -62,8 +66,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
if (ret)
return ret;
- ret = uverbs_copy_from(&attr.access_flags, attrs,
- UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+ ret = uverbs_get_flags32(&attr.access_flags, attrs,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ IB_ACCESS_SUPPORTED);
if (ret)
return ret;
@@ -74,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
if (ret)
return ret;
- pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
-
- dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
-
- uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
-
if (attr.offset > dm->length || attr.length > dm->length ||
attr.length > dm->length - attr.offset)
return -EINVAL;
@@ -115,33 +114,36 @@ err_dereg:
return ret;
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
- UVERBS_ACCESS_READ,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_MR_REG,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
- UVERBS_ACCESS_READ,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
- UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
- UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
- /* 1 is used in order to free the MR after all the MWs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
- &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_MR,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
new file mode 100644
index 000000000000..73ea6f0db88f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ */
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <linux/bitops.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+/*
+ * Allocate a zeroed element of alloc_size bytes and insert it into the uapi
+ * radix tree at key.
+ *
+ * Returns the new element, or an ERR_PTR(): -EOVERFLOW when key is
+ * UVERBS_API_KEY_ERR, -ENOMEM when the allocation fails, otherwise whatever
+ * radix_tree_insert() reported (callers in this file test for -EEXIST to
+ * detect a key that is already populated).
+ */
+static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
+{
+	void *elm;
+	int rc;
+
+	if (key == UVERBS_API_KEY_ERR)
+		return ERR_PTR(-EOVERFLOW);
+
+	elm = kzalloc(alloc_size, GFP_KERNEL);
+	/*
+	 * Callers only test IS_ERR(), so a NULL element must never be
+	 * inserted into the tree or handed back.
+	 */
+	if (!elm)
+		return ERR_PTR(-ENOMEM);
+
+	rc = radix_tree_insert(&uapi->radix, key, elm);
+	if (rc) {
+		kfree(elm);
+		return ERR_PTR(rc);
+	}
+
+	return elm;
+}
+
+/*
+ * Merge one method definition, and all of its attributes, into the uapi radix
+ * tree below the object identified by obj_key.
+ *
+ * A method seen for the first time gets its handler installed. A method that
+ * already exists may only contribute additional attributes and must not carry
+ * a handler. obj_elm is not referenced by this function.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int uapi_merge_method(struct uverbs_api *uapi,
+ struct uverbs_api_object *obj_elm, u32 obj_key,
+ const struct uverbs_method_def *method,
+ bool is_driver)
+{
+ u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
+ struct uverbs_api_ioctl_method *method_elm;
+ unsigned int i;
+
+ /* A method definition without attributes adds nothing to the tree */
+ if (!method->attrs)
+ return 0;
+
+ method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
+ if (IS_ERR(method_elm)) {
+ if (method_elm != ERR_PTR(-EEXIST))
+ return PTR_ERR(method_elm);
+
+ /*
+ * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
+ */
+ if (WARN_ON(method->handler))
+ return -EINVAL;
+ method_elm = radix_tree_lookup(&uapi->radix, method_key);
+ if (WARN_ON(!method_elm))
+ return -EINVAL;
+ } else {
+ WARN_ON(!method->handler);
+ rcu_assign_pointer(method_elm->handler, method->handler);
+ /* The default destroy handler never marks a method as driver owned */
+ if (method->handler != uverbs_destroy_def_handler)
+ method_elm->driver_method = is_driver;
+ }
+
+ for (i = 0; i != method->num_attrs; i++) {
+ const struct uverbs_attr_def *attr = (*method->attrs)[i];
+ struct uverbs_api_attr *attr_slot;
+
+ if (!attr)
+ continue;
+
+ /*
+ * ENUM_IN contains the 'ids' pointer to the driver's .rodata,
+ * so if it is specified by a driver then it always makes this
+ * into a driver method.
+ */
+ if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
+ method_elm->driver_method |= is_driver;
+
+ attr_slot =
+ uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
+ sizeof(*attr_slot));
+ /* Attributes are not allowed to be modified by drivers */
+ if (IS_ERR(attr_slot))
+ return PTR_ERR(attr_slot);
+
+ attr_slot->spec = attr->attr;
+ }
+
+ return 0;
+}
+
+/*
+ * Merge every object of one definition tree, together with each object's
+ * methods, into the uapi radix tree. uverbs_alloc_api() passes is_driver as
+ * false for the default tree and true for driver supplied trees.
+ *
+ * Returns 0 on success or the first negative errno encountered.
+ */
+static int uapi_merge_tree(struct uverbs_api *uapi,
+ const struct uverbs_object_tree_def *tree,
+ bool is_driver)
+{
+ unsigned int i, j;
+ int rc;
+
+ if (!tree->objects)
+ return 0;
+
+ for (i = 0; i != tree->num_objects; i++) {
+ const struct uverbs_object_def *obj = (*tree->objects)[i];
+ struct uverbs_api_object *obj_elm;
+ u32 obj_key;
+
+ /* NULL entries in the objects array are skipped */
+ if (!obj)
+ continue;
+
+ obj_key = uapi_key_obj(obj->id);
+ obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
+ if (IS_ERR(obj_elm)) {
+ if (obj_elm != ERR_PTR(-EEXIST))
+ return PTR_ERR(obj_elm);
+
+ /* This occurs when a driver uses ADD_UVERBS_METHODS */
+ if (WARN_ON(obj->type_attrs))
+ return -EINVAL;
+ obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
+ if (WARN_ON(!obj_elm))
+ return -EINVAL;
+ } else {
+ obj_elm->type_attrs = obj->type_attrs;
+ if (obj->type_attrs) {
+ obj_elm->type_class =
+ obj->type_attrs->type_class;
+ /*
+ * Today drivers are only permitted to use
+ * idr_class types. They cannot use FD types
+ * because we currently have no way to revoke
+ * the fops pointer after device
+ * disassociation.
+ */
+ if (WARN_ON(is_driver &&
+ obj->type_attrs->type_class !=
+ &uverbs_idr_class))
+ return -EINVAL;
+ }
+ }
+
+ if (!obj->methods)
+ continue;
+
+ for (j = 0; j != obj->num_methods; j++) {
+ const struct uverbs_method_def *method =
+ (*obj->methods)[j];
+ if (!method)
+ continue;
+
+ rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+ is_driver);
+ if (rc)
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Walk the attributes stored for one ioctl method and derive the per-method
+ * data from them: the mandatory attribute bitmap, the bkey of the DESTROY
+ * attribute (if any), the length of the attribute key bitmap and, through
+ * uapi_compute_bundle_size(), the bundle size.
+ *
+ * Returns 0 on success, or -EINVAL when the method has more than one
+ * NEW/DESTROY object attribute or such an attribute is not mandatory.
+ */
+static int
+uapi_finalize_ioctl_method(struct uverbs_api *uapi,
+ struct uverbs_api_ioctl_method *method_elm,
+ u32 method_key)
+{
+ struct radix_tree_iter iter;
+ unsigned int num_attrs = 0;
+ unsigned int max_bkey = 0;
+ bool single_uobj = false;
+ void __rcu **slot;
+
+ /*
+ * Presumably an out-of-range bkey standing for "no DESTROY attribute";
+ * it is overwritten below when one is found - TODO confirm with users
+ * of destroy_bkey.
+ */
+ method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN;
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter,
+ uapi_key_attrs_start(method_key)) {
+ struct uverbs_api_attr *elm =
+ rcu_dereference_protected(*slot, true);
+ u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK;
+ u32 attr_bkey = uapi_bkey_attr(attr_key);
+ /*
+ * NOTE(review): elm is read as an attribute here, before the
+ * check below confirms the key still belongs to this method.
+ */
+ u8 type = elm->spec.type;
+
+ /* Stop once the walk has left this method's attribute keys */
+ if (uapi_key_attr_to_method(iter.index) !=
+ uapi_key_attr_to_method(method_key))
+ break;
+
+ if (elm->spec.mandatory)
+ __set_bit(attr_bkey, method_elm->attr_mandatory);
+
+ if (type == UVERBS_ATTR_TYPE_IDR ||
+ type == UVERBS_ATTR_TYPE_FD) {
+ u8 access = elm->spec.u.obj.access;
+
+ /*
+ * Verbs specs may only have one NEW/DESTROY, we don't
+ * have the infrastructure to abort multiple NEW's or
+ * cope with multiple DESTROY failure.
+ */
+ if (access == UVERBS_ACCESS_NEW ||
+ access == UVERBS_ACCESS_DESTROY) {
+ if (WARN_ON(single_uobj))
+ return -EINVAL;
+
+ single_uobj = true;
+ if (WARN_ON(!elm->spec.mandatory))
+ return -EINVAL;
+ }
+
+ if (access == UVERBS_ACCESS_DESTROY)
+ method_elm->destroy_bkey = attr_bkey;
+ }
+
+ max_bkey = max(max_bkey, attr_bkey);
+ num_attrs++;
+ }
+
+ /* The bitmap must be long enough to hold the largest bkey seen */
+ method_elm->key_bitmap_len = max_bkey + 1;
+ WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
+
+ uapi_compute_bundle_size(method_elm, num_attrs);
+ return 0;
+}
+
+/*
+ * Run uapi_finalize_ioctl_method() on every ioctl method stored in the radix
+ * tree. Stops at, and returns, the first error; returns 0 otherwise.
+ */
+static int uapi_finalize(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		int err;
+
+		/* Only ioctl method entries are finalized */
+		if (!uapi_key_is_ioctl_method(iter.index))
+			continue;
+
+		err = uapi_finalize_ioctl_method(
+			uapi, rcu_dereference_protected(*slot, true),
+			iter.index);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Release a uverbs_api obtained from uverbs_alloc_api(): free every element
+ * stored in the radix tree, empty the tree, then free the uverbs_api itself.
+ * Passing NULL is a no-op. The pointer must not be used afterwards.
+ */
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	if (!uapi)
+		return;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		kfree(rcu_dereference_protected(*slot, true));
+		radix_tree_iter_delete(&uapi->radix, &iter, slot);
+	}
+
+	/*
+	 * The container was kzalloc()ed by uverbs_alloc_api(), whose error
+	 * path returns an ERR_PTR right after calling this function, so this
+	 * is the only place left that can release it.
+	 */
+	kfree(uapi);
+}
+
+/*
+ * Build the uverbs_api for one device: merge the default object tree, then
+ * each tree of the optional, NULL terminated driver_specs array, and finally
+ * run the finalize pass.
+ *
+ * Returns the new uverbs_api or an ERR_PTR(). Any failure other than -ENOMEM
+ * is also reported through pr_err().
+ */
+struct uverbs_api *uverbs_alloc_api(
+	const struct uverbs_object_tree_def *const *driver_specs,
+	enum rdma_driver_id driver_id)
+{
+	struct uverbs_api *uapi = kzalloc(sizeof(*uapi), GFP_KERNEL);
+	int rc;
+
+	if (!uapi)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
+	uapi->driver_id = driver_id;
+
+	/* Core definitions go in first, driver trees are layered on top */
+	rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+	if (rc)
+		goto err;
+
+	if (driver_specs) {
+		const struct uverbs_object_tree_def *const *spec;
+
+		for (spec = driver_specs; *spec; spec++) {
+			rc = uapi_merge_tree(uapi, *spec, true);
+			if (rc)
+				goto err;
+		}
+	}
+
+	rc = uapi_finalize(uapi);
+	if (!rc)
+		return uapi;
+
+err:
+	if (rc != -ENOMEM)
+		pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+		       rc);
+
+	uverbs_destroy_api(uapi);
+	return ERR_PTR(rc);
+}
+
+/*
+ * The pre version is done before destroying the HW objects, it only blocks
+ * off method access. All methods that require the ib_dev or the module data
+ * must test one of these assignments prior to continuing.
+ *
+ * Concretely: uverbs_dev->ib_dev and the handler of every method flagged as
+ * driver_method are set to NULL, then the function waits on
+ * uverbs_dev->disassociate_srcu.
+ */
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev)
+{
+ struct uverbs_api *uapi = uverbs_dev->uapi;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ /* Handlers of non-driver methods are left in place */
+ if (method_elm->driver_method)
+ rcu_assign_pointer(method_elm->handler, NULL);
+ }
+ }
+
+ /*
+ * Presumably lets readers that already fetched ib_dev or a handler
+ * under disassociate_srcu drain - TODO confirm against the read side.
+ */
+ synchronize_srcu(&uverbs_dev->disassociate_srcu);
+}
+
+/*
+ * Called when a driver disassociates from the ib_uverbs_device, on the
+ * assumption that the driver module unloads afterwards. As a safety measure
+ * every pointer in the tree that may reference the driver is replaced with
+ * NULL.
+ */
+void uverbs_disassociate_api(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		void *entry = rcu_dereference_protected(*slot, true);
+
+		if (uapi_key_is_object(iter.index)) {
+			struct uverbs_api_object *object_elm = entry;
+
+			/*
+			 * Some type_attrs live in the driver module. Which
+			 * ones is not tracked, as nothing should use them
+			 * after disassociate, so all of them are cleared.
+			 */
+			object_elm->type_attrs = NULL;
+			continue;
+		}
+
+		if (uapi_key_is_attr(iter.index)) {
+			struct uverbs_api_attr *attr_elm = entry;
+
+			if (attr_elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN)
+				attr_elm->spec.u2.enum_def.ids = NULL;
+		}
+	}
+}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9d6beb948535..6ee03d6089eb 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -326,12 +326,162 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
+/**
+ * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
+ * @dest: Pointer to the destination ah_attr. Its previous contents are
+ *        assumed to be invalid and are overwritten without being released.
+ * @src: Pointer to the source ah_attr.
+ *
+ * When the source carries an sgid_attr, an additional reference is taken on
+ * it for the copy.
+ */
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+		       const struct rdma_ah_attr *src)
+{
+	const struct ib_gid_attr *sgid_attr;
+
+	*dest = *src;
+
+	sgid_attr = dest->grh.sgid_attr;
+	if (sgid_attr)
+		rdma_hold_gid_attr(sgid_attr);
+}
+EXPORT_SYMBOL(rdma_copy_ah_attr);
+
+/**
+ * rdma_replace_ah_attr - Replace a valid ah_attr with a new one.
+ * @old: Pointer to the existing ah_attr which needs to be replaced.
+ *       old is assumed to be valid or zero'd
+ * @new: Pointer to the new ah_attr.
+ *
+ * rdma_replace_ah_attr() first releases any reference held by the old
+ * ah_attr; after that it copies the new attribute into old and takes a
+ * reference on the sgid_attr that was copied, if there is one.
+ */
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+			  const struct rdma_ah_attr *new)
+{
+	/* Drop what old was holding, then it can be overwritten like a copy */
+	rdma_destroy_ah_attr(old);
+	rdma_copy_ah_attr(old, new);
+}
+EXPORT_SYMBOL(rdma_replace_ah_attr);
+
+/**
+ * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
+ * @dest: Pointer to destination ah_attr to copy to.
+ * dest is assumed to be valid or zero'd
+ * @src: Pointer to the new ah_attr.
+ *
+ * rdma_move_ah_attr() first releases any reference in the destination ah_attr
+ * if it is valid. This also transfers ownership of internal references from
+ * src to dest, making src invalid in the process. No new reference of the src
+ * ah_attr is taken.
+ */
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
+{
+ /* Drop whatever sgid_attr reference dest held before overwriting it */
+ rdma_destroy_ah_attr(dest);
+ *dest = *src;
+ /* dest now owns the reference; src must no longer point at it */
+ src->grh.sgid_attr = NULL;
+}
+EXPORT_SYMBOL(rdma_move_ah_attr);
+
+/*
+ * Check that an rdma_ah_attr is acceptable for the device before it is passed
+ * on to the driver. Returns 0 when it is, -EINVAL otherwise.
+ */
+static int rdma_check_ah_attr(struct ib_device *device,
+			      struct rdma_ah_attr *ah_attr)
+{
+	const struct ib_gid_attr *sgid_attr;
+	bool grh_needed;
+
+	if (!rdma_is_port_valid(device, ah_attr->port_num))
+		return -EINVAL;
+
+	/* A port that requires a GRH, or a RoCE attr, must have IB_AH_GRH */
+	grh_needed = rdma_is_grh_required(device, ah_attr->port_num) ||
+		     ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE;
+	if (grh_needed && !(ah_attr->ah_flags & IB_AH_GRH))
+		return -EINVAL;
+
+	sgid_attr = ah_attr->grh.sgid_attr;
+	if (!sgid_attr)
+		return 0;
+
+	/* A supplied sgid_attr has to agree with the index and port given */
+	if (sgid_attr->index != ah_attr->grh.sgid_index ||
+	    sgid_attr->port_num != ah_attr->port_num)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
+ * On success the caller is responsible to call rdma_unfill_sgid_attr().
+ *
+ * *old_sgid_attr is always set to the sgid_attr the caller passed in (which
+ * may be NULL), even when an error is returned.
+ *
+ * Returns 0 on success, or a negative errno from rdma_check_ah_attr() or
+ * rdma_get_gid_attr().
+ */
+static int rdma_fill_sgid_attr(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr **old_sgid_attr)
+{
+ const struct ib_gid_attr *sgid_attr;
+ struct ib_global_route *grh;
+ int ret;
+
+ *old_sgid_attr = ah_attr->grh.sgid_attr;
+
+ ret = rdma_check_ah_attr(device, ah_attr);
+ if (ret)
+ return ret;
+
+ /* Without a GRH there is no sgid_attr to fill in */
+ if (!(ah_attr->ah_flags & IB_AH_GRH))
+ return 0;
+
+ /* The caller already supplied one, keep it as is */
+ grh = rdma_ah_retrieve_grh(ah_attr);
+ if (grh->sgid_attr)
+ return 0;
+
+ sgid_attr =
+ rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ /* Move ownership of the kref into the ah_attr */
+ grh->sgid_attr = sgid_attr;
+ return 0;
+}
+
+/*
+ * Undo rdma_fill_sgid_attr() so that the caller sees no change in its
+ * rdma_ah_attr. old_sgid_attr is the value rdma_fill_sgid_attr() stored.
+ */
+static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
+				  const struct ib_gid_attr *old_sgid_attr)
+{
+	/*
+	 * A pointer that differs from the saved one was installed by the
+	 * fill (in which case old_sgid_attr is NULL) and is released here.
+	 * An unchanged pointer still belongs to the caller and is left
+	 * alone.
+	 */
+	if (ah_attr->grh.sgid_attr != old_sgid_attr)
+		rdma_destroy_ah_attr(ah_attr);
+}
+
+/*
+ * Release the reference on old_attr, if any, and return the sgid_attr that
+ * should be tracked from now on: a newly held reference to the ah_attr's
+ * sgid_attr when IB_AH_GRH is set, NULL otherwise.
+ */
+static const struct ib_gid_attr *
+rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
+		      const struct ib_gid_attr *old_attr)
+{
+	if (old_attr)
+		rdma_put_gid_attr(old_attr);
+
+	if (!(ah_attr->ah_flags & IB_AH_GRH))
+		return NULL;
+
+	rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
+	return ah_attr->grh.sgid_attr;
+}
+
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct ib_ah *ah;
+ if (!pd->device->create_ah)
+ return ERR_PTR(-EOPNOTSUPP);
+
ah = pd->device->create_ah(pd, ah_attr, udata);
if (!IS_ERR(ah)) {
@@ -339,15 +489,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
ah->pd = pd;
ah->uobject = NULL;
ah->type = ah_attr->type;
+ ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+
atomic_inc(&pd->usecnt);
}
return ah;
}
+/**
+ * rdma_create_ah - Creates an address handle for the
+ * given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ *
+ * It returns the address handle on success and an ERR_PTR() error on failure.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
- return _rdma_create_ah(pd, ah_attr, NULL);
+ const struct ib_gid_attr *old_sgid_attr;
+ struct ib_ah *ah;
+ int ret;
+
+ ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ah = _rdma_create_ah(pd, ah_attr, NULL);
+
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ah;
}
EXPORT_SYMBOL(rdma_create_ah);
@@ -368,15 +541,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
+ const struct ib_gid_attr *old_sgid_attr;
+ struct ib_ah *ah;
int err;
+ err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+ if (err)
+ return ERR_PTR(err);
+
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
err = ib_resolve_eth_dmac(pd->device, ah_attr);
- if (err)
- return ERR_PTR(err);
+ if (err) {
+ ah = ERR_PTR(err);
+ goto out;
+ }
}
- return _rdma_create_ah(pd, ah_attr, udata);
+ ah = _rdma_create_ah(pd, ah_attr, udata);
+
+out:
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ah;
}
EXPORT_SYMBOL(rdma_create_user_ah);
@@ -455,16 +640,16 @@ static bool find_gid_index(const union ib_gid *gid,
return true;
}
-static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
- u16 vlan_id, const union ib_gid *sgid,
- enum ib_gid_type gid_type,
- u16 *gid_index)
+static const struct ib_gid_attr *
+get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
+ u16 vlan_id, const union ib_gid *sgid,
+ enum ib_gid_type gid_type)
{
struct find_gid_index_context context = {.vlan_id = vlan_id,
.gid_type = gid_type};
- return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
- &context, gid_index);
+ return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+ &context);
}
int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@ -508,39 +693,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr)
{
- struct ib_gid_attr sgid_attr;
- struct ib_global_route *grh;
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
+ const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
int hop_limit = 0xff;
- union ib_gid sgid;
- int ret;
-
- grh = rdma_ah_retrieve_grh(ah_attr);
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- return ret;
- }
+ int ret = 0;
/* If destination is link local and source GID is RoCEv1,
* IP stack is not used.
*/
if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
- sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+ sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
ah_attr->roce.dmac);
- goto done;
+ return ret;
}
- ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
ah_attr->roce.dmac,
- sgid_attr.ndev, &hop_limit);
-done:
- dev_put(sgid_attr.ndev);
+ sgid_attr->ndev, &hop_limit);
grh->hop_limit = hop_limit;
return ret;
@@ -555,16 +725,18 @@ done:
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
+ * On success the caller is responsible to call rdma_destroy_ah_attr on the
+ * attr.
*/
int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
- u16 gid_index;
int ret;
enum rdma_network_type net_type = RDMA_NETWORK_IB;
enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ const struct ib_gid_attr *sgid_attr;
int hoplimit = 0xff;
union ib_gid dgid;
union ib_gid sgid;
@@ -595,72 +767,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- ret = get_sgid_index_from_eth(device, port_num,
- vlan_id, &dgid,
- gid_type, &gid_index);
- if (ret)
- return ret;
+ sgid_attr = get_sgid_attr_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
- return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ rdma_move_grh_sgid_attr(ah_attr,
+ &sgid,
+ flow_class & 0xFFFFF,
+ hoplimit,
+ (flow_class >> 20) & 0xFF,
+ sgid_attr);
+
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ if (ret)
+ rdma_destroy_ah_attr(ah_attr);
+
+ return ret;
} else {
rdma_ah_set_dlid(ah_attr, wc->slid);
rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
- ret = ib_find_cached_gid_by_port(device, &dgid,
- IB_GID_TYPE_IB,
- port_num, NULL,
- &gid_index);
- if (ret)
- return ret;
- } else {
- gid_index = 0;
- }
+ if ((wc->wc_flags & IB_WC_GRH) == 0)
+ return 0;
+
+ if (dgid.global.interface_id !=
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+ sgid_attr = rdma_find_gid_by_port(
+ device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
+ } else
+ sgid_attr = rdma_get_gid_attr(device, port_num, 0);
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_move_grh_sgid_attr(ah_attr,
+ &sgid,
flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
- }
+ hoplimit,
+ (flow_class >> 20) & 0xFF,
+ sgid_attr);
+
return 0;
}
}
EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
+/**
+ * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
+ * of the reference
+ *
+ * @attr: Pointer to AH attribute structure
+ * @dgid: Destination GID
+ * @flow_label: Flow label
+ * @hop_limit: Hop limit
+ * @traffic_class: traffic class
+ * @sgid_attr: Pointer to SGID attribute
+ *
+ * This takes ownership of the sgid_attr reference. The caller must ensure
+ * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
+ * calling this function.
+ */
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+ u32 flow_label, u8 hop_limit, u8 traffic_class,
+ const struct ib_gid_attr *sgid_attr)
+{
+ /* The GRH's sgid index is taken from the sgid_attr itself */
+ rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
+ traffic_class);
+ /* The pointer is stored as is; no additional reference is taken here */
+ attr->grh.sgid_attr = sgid_attr;
+}
+EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
+
+/**
+ * rdma_destroy_ah_attr - Release reference to SGID attribute of
+ * ah attribute.
+ * @ah_attr: Pointer to ah attribute
+ *
+ * Drops the reference on the ah attribute's SGID attribute when one is set
+ * and clears the pointer, which makes repeated calls, and calls on a zero
+ * initialized ah_attr, safe.
+ */
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
+{
+	const struct ib_gid_attr *sgid_attr = ah_attr->grh.sgid_attr;
+
+	if (!sgid_attr)
+		return;
+
+	rdma_put_gid_attr(sgid_attr);
+	ah_attr->grh.sgid_attr = NULL;
+}
+EXPORT_SYMBOL(rdma_destroy_ah_attr);
+
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
{
struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
int ret;
ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return rdma_create_ah(pd, &ah_attr);
+ ah = rdma_create_ah(pd, &ah_attr);
+
+ rdma_destroy_ah_attr(&ah_attr);
+ return ah;
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ const struct ib_gid_attr *old_sgid_attr;
+ int ret;
+
if (ah->type != ah_attr->type)
return -EINVAL;
- return ah->device->modify_ah ?
+ ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-EOPNOTSUPP;
+
+ ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ret;
}
EXPORT_SYMBOL(rdma_modify_ah);
int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ ah_attr->grh.sgid_attr = NULL;
+
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-EOPNOTSUPP;
@@ -669,13 +910,17 @@ EXPORT_SYMBOL(rdma_query_ah);
int rdma_destroy_ah(struct ib_ah *ah)
{
+ const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
int ret;
pd = ah->pd;
ret = ah->device->destroy_ah(ah);
- if (!ret)
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (sgid_attr)
+ rdma_put_gid_attr(sgid_attr);
+ }
return ret;
}
@@ -1290,16 +1535,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
+/**
+ * ib_resolve_eth_dmac - Resolve destination mac address
+ * @device: Device to consider
+ * @ah_attr: address handle attribute which describes the
+ * source and destination parameters
+ * ib_resolve_eth_dmac() resolves destination MAC address and L3 hop limit. It
+ * returns 0 on success or appropriate error code. It initializes the
+ * necessary ah_attr fields when call is successful.
+ */
static int ib_resolve_eth_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr)
{
- int ret = 0;
- struct ib_global_route *grh;
-
- if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
- return -EINVAL;
-
- grh = rdma_ah_retrieve_grh(ah_attr);
+ int ret = 0;
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
@@ -1317,6 +1565,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
return ret;
}
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
/**
* IB core internal function to perform QP attributes modification.
*/
@@ -1324,8 +1580,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ const struct ib_gid_attr *old_sgid_attr_av;
+ const struct ib_gid_attr *old_sgid_attr_alt_av;
int ret;
+ if (attr_mask & IB_QP_AV) {
+ ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
+ &old_sgid_attr_av);
+ if (ret)
+ return ret;
+ }
+ if (attr_mask & IB_QP_ALT_PATH) {
+ /*
+ * FIXME: This does not track the migration state, so if the
+ * user loads a new alternate path after the HW has migrated
+ * from primary->alternate we will keep the wrong
+ * references. This is OK for IB because the reference
+ * counting does not serve any functional purpose.
+ */
+ ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
+ &old_sgid_attr_alt_av);
+ if (ret)
+ goto out_av;
+
+ /*
+ * Today the core code can only handle alternate paths and APM
+ * for IB. Ban them in RoCE mode.
+ */
+ if (!(rdma_protocol_ib(qp->device,
+ attr->alt_ah_attr.port_num) &&
+ rdma_protocol_ib(qp->device, port))) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * If the user provided the qp_attr then we have to resolve it. Kernel
+ * users have to provide already resolved rdma_ah_attr's
+ */
+ if (udata && (attr_mask & IB_QP_AV) &&
+ attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+ if (ret)
+ goto out;
+ }
+
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
@@ -1341,20 +1642,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
- if (!ret && (attr_mask & IB_QP_PORT))
- qp->port = attr->port_num;
+ if (ret)
+ goto out;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_AV)
+ qp->av_sgid_attr =
+ rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_path_sgid_attr = rdma_update_sgid_attr(
+ &attr->alt_ah_attr, qp->alt_path_sgid_attr);
+
+out:
+ if (attr_mask & IB_QP_ALT_PATH)
+ rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
+out_av:
+ if (attr_mask & IB_QP_AV)
+ rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
return ret;
}
-static bool is_qp_type_connected(const struct ib_qp *qp)
-{
- return (qp->qp_type == IB_QPT_UC ||
- qp->qp_type == IB_QPT_RC ||
- qp->qp_type == IB_QPT_XRC_INI ||
- qp->qp_type == IB_QPT_XRC_TGT);
-}
-
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
* @ib_qp: The QP to modify.
@@ -1369,17 +1677,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- struct ib_qp *qp = ib_qp->real_qp;
- int ret;
-
- if (attr_mask & IB_QP_AV &&
- attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
- is_qp_type_connected(qp)) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- return ret;
- }
- return _ib_modify_qp(qp, attr, attr_mask, udata);
+ return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
@@ -1451,6 +1749,9 @@ int ib_query_qp(struct ib_qp *qp,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
+ qp_attr->ah_attr.grh.sgid_attr = NULL;
+ qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
+
return qp->device->query_qp ?
qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
-EOPNOTSUPP;
@@ -1509,6 +1810,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
int ib_destroy_qp(struct ib_qp *qp)
{
+ const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
+ const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
@@ -1539,6 +1842,10 @@ int ib_destroy_qp(struct ib_qp *qp)
rdma_restrack_del(&qp->res);
ret = qp->device->destroy_qp(qp);
if (!ret) {
+ if (alt_path_sgid_attr)
+ rdma_put_gid_attr(alt_path_sgid_attr);
+ if (av_sgid_attr)
+ rdma_put_gid_attr(av_sgid_attr);
if (pd)
atomic_dec(&pd->usecnt);
if (scq)
@@ -1977,35 +2284,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
}
EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain)
-{
- struct ib_flow *flow_id;
- if (!qp->device->create_flow)
- return ERR_PTR(-EOPNOTSUPP);
-
- flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
- if (!IS_ERR(flow_id)) {
- atomic_inc(&qp->usecnt);
- flow_id->qp = qp;
- }
- return flow_id;
-}
-EXPORT_SYMBOL(ib_create_flow);
-
-int ib_destroy_flow(struct ib_flow *flow_id)
-{
- int err;
- struct ib_qp *qp = flow_id->qp;
-
- err = qp->device->destroy_flow(flow_id);
- if (!err)
- atomic_dec(&qp->usecnt);
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_flow);
-
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
@@ -2200,7 +2478,6 @@ static void __ib_drain_sq(struct ib_qp *qp)
struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
- struct ib_send_wr *bad_swr;
struct ib_rdma_wr swr = {
.wr = {
.next = NULL,
@@ -2219,7 +2496,7 @@ static void __ib_drain_sq(struct ib_qp *qp)
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
- ret = ib_post_send(qp, &swr.wr, &bad_swr);
+ ret = ib_post_send(qp, &swr.wr, NULL);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
@@ -2240,7 +2517,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
- struct ib_recv_wr rwr = {}, *bad_rwr;
+ struct ib_recv_wr rwr = {};
int ret;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
@@ -2253,7 +2530,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
- ret = ib_post_recv(qp, &rwr, &bad_rwr);
+ ret = ib_post_recv(qp, &rwr, NULL);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;