Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/Makefile | 13
-rw-r--r--  drivers/infiniband/core/addr.c | 20
-rw-r--r--  drivers/infiniband/core/cache.c | 294
-rw-r--r--  drivers/infiniband/core/cm.c | 2117
-rw-r--r--  drivers/infiniband/core/cm_msgs.h | 4
-rw-r--r--  drivers/infiniband/core/cm_trace.c | 15
-rw-r--r--  drivers/infiniband/core/cm_trace.h | 414
-rw-r--r--  drivers/infiniband/core/cma.c | 1904
-rw-r--r--  drivers/infiniband/core/cma_configfs.c | 57
-rw-r--r--  drivers/infiniband/core/cma_priv.h | 30
-rw-r--r--  drivers/infiniband/core/cma_trace.h | 60
-rw-r--r--  drivers/infiniband/core/core_priv.h | 103
-rw-r--r--  drivers/infiniband/core/counters.c | 323
-rw-r--r--  drivers/infiniband/core/cq.c | 221
-rw-r--r--  drivers/infiniband/core/device.c | 342
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 494
-rw-r--r--  drivers/infiniband/core/iwcm.c | 28
-rw-r--r--  drivers/infiniband/core/iwcm.h | 2
-rw-r--r--  drivers/infiniband/core/iwpm_msg.c | 69
-rw-r--r--  drivers/infiniband/core/iwpm_util.c | 88
-rw-r--r--  drivers/infiniband/core/iwpm_util.h | 23
-rw-r--r--  drivers/infiniband/core/lag.c | 137
-rw-r--r--  drivers/infiniband/core/mad.c | 394
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 7
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c | 37
-rw-r--r--  drivers/infiniband/core/multicast.c | 43
-rw-r--r--  drivers/infiniband/core/netlink.c | 2
-rw-r--r--  drivers/infiniband/core/nldev.c | 708
-rw-r--r--  drivers/infiniband/core/opa_smi.h | 4
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 198
-rw-r--r--  drivers/infiniband/core/rdma_core.h | 7
-rw-r--r--  drivers/infiniband/core/restrack.c | 186
-rw-r--r--  drivers/infiniband/core/restrack.h | 10
-rw-r--r--  drivers/infiniband/core/roce_gid_mgmt.c | 70
-rw-r--r--  drivers/infiniband/core/rw.c | 107
-rw-r--r--  drivers/infiniband/core/sa.h | 2
-rw-r--r--  drivers/infiniband/core/sa_query.c | 584
-rw-r--r--  drivers/infiniband/core/security.c | 28
-rw-r--r--  drivers/infiniband/core/smi.c | 12
-rw-r--r--  drivers/infiniband/core/smi.h | 4
-rw-r--r--  drivers/infiniband/core/sysfs.c | 1288
-rw-r--r--  drivers/infiniband/core/trace.c | 2
-rw-r--r--  drivers/infiniband/core/ucma.c | 740
-rw-r--r--  drivers/infiniband/core/ud_header.c | 10
-rw-r--r--  drivers/infiniband/core/umem.c | 207
-rw-r--r--  drivers/infiniband/core/umem_dmabuf.c | 234
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 305
-rw-r--r--  drivers/infiniband/core/user_mad.c | 110
-rw-r--r--  drivers/infiniband/core/uverbs.h | 27
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 688
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 71
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 92
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c | 2
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c | 126
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_async_fd.c | 33
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_counters.c | 22
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_cq.c | 32
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_device.c | 245
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_dm.c | 6
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_flow_action.c | 389
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_mr.c | 180
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_qp.c | 380
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_srq.c | 234
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_wq.c | 194
-rw-r--r--  drivers/infiniband/core/uverbs_uapi.c | 13
-rw-r--r--  drivers/infiniband/core/verbs.c | 828
66 files changed, 8891 insertions, 6728 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d1b14887960e..8ab4eea5a0a5 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,16 +8,16 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
- device.o fmr_pool.o cache.o netlink.o \
+ device.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
nldev.o restrack.o counters.o ib_core_uverbs.o \
- trace.o
+ trace.o lag.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
-ib_cm-y := cm.o
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
@@ -36,6 +36,9 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o \
- uverbs_std_types_async_fd.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ uverbs_std_types_async_fd.o \
+ uverbs_std_types_srq.o \
+ uverbs_std_types_wq.o \
+ uverbs_std_types_qp.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1753a9801b70..f253295795f0 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,7 +37,6 @@
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -76,7 +75,9 @@ static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
- .len = sizeof(struct rdma_nla_ls_gid)},
+ .len = sizeof(struct rdma_nla_ls_gid),
+ .validation_type = NLA_VALIDATE_MIN,
+ .min = sizeof(struct rdma_nla_ls_gid)},
};
static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
@@ -371,6 +372,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
+ might_sleep();
+
/* If we have a gateway in IB mode then it must be an IB network */
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
@@ -645,13 +648,12 @@ static void process_one_req(struct work_struct *_work)
req->callback = NULL;
spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
@@ -727,6 +729,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
struct rdma_dev_addr dev_addr = {};
int ret;
+ might_sleep();
+
if (rec->roce.route_resolved)
return 0;
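
The process_one_req() hunk above moves cancel_delayed_work() ahead of the list-membership check: as long as a request is still on req_list it can be requeued, so the cancel must happen unconditionally before the entry is unlinked. A minimal stand-alone sketch of that ordering follows; the request structure, list, and lock names here are illustrative assumptions, not part of the patch.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

/* Hypothetical request tracked on a pending list with a delayed work item. */
struct example_req {
	struct list_head list;		/* linked on example_req_list while pending */
	struct delayed_work work;
};

static LIST_HEAD(example_req_list);
static DEFINE_SPINLOCK(example_lock);

static void example_complete(struct example_req *req)
{
	spin_lock_bh(&example_lock);
	/*
	 * Cancel before inspecting the list: while the request is still
	 * linked, a concurrent path may requeue req->work, so the cancel
	 * cannot be skipped even if the entry already looks finished.
	 */
	cancel_delayed_work(&req->work);
	if (!list_empty(&req->list)) {
		list_del_init(&req->list);
		kfree(req);
	}
	spin_unlock_bh(&example_lock);
}
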
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 17bfedd24cc3..4084d05a4510 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -33,7 +33,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/if_vlan.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -46,7 +46,7 @@
struct ib_pkey_cache {
int table_len;
- u16 table[0];
+ u16 table[];
};
struct ib_update_work {
@@ -121,7 +121,7 @@ struct ib_gid_table {
u32 default_gid_indices;
};
-static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port)
{
struct ib_event event;
@@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
}
static const char * const gid_type_str[] = {
+ /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+ * user space compatibility reasons.
+ */
[IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};
@@ -193,7 +197,7 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
+static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port)
{
return device->port_data[port].cache.gid;
}
@@ -233,10 +237,10 @@ static void put_gid_ndev(struct rcu_head *head)
static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
struct ib_device *device = entry->attr.device;
- u8 port_num = entry->attr.port_num;
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
- dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+ dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__,
port_num, entry->attr.index, entry->attr.gid.raw);
write_lock_irq(&table->rwlock);
@@ -278,7 +282,7 @@ static void free_gid_work(struct work_struct *work)
struct ib_gid_table_entry *entry =
container_of(work, struct ib_gid_table_entry, del_work);
struct ib_device *device = entry->attr.device;
- u8 port_num = entry->attr.port_num;
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
mutex_lock(&table->lock);
@@ -319,7 +323,7 @@ static void store_gid_entry(struct ib_gid_table *table,
{
entry->state = GID_TABLE_ENTRY_VALID;
- dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+ dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n",
__func__, entry->attr.port_num, entry->attr.index,
entry->attr.gid.raw);
@@ -350,7 +354,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
int ret;
if (!attr->ndev) {
- dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+ dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n",
__func__, attr->port_num, attr->index);
return -EINVAL;
}
@@ -358,7 +362,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
ret = attr->device->ops.add_gid(attr, &entry->context);
if (ret) {
dev_err(&attr->device->dev,
- "%s GID add failed port=%d index=%d\n",
+ "%s GID add failed port=%u index=%u\n",
__func__, attr->port_num, attr->index);
return ret;
}
@@ -375,7 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
* @ix: GID entry index to delete
*
*/
-static void del_gid(struct ib_device *ib_dev, u8 port,
+static void del_gid(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table, int ix)
{
struct roce_gid_ndev_storage *ndev_storage;
@@ -383,7 +387,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
lockdep_assert_held(&table->lock);
- dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+ dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port,
ix, table->data_vec[ix]->attr.gid.raw);
write_lock_irq(&table->rwlock);
@@ -539,7 +543,7 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr,
unsigned long mask, bool default_gid)
{
@@ -583,7 +587,7 @@ out_unlock:
return ret;
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID |
@@ -594,7 +598,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
}
static int
-_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+_ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr,
unsigned long mask, bool default_gid)
{
@@ -623,7 +627,7 @@ out_unlock:
return ret;
}
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID |
@@ -634,7 +638,7 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
struct ib_gid_table *table;
@@ -665,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
* rdma_find_gid_by_port - Returns the GID entry attributes when it finds
* a valid GID entry for given search parameters. It searches for the specified
* GID value in the local software cache.
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
- * @port_num: The port number of the device where the GID value should be
- * searched.
+ * @port: The port number of the device where the GID value should be searched.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
*
* Returns sgid attributes if the GID is found with valid reference or
@@ -680,7 +683,7 @@ const struct ib_gid_attr *
rdma_find_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
int local_index;
struct ib_gid_table *table;
@@ -715,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
* rdma_find_gid_by_filter - Returns the GID table attribute where a
* specified GID value occurs
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @port: The port number of the device where the GID value could be
* searched.
@@ -724,13 +727,14 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
+ * @context: Private data to pass into the call-back.
*
* rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
*
*/
const struct ib_gid_attr *rdma_find_gid_by_filter(
- struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+ struct ib_device *ib_dev, const union ib_gid *gid, u32 port,
bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
void *),
void *context)
@@ -801,7 +805,7 @@ static void release_gid_table(struct ib_device *device,
continue;
if (kref_read(&table->data_vec[i]->kref) > 1) {
dev_err(&device->dev,
- "GID entry ref leak for index %d ref=%d\n", i,
+ "GID entry ref leak for index %d ref=%u\n", i,
kref_read(&table->data_vec[i]->kref));
leak = true;
}
@@ -814,7 +818,7 @@ static void release_gid_table(struct ib_device *device,
kfree(table);
}
-static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
+static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
int i;
@@ -830,7 +834,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
mutex_unlock(&table->lock);
}
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
@@ -863,7 +867,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
}
}
-static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
+static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
unsigned int i;
@@ -880,7 +884,7 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static void gid_table_release_one(struct ib_device *ib_dev)
{
- unsigned int p;
+ u32 p;
rdma_for_each_port (ib_dev, p) {
release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
@@ -891,7 +895,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
- unsigned int rdma_port;
+ u32 rdma_port;
rdma_for_each_port (ib_dev, rdma_port) {
table = alloc_gid_table(
@@ -911,7 +915,7 @@ rollback_table_setup:
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- unsigned int p;
+ u32 p;
rdma_for_each_port (ib_dev, p)
cleanup_gid_table_port(ib_dev, p,
@@ -946,12 +950,12 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
* Returns 0 on success or appropriate error code.
*
*/
-int rdma_query_gid(struct ib_device *device, u8 port_num,
+int rdma_query_gid(struct ib_device *device, u32 port_num,
int index, union ib_gid *gid)
{
struct ib_gid_table *table;
unsigned long flags;
- int res = -EINVAL;
+ int res;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
@@ -959,9 +963,15 @@ int rdma_query_gid(struct ib_device *device, u8 port_num,
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- if (index < 0 || index >= table->sz ||
- !is_gid_entry_valid(table->data_vec[index]))
+ if (index < 0 || index >= table->sz) {
+ res = -EINVAL;
goto done;
+ }
+
+ if (!is_gid_entry_valid(table->data_vec[index])) {
+ res = -ENOENT;
+ goto done;
+ }
memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
res = 0;
@@ -973,6 +983,23 @@ done:
EXPORT_SYMBOL(rdma_query_gid);
/**
+ * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
+ * @attr: Pointer to the GID attribute
+ *
+ * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding
+ * to the SGID attr. Callers are required to already be holding the reference
+ * to an existing GID entry.
+ *
+ * Returns the HW GID context
+ *
+ */
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
+{
+ return container_of(attr, struct ib_gid_table_entry, attr)->context;
+}
+EXPORT_SYMBOL(rdma_read_gid_hw_context);
+
+/**
* rdma_find_gid - Returns SGID attributes if the matching GID is found.
* @device: The device to query.
* @gid: The GID value to search for.
@@ -993,7 +1020,7 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
- unsigned int p;
+ u32 p;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -1022,7 +1049,7 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
EXPORT_SYMBOL(rdma_find_gid);
int ib_get_cached_pkey(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
int index,
u16 *pkey)
{
@@ -1037,7 +1064,7 @@ int ib_get_cached_pkey(struct ib_device *device,
cache = device->port_data[port_num].cache.pkey;
- if (index < 0 || index >= cache->table_len)
+ if (!cache || index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
@@ -1048,27 +1075,19 @@ int ib_get_cached_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_pkey);
-int ib_get_cached_subnet_prefix(struct ib_device *device,
- u8 port_num,
- u64 *sn_pfx)
+void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num,
+ u64 *sn_pfx)
{
unsigned long flags;
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
-
read_lock_irqsave(&device->cache_lock, flags);
*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
read_unlock_irqrestore(&device->cache_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
-int ib_find_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+int ib_find_cached_pkey(struct ib_device *device, u32 port_num,
+ u16 pkey, u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
@@ -1082,6 +1101,10 @@ int ib_find_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1091,8 +1114,9 @@ int ib_find_cached_pkey(struct ib_device *device,
*index = i;
ret = 0;
break;
- } else
+ } else {
partial_ix = i;
+ }
}
if (ret && partial_ix >= 0) {
@@ -1100,16 +1124,15 @@ int ib_find_cached_pkey(struct ib_device *device,
ret = 0;
}
+err:
read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
-int ib_find_exact_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num,
+ u16 pkey, u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
@@ -1122,6 +1145,10 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1132,15 +1159,14 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
break;
}
+err:
read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
-int ib_get_cached_lmc(struct ib_device *device,
- u8 port_num,
- u8 *lmc)
+int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
{
unsigned long flags;
int ret = 0;
@@ -1156,8 +1182,7 @@ int ib_get_cached_lmc(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_lmc);
-int ib_get_cached_port_state(struct ib_device *device,
- u8 port_num,
+int ib_get_cached_port_state(struct ib_device *device, u32 port_num,
enum ib_port_state *port_state)
{
unsigned long flags;
@@ -1191,9 +1216,9 @@ EXPORT_SYMBOL(ib_get_cached_port_state);
* code.
*/
const struct ib_gid_attr *
-rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index)
{
- const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
struct ib_gid_table *table;
unsigned long flags;
@@ -1217,6 +1242,63 @@ done:
EXPORT_SYMBOL(rdma_get_gid_attr);
/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries)
+{
+ const struct ib_gid_attr *gid_attr;
+ ssize_t num_entries = 0, ret;
+ struct ib_gid_table *table;
+ u32 port_num, i;
+ struct net_device *ndev;
+ unsigned long flags;
+
+ rdma_for_each_port(device, port_num) {
+ table = rdma_gid_table(device, port_num);
+ read_lock_irqsave(&table->rwlock, flags);
+ for (i = 0; i < table->sz; i++) {
+ if (!is_gid_entry_valid(table->data_vec[i]))
+ continue;
+ if (num_entries >= max_entries) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ gid_attr = &table->data_vec[i]->attr;
+
+ memcpy(&entries->gid, &gid_attr->gid,
+ sizeof(gid_attr->gid));
+ entries->gid_index = gid_attr->index;
+ entries->port_num = gid_attr->port_num;
+ entries->gid_type = gid_attr->gid_type;
+ ndev = rcu_dereference_protected(
+ gid_attr->ndev,
+ lockdep_is_held(&table->rwlock));
+ if (ndev)
+ entries->netdev_ifindex = ndev->ifindex;
+
+ num_entries++;
+ entries++;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
+
+ return num_entries;
+err:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
+/**
* rdma_put_gid_attr - Release reference to the GID attribute
* @attr: Pointer to the GID attribute whose reference
* needs to be released.
@@ -1272,8 +1354,8 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
struct ib_gid_table_entry *entry =
container_of(attr, struct ib_gid_table_entry, attr);
struct ib_device *device = entry->attr.device;
- struct net_device *ndev = ERR_PTR(-ENODEV);
- u8 port_num = entry->attr.port_num;
+ struct net_device *ndev = ERR_PTR(-EINVAL);
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table;
unsigned long flags;
bool valid;
@@ -1284,8 +1366,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid) {
ndev = rcu_dereference(attr->ndev);
- if (!ndev ||
- (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+ if (!ndev)
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
@@ -1293,9 +1374,10 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
-static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
+static int get_lower_dev_vlan(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
{
- u16 *vlan_id = data;
+ u16 *vlan_id = (u16 *)priv->data;
if (is_vlan_dev(lower_dev))
*vlan_id = vlan_dev_vlan_id(lower_dev);
@@ -1321,6 +1403,9 @@ static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
u16 *vlan_id, u8 *smac)
{
+ struct netdev_nested_priv priv = {
+ .data = (void *)vlan_id,
+ };
struct net_device *ndev;
rcu_read_lock();
@@ -1341,7 +1426,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
* the lower vlan device for this gid entry.
*/
netdev_walk_all_lower_dev_rcu(attr->ndev,
- get_lower_dev_vlan, vlan_id);
+ get_lower_dev_vlan, &priv);
}
}
rcu_read_unlock();
@@ -1350,7 +1435,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
EXPORT_SYMBOL(rdma_read_gid_l2_fields);
static int config_non_roce_gid_cache(struct ib_device *device,
- u8 port, int gid_tbl_len)
+ u32 port, struct ib_port_attr *tprops)
{
struct ib_gid_attr gid_attr = {};
struct ib_gid_table *table;
@@ -1362,7 +1447,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
table = rdma_gid_table(device, port);
mutex_lock(&table->lock);
- for (i = 0; i < gid_tbl_len; ++i) {
+ for (i = 0; i < tprops->gid_tbl_len; ++i) {
if (!device->ops.query_gid)
continue;
ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
@@ -1373,6 +1458,8 @@ static int config_non_roce_gid_cache(struct ib_device *device,
goto err;
}
gid_attr.index = i;
+ tprops->subnet_prefix =
+ be64_to_cpu(gid_attr.gid.global.subnet_prefix);
add_modify_gid(table, &gid_attr);
}
err:
@@ -1381,10 +1468,12 @@ err:
}
static int
-ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
+ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
+ bool update_pkeys, bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
- struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
+ struct ib_pkey_cache *pkey_cache = NULL;
+ struct ib_pkey_cache *old_pkey_cache = NULL;
int i;
int ret;
@@ -1401,38 +1490,44 @@ ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
goto err;
}
- if (!rdma_protocol_roce(device, port)) {
+ if (!rdma_protocol_roce(device, port) && update_gids) {
ret = config_non_roce_gid_cache(device, port,
- tprops->gid_tbl_len);
+ tprops);
if (ret)
goto err;
}
- pkey_cache = kmalloc(struct_size(pkey_cache, table,
- tprops->pkey_tbl_len),
- GFP_KERNEL);
- if (!pkey_cache) {
- ret = -ENOMEM;
- goto err;
- }
-
- pkey_cache->table_len = tprops->pkey_tbl_len;
+ update_pkeys &= !!tprops->pkey_tbl_len;
- for (i = 0; i < pkey_cache->table_len; ++i) {
- ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
- if (ret) {
- dev_warn(&device->dev,
- "ib_query_pkey failed (%d) for index %d\n",
- ret, i);
+ if (update_pkeys) {
+ pkey_cache = kmalloc(struct_size(pkey_cache, table,
+ tprops->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!pkey_cache) {
+ ret = -ENOMEM;
goto err;
}
+
+ pkey_cache->table_len = tprops->pkey_tbl_len;
+
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i,
+ pkey_cache->table + i);
+ if (ret) {
+ dev_warn(&device->dev,
+ "ib_query_pkey failed (%d) for index %d\n",
+ ret, i);
+ goto err;
+ }
+ }
}
write_lock_irq(&device->cache_lock);
- old_pkey_cache = device->port_data[port].cache.pkey;
-
- device->port_data[port].cache.pkey = pkey_cache;
+ if (update_pkeys) {
+ old_pkey_cache = device->port_data[port].cache.pkey;
+ device->port_data[port].cache.pkey = pkey_cache;
+ }
device->port_data[port].cache.lmc = tprops->lmc;
device->port_data[port].cache.port_state = tprops->state;
@@ -1464,6 +1559,8 @@ static void ib_cache_event_task(struct work_struct *_work)
* the cache.
*/
ret = ib_cache_update(work->event.device, work->event.element.port_num,
+ work->event.event == IB_EVENT_GID_CHANGE,
+ work->event.event == IB_EVENT_PKEY_CHANGE,
work->enforce_security);
/* GID event is notified already for individual GID entries by
@@ -1527,24 +1624,25 @@ EXPORT_SYMBOL(ib_dispatch_event);
int ib_cache_setup_one(struct ib_device *device)
{
- unsigned int p;
+ u32 p;
int err;
- rwlock_init(&device->cache_lock);
-
err = gid_table_setup_one(device);
if (err)
return err;
- rdma_for_each_port (device, p)
- ib_cache_update(device, p, true);
+ rdma_for_each_port (device, p) {
+ err = ib_cache_update(device, p, true, true, true);
+ if (err)
+ return err;
+ }
return 0;
}
void ib_cache_release_one(struct ib_device *device)
{
- unsigned int p;
+ u32 p;
/*
* The release function frees all the cache elements.
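
The cache.c portion of the diff also exports two new helpers, rdma_read_gid_hw_context() and rdma_query_gid_table(). The sketch below shows how a consumer might walk the cached GID entries with the table helper, per the kernel-doc added above; the entry count (128) and the pr_info() reporting are illustrative assumptions only.

#include <linux/slab.h>
#include <rdma/ib_cache.h>

static void example_dump_gid_table(struct ib_device *device)
{
	struct ib_uverbs_gid_entry *entries;
	ssize_t n, i;

	entries = kcalloc(128, sizeof(*entries), GFP_KERNEL);
	if (!entries)
		return;

	/* Returns the number of valid entries copied, or a negative errno. */
	n = rdma_query_gid_table(device, entries, 128);
	for (i = 0; i < n; i++)
		pr_info("port %u gid index %u type %u ifindex %u\n",
			entries[i].port_num, entries[i].gid_index,
			entries[i].gid_type, entries[i].netdev_ifindex);

	kfree(entries);
}
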
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 15e99a888427..1f9938a2c475 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -25,8 +25,10 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
+#include <rdma/ib_sysfs.h>
#include "cm_msgs.h"
#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -66,6 +68,8 @@ static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
[IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
[IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+ [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
+ "vendor option is not supported",
};
const char *__attribute_const__ ibcm_reject_msg(int reason)
@@ -80,8 +84,22 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
}
EXPORT_SYMBOL(ibcm_reject_msg);
-static void cm_add_one(struct ib_device *device);
+struct cm_id_private;
+struct cm_work;
+static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work);
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param);
+static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
+ const void *private_data, u8 private_data_len);
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len);
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len);
static struct ib_client cm_client = {
.name = "cm",
@@ -104,8 +122,6 @@ static struct ib_cm {
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
- /* Sync on cm change port state */
- spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -133,77 +149,33 @@ enum {
CM_COUNTER_GROUPS
};
-static char const counter_group_names[CM_COUNTER_GROUPS]
- [sizeof("cm_rx_duplicates")] = {
- "cm_tx_msgs", "cm_tx_retries",
- "cm_rx_msgs", "cm_rx_duplicates"
-};
-
-struct cm_counter_group {
- struct kobject obj;
- atomic_long_t counter[CM_ATTR_COUNT];
-};
-
struct cm_counter_attribute {
- struct attribute attr;
- int index;
-};
-
-#define CM_COUNTER_ATTR(_name, _index) \
-struct cm_counter_attribute cm_##_name##_counter_attr = { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
- .index = _index \
-}
-
-static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
-static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
-static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
-static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
-static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
-static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
-static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
-static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
-static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
-static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
-static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
-
-static struct attribute *cm_counter_default_attrs[] = {
- &cm_req_counter_attr.attr,
- &cm_mra_counter_attr.attr,
- &cm_rej_counter_attr.attr,
- &cm_rep_counter_attr.attr,
- &cm_rtu_counter_attr.attr,
- &cm_dreq_counter_attr.attr,
- &cm_drep_counter_attr.attr,
- &cm_sidr_req_counter_attr.attr,
- &cm_sidr_rep_counter_attr.attr,
- &cm_lap_counter_attr.attr,
- &cm_apr_counter_attr.attr,
- NULL
+ struct ib_port_attribute attr;
+ unsigned short group;
+ unsigned short index;
};
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
- struct kobject port_obj;
- u8 port_num;
- struct list_head cm_priv_prim_list;
- struct list_head cm_priv_altr_list;
- struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
+ u32 port_num;
+ atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
};
struct cm_device {
+ struct kref kref;
struct list_head list;
+ spinlock_t mad_agent_lock;
struct ib_device *ib_device;
u8 ack_delay;
int going_down;
- struct cm_port *port[0];
+ struct cm_port *port[];
};
struct cm_av {
struct cm_port *port;
- union ib_gid dgid;
struct rdma_ah_attr ah_attr;
+ u16 dlid_datapath;
u16 pkey_index;
u8 timeout;
};
@@ -216,7 +188,7 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct sa_path_rec path[0];
+ struct sa_path_rec path[];
};
struct cm_timewait_info {
@@ -235,11 +207,13 @@ struct cm_id_private {
struct rb_node service_node;
struct rb_node sidr_id_node;
+ u32 sidr_slid;
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
refcount_t refcount;
/* Number of clients sharing this ib_cm_id. Only valid for listeners.
- * Protected by the cm.lock spinlock. */
+ * Protected by the cm.lock spinlock.
+ */
int listen_sharecount;
struct rcu_head rcu;
@@ -261,7 +235,6 @@ struct cm_id_private {
__be16 pkey;
u8 private_data_len;
u8 max_cm_retries;
- u8 peer_to_peer;
u8 responder_resources;
u8 initiator_depth;
u8 retry_count;
@@ -269,16 +242,28 @@ struct cm_id_private {
u8 service_timeout;
u8 target_ack_delay;
- struct list_head prim_list;
- struct list_head altr_list;
- /* Indicates that the send port mad is registered and av is set */
- int prim_send_port_not_ready;
- int altr_send_port_not_ready;
-
struct list_head work_list;
atomic_t work_count;
+
+ struct rdma_ucm_ece ece;
};
+static void cm_dev_release(struct kref *kref)
+{
+ struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
+ u32 i;
+
+ rdma_for_each_port(cm_dev->ib_device, i)
+ kfree(cm_dev->port[i - 1]);
+
+ kfree(cm_dev);
+}
+
+static void cm_device_put(struct cm_device *cm_dev)
+{
+ kref_put(&cm_dev->kref, cm_dev_release);
+}
+
static void cm_work_handler(struct work_struct *work);
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
@@ -287,52 +272,37 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
complete(&cm_id_priv->comp);
}
-static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
{
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
- struct cm_av *av;
- unsigned long flags, flags2;
- int ret = 0;
- /* don't let the port to be released till the agent is down */
- spin_lock_irqsave(&cm.state_lock, flags2);
- spin_lock_irqsave(&cm.lock, flags);
- if (!cm_id_priv->prim_send_port_not_ready)
- av = &cm_id_priv->av;
- else if (!cm_id_priv->altr_send_port_not_ready &&
- (cm_id_priv->alt_av.port))
- av = &cm_id_priv->alt_av;
- else {
- pr_info("%s: not valid CM id\n", __func__);
- ret = -ENODEV;
- spin_unlock_irqrestore(&cm.lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&cm.lock, flags);
- /* Make sure the port haven't released the mad yet */
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!cm_id_priv->av.port)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
mad_agent = cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
- pr_info("%s: not a valid MAD agent\n", __func__);
- ret = -ENODEV;
+ m = ERR_PTR(-EINVAL);
goto out;
}
- ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
+
+ ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
+ m = ERR_CAST(ah);
goto out;
}
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- av->pkey_index,
+ cm_id_priv->av.pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
rdma_destroy_ah(ah, 0);
- ret = PTR_ERR(m);
goto out;
}
@@ -342,11 +312,49 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
refcount_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
- *msg = m;
out:
- spin_unlock_irqrestore(&cm.state_lock, flags2);
- return ret;
+ spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ return m;
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+ struct cm_id_private *cm_id_priv = msg->context[0];
+
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ cm_deref_id(cm_id_priv);
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
+{
+ struct ib_mad_send_buf *msg;
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return msg;
+ cm_id_priv->msg = msg;
+ return msg;
+}
+
+static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
+{
+ struct cm_id_private *cm_id_priv = msg->context[0];
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!WARN_ON(cm_id_priv->msg != msg))
+ cm_id_priv->msg = NULL;
+
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ cm_deref_id(cm_id_priv);
+ ib_free_send_mad(msg);
}
static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
@@ -373,15 +381,6 @@ static int cm_create_response_msg_ah(struct cm_port *port,
return 0;
}
-static void cm_free_msg(struct ib_mad_send_buf *msg)
-{
- if (msg->ah)
- rdma_destroy_ah(msg->ah, 0);
- if (msg->context[0])
- cm_deref_id(msg->context[0]);
- ib_free_send_mad(msg);
-}
-
static int cm_alloc_response_msg(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc,
struct ib_mad_send_buf **msg)
@@ -395,7 +394,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
if (ret) {
- cm_free_msg(m);
+ ib_free_send_mad(m);
return ret;
}
@@ -403,8 +402,14 @@ static int cm_alloc_response_msg(struct cm_port *port,
return 0;
}
-static void * cm_copy_private_data(const void *private_data,
- u8 private_data_len)
+static void cm_free_response_msg(struct ib_mad_send_buf *msg)
+{
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ ib_free_send_mad(msg);
+}
+
+static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{
void *data;
@@ -428,62 +433,38 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
{
- struct rdma_ah_attr new_ah_attr;
- int ret;
+ struct cm_port *old_port = av->port;
- av->port = port;
- av->pkey_index = wc->pkey_index;
+ if (old_port == port)
+ return;
- /*
- * av->ah_attr might be initialized based on past wc during incoming
- * connect request or while sending out connect request. So initialize
- * a new ah_attr on stack. If initialization fails, old ah_attr is
- * used for sending any responses. If initialization is successful,
- * than new ah_attr is used by overwriting old one.
- */
- ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
- port->port_num, wc,
- grh, &new_ah_attr);
- if (ret)
- return ret;
+ av->port = port;
+ if (old_port)
+ cm_device_put(old_port->cm_dev);
+ if (port)
+ kref_get(&port->cm_dev->kref);
+}
- rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
- return 0;
+static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
+ struct rdma_ah_attr *ah_attr, struct cm_av *av)
+{
+ cm_set_av_port(av, port);
+ av->pkey_index = wc->pkey_index;
+ rdma_move_ah_attr(&av->ah_attr, ah_attr);
}
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
- av->port = port;
+ cm_set_av_port(av, port);
av->pkey_index = wc->pkey_index;
return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
port->port_num, wc,
grh, &av->ah_attr);
}
-static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
- struct cm_av *av,
- struct cm_port *port)
-{
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&cm.lock, flags);
-
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
-
- spin_unlock_irqrestore(&cm.lock, flags);
- return ret;
-}
-
static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
@@ -527,8 +508,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
static int cm_init_av_by_path(struct sa_path_rec *path,
const struct ib_gid_attr *sgid_attr,
- struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+ struct cm_av *av)
{
struct rdma_ah_attr new_ah_attr;
struct cm_device *cm_dev;
@@ -545,7 +525,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
if (ret)
return ret;
- av->port = port;
+ cm_set_av_port(av, port);
/*
* av->ah_attr might be initialized based on wc or during
@@ -562,36 +542,29 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
return ret;
av->timeout = path->packet_life_time + 1;
-
- ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret) {
- rdma_destroy_ah_attr(&new_ah_attr);
- return ret;
- }
rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
-static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */
+static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
{
- int err;
- u32 id;
-
- err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
- xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
-
- cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
- return err;
+ cm_set_av_port(dest, src->port);
+ cm_set_av_port(src, NULL);
+ dest->pkey_index = src->pkey_index;
+ rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr);
+ dest->timeout = src->timeout;
}
-static u32 cm_local_id(__be32 local_id)
+static void cm_destroy_av(struct cm_av *av)
{
- return (__force u32) (local_id ^ cm.random_id_operand);
+ rdma_destroy_ah_attr(&av->ah_attr);
+ cm_set_av_port(av, NULL);
}
-static void cm_free_id(__be32 local_id)
+static u32 cm_local_id(__be32 local_id)
{
- xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
+ return (__force u32) (local_id ^ cm.random_id_operand);
}
static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
@@ -633,22 +606,25 @@ static int be64_gt(__be64 a, __be64 b)
return (__force u64) a > (__force u64) b;
}
-static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+/*
+ * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv
+ * if the new ID was inserted, NULL if it could not be inserted due to a
+ * collision, or the existing cm_id_priv ready for shared usage.
+ */
+static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
+ ib_cm_handler shared_handler)
{
struct rb_node **link = &cm.listen_service_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
- __be64 service_mask = cm_id_priv->id.service_mask;
+ unsigned long flags;
+ spin_lock_irqsave(&cm.lock, flags);
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
- if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id) &&
- (cm_id_priv->id.device == cur_cm_id_priv->id.device))
- return cur_cm_id_priv;
if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
link = &(*link)->rb_left;
@@ -658,26 +634,38 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
link = &(*link)->rb_left;
else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
link = &(*link)->rb_right;
- else
- link = &(*link)->rb_right;
+ else {
+ /*
+ * Sharing an ib_cm_id with different handlers is not
+ * supported
+ */
+ if (cur_cm_id_priv->id.cm_handler != shared_handler ||
+ cur_cm_id_priv->id.context ||
+ WARN_ON(!cur_cm_id_priv->id.cm_handler)) {
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return NULL;
+ }
+ refcount_inc(&cur_cm_id_priv->refcount);
+ cur_cm_id_priv->listen_sharecount++;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cur_cm_id_priv;
+ }
}
+ cm_id_priv->listen_sharecount++;
rb_link_node(&cm_id_priv->service_node, parent, link);
rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
- return NULL;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cm_id_priv;
}
-static struct cm_id_private * cm_find_listen(struct ib_device *device,
- __be64 service_id)
+static struct cm_id_private *cm_find_listen(struct ib_device *device,
+ __be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
- if ((cm_id_priv->id.service_mask & service_id) ==
- cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device))
- return cm_id_priv;
if (device < cm_id_priv->id.device)
node = node->rb_left;
@@ -687,14 +675,16 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
node = node->rb_left;
else if (be64_gt(service_id, cm_id_priv->id.service_id))
node = node->rb_right;
- else
- node = node->rb_right;
+ else {
+ refcount_inc(&cm_id_priv->refcount);
+ return cm_id_priv;
+ }
}
return NULL;
}
-static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_id(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_id_table.rb_node;
struct rb_node *parent = NULL;
@@ -723,12 +713,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
return NULL;
}
-static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
- __be32 remote_id)
+static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
{
struct rb_node *node = cm.remote_id_table.rb_node;
struct cm_timewait_info *timewait_info;
+ struct cm_id_private *res = NULL;
+ spin_lock_irq(&cm.lock);
while (node) {
timewait_info = rb_entry(node, struct cm_timewait_info,
remote_id_node);
@@ -740,14 +732,18 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
node = node->rb_left;
else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
node = node->rb_right;
- else
- return timewait_info;
+ else {
+ res = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ break;
+ }
}
- return NULL;
+ spin_unlock_irq(&cm.lock);
+ return res;
}
-static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_qpn(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_qp_table.rb_node;
struct rb_node *parent = NULL;
@@ -776,13 +772,12 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
return NULL;
}
-static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
- *cm_id_priv)
+static struct cm_id_private *
+cm_insert_remote_sidr(struct cm_id_private *cm_id_priv)
{
struct rb_node **link = &cm.remote_sidr_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
- union ib_gid *port_gid = &cm_id_priv->av.dgid;
__be32 remote_id = cm_id_priv->id.remote_id;
while (*link) {
@@ -794,12 +789,9 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
link = &(*link)->rb_right;
else {
- int cmp;
- cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
- sizeof *port_gid);
- if (cmp < 0)
+ if (cur_cm_id_priv->sidr_slid < cm_id_priv->sidr_slid)
link = &(*link)->rb_left;
- else if (cmp > 0)
+ else if (cur_cm_id_priv->sidr_slid > cm_id_priv->sidr_slid)
link = &(*link)->rb_right;
else
return cur_cm_id_priv;
@@ -810,21 +802,12 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
return NULL;
}
-static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
- enum ib_cm_sidr_status status)
-{
- struct ib_cm_sidr_rep_param param;
-
- memset(&param, 0, sizeof param);
- param.status = status;
- ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
-}
-
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
- ib_cm_handler cm_handler,
- void *context)
+static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
{
struct cm_id_private *cm_id_priv;
+ u32 id;
int ret;
cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
@@ -836,26 +819,54 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
- ret = cm_alloc_id(cm_id_priv);
- if (ret)
- goto error;
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
- INIT_LIST_HEAD(&cm_id_priv->prim_list);
- INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
refcount_set(&cm_id_priv->refcount, 1);
- return &cm_id_priv->id;
+
+ ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
+ &cm.local_id_next, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
+ cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+
+ return cm_id_priv;
error:
kfree(cm_id_priv);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Make the ID visible to the MAD handlers and other threads that use the
+ * xarray.
+ */
+static void cm_finalize_id(struct cm_id_private *cm_id_priv)
+{
+ xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+ cm_id_priv, GFP_ATOMIC);
+}
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, context);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
+
+ cm_finalize_id(cm_id_priv);
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_create_cm_id);
-static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
struct cm_work *work;
@@ -874,6 +885,36 @@ static void cm_free_work(struct cm_work *work)
kfree(work);
}
+static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+ __releases(&cm_id_priv->lock)
+{
+ bool immediate;
+
+ /*
+ * To deliver the event to the user callback we have the drop the
+ * spinlock, however, we need to ensure that the user callback is single
+ * threaded and receives events in the temporal order. If there are
+ * already events being processed then thread new events onto a list,
+ * the thread currently processing will pick them up.
+ */
+ immediate = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!immediate) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ /*
+ * This routine always consumes incoming reference. Once queued
+ * to the work_list then a reference is held by the thread
+ * currently running cm_process_work() and this reference is not
+ * needed.
+ */
+ cm_deref_id(cm_id_priv);
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (immediate)
+ cm_process_work(cm_id_priv, work);
+}
+
static inline int cm_convert_to_ms(int iba_time)
{
/* approximate conversion to ms from 4.096us x 2^iba_time */
@@ -899,8 +940,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
return min(31, ack_timeout);
}
-static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
+ struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
+
if (timewait_info->inserted_remote_id) {
rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
timewait_info->inserted_remote_id = 0;
@@ -912,7 +955,7 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
}
}
-static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
{
struct cm_timewait_info *timewait_info;
@@ -932,12 +975,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
unsigned long flags;
struct cm_device *cm_dev;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
if (!cm_dev)
return;
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -956,6 +1001,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
msecs_to_jiffies(wait_time));
spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * The timewait_info is converted into a work and gets freed during
+ * cm_free_work() in cm_timewait_handler().
+ */
+ BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
cm_id_priv->timewait_info = NULL;
}
@@ -963,10 +1013,12 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_id_priv->id.state = IB_CM_IDLE;
if (cm_id_priv->timewait_info) {
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
@@ -979,104 +1031,116 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
struct cm_work *work;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
spin_lock_irq(&cm_id_priv->lock);
+retest:
switch (cm_id->state) {
case IB_CM_LISTEN:
- spin_unlock_irq(&cm_id_priv->lock);
-
- spin_lock_irq(&cm.lock);
+ spin_lock(&cm.lock);
if (--cm_id_priv->listen_sharecount > 0) {
/* The id is still shared. */
+ WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
cm_deref_id(cm_id_priv);
- spin_unlock_irq(&cm.lock);
return;
}
+ cm_id->state = IB_CM_IDLE;
rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
- spin_unlock_irq(&cm.lock);
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ spin_unlock(&cm.lock);
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_SIDR_REQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
- spin_lock_irq(&cm.lock);
- if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
- rb_erase(&cm_id_priv->sidr_id_node,
- &cm.remote_sidr_table);
- spin_unlock_irq(&cm.lock);
+ cm_send_sidr_rep_locked(cm_id_priv,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_REJECT });
+ /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+ cm_id->state = IB_CM_IDLE;
break;
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
- &cm_id_priv->id.device->node_guid,
- sizeof cm_id_priv->id.device->node_guid,
- NULL, 0);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
+ &cm_id_priv->id.device->node_guid,
+ sizeof(cm_id_priv->id.device->node_guid),
+ NULL, 0);
break;
case IB_CM_REQ_RCVD:
if (err == -ENOMEM) {
/* Do not reject to allow future retries. */
cm_reset_to_idle(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
} else {
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv,
+ IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+ NULL, 0);
}
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* Fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
+ goto retest;
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
break;
case IB_CM_ESTABLISHED:
- spin_unlock_irq(&cm_id_priv->lock);
- if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+ if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
+ cm_id->state = IB_CM_IDLE;
break;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ }
+ cm_send_dreq_locked(cm_id_priv, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
- break;
+ goto retest;
case IB_CM_DREQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_drep(cm_id, NULL, 0);
+ cm_send_drep_locked(cm_id_priv, NULL, 0);
+ WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+ goto retest;
+ case IB_CM_TIMEWAIT:
+ /*
+ * The cm_acquire_id in cm_timewait_handler will stop working
+ * once we do xa_erase below, so just move to idle here for
+ * consistency.
+ */
+ cm_id->state = IB_CM_IDLE;
break;
- default:
- spin_unlock_irq(&cm_id_priv->lock);
+ case IB_CM_IDLE:
break;
}
+ WARN_ON(cm_id->state != IB_CM_IDLE);
- spin_lock_irq(&cm.lock);
- if (!list_empty(&cm_id_priv->altr_list) &&
- (!cm_id_priv->altr_send_port_not_ready))
- list_del(&cm_id_priv->altr_list);
- if (!list_empty(&cm_id_priv->prim_list) &&
- (!cm_id_priv->prim_send_port_not_ready))
- list_del(&cm_id_priv->prim_list);
- spin_unlock_irq(&cm.lock);
+ spin_lock(&cm.lock);
+	/* Required for cleanup paths related to cm_req_handler() */
+ if (cm_id_priv->timewait_info) {
+ cm_remove_remote(cm_id_priv);
+ kfree(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
+ }
- cm_free_id(cm_id->local_id);
+ WARN_ON(cm_id_priv->listen_sharecount);
+ WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
+ if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
+ rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
- rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
- rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
+ cm_destroy_av(&cm_id_priv->av);
+ cm_destroy_av(&cm_id_priv->alt_av);
kfree(cm_id_priv->private_data);
kfree_rcu(cm_id_priv, rcu);
}
@@ -1087,70 +1151,63 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
}
EXPORT_SYMBOL(ib_destroy_cm_id);
+static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id)
+{
+ if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+ (service_id != IB_CM_ASSIGN_SERVICE_ID))
+ return -EINVAL;
+
+ if (service_id == IB_CM_ASSIGN_SERVICE_ID)
+ cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
+ else
+ cm_id_priv->id.service_id = service_id;
+
+ return 0;
+}
+
/**
- * __ib_cm_listen - Initiates listening on the specified service ID for
+ * ib_cm_listen - Initiates listening on the specified service ID for
* connection and service ID resolution requests.
* @cm_id: Connection identifier associated with the listen request.
* @service_id: Service identifier matched against incoming connection
* and service ID resolution requests. The service ID should be specified
 * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
- * @service_mask: Mask applied to service ID used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
*/
-static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
- __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id)
{
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
- int ret = 0;
-
- service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
- service_id &= service_mask;
- if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
- (service_id != IB_CM_ASSIGN_SERVICE_ID))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- if (cm_id->state != IB_CM_IDLE)
- return -EINVAL;
-
- cm_id->state = IB_CM_LISTEN;
- ++cm_id_priv->listen_sharecount;
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
- if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
- cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
- cm_id->service_mask = ~cpu_to_be64(0);
- } else {
- cm_id->service_id = service_id;
- cm_id->service_mask = service_mask;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->id.state != IB_CM_IDLE) {
+ ret = -EINVAL;
+ goto out;
}
- cur_cm_id_priv = cm_insert_listen(cm_id_priv);
- if (cur_cm_id_priv) {
- cm_id->state = IB_CM_IDLE;
- --cm_id_priv->listen_sharecount;
+ ret = cm_init_listen(cm_id_priv, service_id);
+ if (ret)
+ goto out;
+
+ if (!cm_insert_listen(cm_id_priv, NULL)) {
ret = -EBUSY;
+ goto out;
}
- return ret;
-}
-
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
-{
- unsigned long flags;
- int ret;
- spin_lock_irqsave(&cm.lock, flags);
- ret = __ib_cm_listen(cm_id, service_id, service_mask);
- spin_unlock_irqrestore(&cm.lock, flags);
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
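As a hedged illustration of the reworked interface above (not taken from this patch), a caller now passes only the service ID to ib_cm_listen(); the device, handler and service ID value below are placeholders:

	/* Illustrative only: listen on an explicit service ID. */
	static int example_start_listen(struct ib_device *device,
					ib_cm_handler handler)
	{
		struct ib_cm_id *cm_id;
		int ret;

		cm_id = ib_create_cm_id(device, handler, NULL);
		if (IS_ERR(cm_id))
			return PTR_ERR(cm_id);

		/* Service ID is given in network byte order. */
		ret = ib_cm_listen(cm_id, cpu_to_be64(0x1234ULL));
		if (ret)
			ib_destroy_cm_id(cm_id);
		return ret;
	}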
/**
- * Create a new listening ib_cm_id and listen on the given service ID.
+ * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
+ * the given service ID.
*
* If there's an existing ID listening on that same device and service ID,
* return it.
@@ -1169,60 +1226,57 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
ib_cm_handler cm_handler,
__be64 service_id)
{
+ struct cm_id_private *listen_id_priv;
struct cm_id_private *cm_id_priv;
- struct ib_cm_id *cm_id;
- unsigned long flags;
int err = 0;
/* Create an ID in advance, since the creation may sleep */
- cm_id = ib_create_cm_id(device, cm_handler, NULL);
- if (IS_ERR(cm_id))
- return cm_id;
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
- spin_lock_irqsave(&cm.lock, flags);
+ err = cm_init_listen(cm_id_priv, service_id);
+ if (err) {
+ ib_destroy_cm_id(&cm_id_priv->id);
+ return ERR_PTR(err);
+ }
- if (service_id == IB_CM_ASSIGN_SERVICE_ID)
- goto new_id;
-
- /* Find an existing ID */
- cm_id_priv = cm_find_listen(device, service_id);
- if (cm_id_priv) {
- if (cm_id->cm_handler != cm_handler || cm_id->context) {
- /* Sharing an ib_cm_id with different handlers is not
- * supported */
- spin_unlock_irqrestore(&cm.lock, flags);
- ib_destroy_cm_id(cm_id);
+ spin_lock_irq(&cm_id_priv->lock);
+ listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
+ if (listen_id_priv != cm_id_priv) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_destroy_cm_id(&cm_id_priv->id);
+ if (!listen_id_priv)
return ERR_PTR(-EINVAL);
- }
- refcount_inc(&cm_id_priv->refcount);
- ++cm_id_priv->listen_sharecount;
- spin_unlock_irqrestore(&cm.lock, flags);
-
- ib_destroy_cm_id(cm_id);
- cm_id = &cm_id_priv->id;
- return cm_id;
+ return &listen_id_priv->id;
}
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ spin_unlock_irq(&cm_id_priv->lock);
-new_id:
- /* Use newly created ID */
- err = __ib_cm_listen(cm_id, service_id, 0);
-
- spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * A listen ID does not need to be in the xarray since it does not
+ * receive mads, is not placed in the remote_id or remote_qpn rbtree,
+ * and does not enter timewait.
+ */
- if (err) {
- ib_destroy_cm_id(cm_id);
- return ERR_PTR(err);
- }
- return cm_id;
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
- u64 hi_tid, low_tid;
+ u64 hi_tid = 0, low_tid;
- hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- low_tid = (u64)cm_id_priv->id.local_id;
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ low_tid = (u64)cm_id_priv->id.local_id;
+ if (!cm_id_priv->av.port)
+ return cpu_to_be64(low_tid);
+
+ spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ if (cm_id_priv->av.port->mad_agent)
+ hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+ spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return cpu_to_be64(hi_tid | low_tid);
}
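A minimal sketch of the transaction-ID layout built above (illustrative, not part of the patch): the MAD agent's hi_tid occupies the upper 32 bits and the CM local_id the lower 32, with the upper half left zero when no MAD agent is currently bound:

	static __be64 example_form_tid(u32 agent_hi_tid, u32 local_id)
	{
		u64 hi = (u64)agent_hi_tid << 32;
		u64 lo = (u64)local_id;

		return cpu_to_be64(hi | lo);
	}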
@@ -1237,6 +1291,13 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
hdr->tid = tid;
}
+static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
+ __be64 tid, u32 attr_mod)
+{
+ cm_format_mad_hdr(hdr, attr_id, tid);
+ hdr->attr_mod = cpu_to_be32(attr_mod);
+}
+
static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
@@ -1244,13 +1305,14 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct sa_path_rec *pri_path = param->primary_path;
struct sa_path_rec *alt_path = param->alternate_path;
bool pri_ext = false;
+ __be16 lid;
if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
pri_path->opa.slid);
- cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv));
+ cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv), param->ece.attr_mod);
IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
be32_to_cpu(cm_id_priv->id.local_id));
@@ -1303,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg,
htons(ntohl(sa_path_get_dlid(
pri_path)))));
} else {
+
+ if (param->primary_path_inbound) {
+ lid = param->primary_path_inbound->ib.dlid;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(lid));
+ } else
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+
/* Work-around until there's a way to obtain remote LID info */
- IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
- be16_to_cpu(IB_LID_PERMISSIVE));
IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
be16_to_cpu(IB_LID_PERMISSIVE));
}
@@ -1373,6 +1442,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
alt_path->packet_life_time));
}
+ IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
if (param->private_data && param->private_data_len)
IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
@@ -1381,10 +1451,6 @@ static void cm_format_req(struct cm_req_msg *req_msg,
static int cm_validate_req_param(struct ib_cm_req_param *param)
{
- /* peer-to-peer not supported */
- if (param->peer_to_peer)
- return -EINVAL;
-
if (!param->primary_path)
return -EINVAL;
@@ -1407,7 +1473,9 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
int ib_send_cm_req(struct ib_cm_id *cm_id,
struct ib_cm_req_param *param)
{
+ struct cm_av av = {}, alt_av = {};
struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
struct cm_req_msg *req_msg;
unsigned long flags;
int ret;
@@ -1419,10 +1487,9 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
/* Verify that we're not in timewait. */
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_IDLE) {
+ if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1430,22 +1497,23 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
- goto out;
+ cm_id_priv->timewait_info = NULL;
+ return ret;
}
ret = cm_init_av_by_path(param->primary_path,
- param->ppath_sgid_attr, &cm_id_priv->av,
- cm_id_priv);
+ param->ppath_sgid_attr, &av);
if (ret)
- goto error1;
+ return ret;
if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path, NULL,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto error1;
+ &alt_av);
+ if (ret) {
+ cm_destroy_av(&av);
+ return ret;
+ }
}
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
@@ -1458,33 +1526,44 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->pkey = param->primary_path->pkey;
cm_id_priv->qp_type = param->qp_type;
- ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
- if (ret)
- goto error1;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ cm_move_av_from_path(&cm_id_priv->av, &av);
+ if (param->primary_path_outbound)
+ cm_id_priv->av.dlid_datapath =
+ be16_to_cpu(param->primary_path_outbound->ib.dlid);
+
+ if (param->alternate_path)
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
- req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
+ }
+
+ req_msg = (struct cm_req_msg *)msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
- cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+ msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- goto error2;
- }
+ trace_icm_send_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
BUG_ON(cm_id->state != IB_CM_IDLE);
cm_id->state = IB_CM_REQ_SENT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-
-error2: cm_free_msg(cm_id_priv->msg);
-error1: kfree(cm_id_priv->timewait_info);
-out: return ret;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
@@ -1519,9 +1598,12 @@ static int cm_issue_rej(struct cm_port *port,
IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
return ret;
}
@@ -1534,7 +1616,7 @@ static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
req_msg))));
}
-static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
+static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
struct sa_path_rec *path, union ib_gid *gid)
{
if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
@@ -1545,14 +1627,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
u32 lid;
if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(primary_path,
- IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
- req_msg));
+ sa_path_set_dlid(primary_path, wc->slid);
sa_path_set_slid(primary_path,
IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
req_msg));
@@ -1589,7 +1670,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
primary_path->dgid =
*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
@@ -1647,20 +1729,20 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
if (sa_path_is_roce(alt_path))
alt_path->roce.route_resolved = false;
}
- cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
+ cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
}
static u16 cm_get_bth_pkey(struct cm_work *work)
{
struct ib_device *ib_dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
u16 pkey;
int ret;
ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
if (ret) {
- dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
+ dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n",
port_num, pkey_index, ret);
return 0;
}
@@ -1669,7 +1751,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
}
/**
- * Convert OPA SGID to IB SGID
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
* ULPs (such as IPoIB) do not understand OPA GIDs and will
* reject them as the local_gid will not match the sgid. Therefore,
* change the pathrec's SGID to an IB SGID.
@@ -1681,7 +1763,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
struct sa_path_rec *path)
{
struct ib_device *dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
if (rdma_cap_opa_ah(dev, port_num) &&
(ib_is_opa_gid(&path->sgid))) {
@@ -1734,6 +1816,9 @@ static void cm_format_req_event(struct cm_work *work,
param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
+ param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
+ param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);
+
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}
@@ -1783,17 +1868,17 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg,
static void cm_format_rej(struct cm_rej_msg *rej_msg,
struct cm_id_private *cm_id_priv,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len, enum ib_cm_state state)
{
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
be32_to_cpu(cm_id_priv->id.remote_id));
- switch(cm_id_priv->id.state) {
+ switch (state) {
case IB_CM_REQ_RCVD:
IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
@@ -1834,12 +1919,16 @@ static void cm_dup_req_handler(struct cm_work *work,
struct ib_mad_send_buf *msg = NULL;
int ret;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REQ_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]);
/* Quick state check to discard duplicate REQs. */
- if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
+ spin_unlock_irq(&cm_id_priv->lock);
return;
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
if (ret)
@@ -1854,30 +1943,31 @@ static void cm_dup_req_handler(struct cm_work *work,
cm_id_priv->private_data_len);
break;
case IB_CM_TIMEWAIT:
- cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
- IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv,
+ IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0,
+ IB_CM_TIMEWAIT);
break;
default:
goto unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
return;
unlock: spin_unlock_irq(&cm_id_priv->lock);
-free: cm_free_msg(msg);
+free: cm_free_response_msg(msg);
}
-static struct cm_id_private * cm_match_req(struct cm_work *work,
- struct cm_id_private *cm_id_priv)
+static struct cm_id_private *cm_match_req(struct cm_work *work,
+ struct cm_id_private *cm_id_priv)
{
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
- struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1898,7 +1988,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Check for stale connections. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -1907,8 +1997,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
return NULL;
@@ -1919,19 +2008,14 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
cm_id_priv->id.device,
cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
NULL, 0);
- goto out;
+ return NULL;
}
- refcount_inc(&listen_cm_id_priv->refcount);
- refcount_inc(&cm_id_priv->refcount);
- cm_id_priv->id.state = IB_CM_REQ_RCVD;
- atomic_inc(&cm_id_priv->work_count);
spin_unlock_irq(&cm.lock);
-out:
return listen_cm_id_priv;
}
@@ -1973,7 +2057,6 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
static int cm_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
const struct ib_global_route *grh;
@@ -1982,13 +2065,32 @@ static int cm_req_handler(struct cm_work *work)
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
cm_id_priv->id.remote_id =
cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+ cm_id_priv->tid = req_msg->hdr.tid;
+ cm_id_priv->timeout_ms = cm_convert_to_ms(
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+ cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+ cm_id_priv->remote_qpn =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+ cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
@@ -1998,43 +2100,39 @@ static int cm_req_handler(struct cm_work *work)
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
goto destroy;
}
- cm_id_priv->timewait_info->work.remote_id =
- cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
cm_id_priv->timewait_info->remote_ca_guid =
cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
- cm_id_priv->timewait_info->remote_qpn =
- cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;
+
+ /*
+ * Note that the ID pointer is not in the xarray at this point,
+ * so this set is only visible to the local thread.
+ */
+ cm_id_priv->id.state = IB_CM_REQ_RCVD;
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
- pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
- be32_to_cpu(cm_id->local_id));
+ trace_icm_no_listener_err(&cm_id_priv->id);
+ cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
- goto free_timeinfo;
+ goto destroy;
}
- cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_id_priv->id.service_id =
- cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
-
- cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
-
memset(&work->path[0], 0, sizeof(work->path[0]));
if (cm_req_has_alt_path(req_msg))
memset(&work->path[1], 0, sizeof(work->path[1]));
grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
gid_attr = grh->sgid_attr;
- if (gid_attr &&
- rdma_protocol_roce(work->port->cm_dev->ib_device,
- work->port->port_num)) {
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) {
work->path[0].rec_type =
sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
+ cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_path_set_rec_type(
work->port->cm_dev->ib_device, work->port->port_num,
&work->path[0],
@@ -2044,13 +2142,15 @@ static int cm_req_handler(struct cm_work *work)
if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
+ &work->path[1], work->mad_recv_wc->wc);
if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
- cm_id_priv);
+
+ /* This destroy call is needed to pair with cm_init_av_for_response */
+ cm_destroy_av(&cm_id_priv->av);
+ ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
if (ret) {
int err;
@@ -2058,54 +2158,55 @@ static int cm_req_handler(struct cm_work *work)
work->port->port_num, 0,
&work->path[0].sgid);
if (err)
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
NULL, 0, NULL, 0);
else
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid,
sizeof(work->path[0].sgid),
NULL, 0);
goto rejected;
}
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB)
+ cm_id_priv->av.dlid_datapath =
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg);
+
if (cm_req_has_alt_path(req_msg)) {
ret = cm_init_av_by_path(&work->path[1], NULL,
- &cm_id_priv->alt_av, cm_id_priv);
+ &cm_id_priv->alt_av);
if (ret) {
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+ ib_send_cm_rej(&cm_id_priv->id,
+ IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
- cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->timeout_ms = cm_convert_to_ms(
- IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
- cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
- cm_id_priv->remote_qpn =
- cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
- cm_id_priv->initiator_depth =
- IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
- cm_id_priv->responder_resources =
- IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
- cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
- cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
- cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
- cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
- cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
- cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
+
+ /* Now MAD handlers can see the new ID */
+ spin_lock_irq(&cm_id_priv->lock);
+ cm_finalize_id(cm_id_priv);
+
+ /* Refcount belongs to the event, pairs with cm_process_work() */
+ refcount_inc(&cm_id_priv->refcount);
+ cm_queue_work_unlock(cm_id_priv, work);
+ /*
+ * Since this ID was just created and was not made visible to other MAD
+ * handlers until the cm_finalize_id() above we know that the
+ * cm_process_work() will deliver the event and the listen_cm_id
+ * embedded in the event can be derefed here.
+ */
cm_deref_id(listen_cm_id_priv);
return 0;
rejected:
- refcount_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
-free_timeinfo:
- kfree(cm_id_priv->timewait_info);
destroy:
- ib_destroy_cm_id(cm_id);
+ ib_destroy_cm_id(&cm_id_priv->id);
return ret;
}
@@ -2113,7 +2214,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_rep_param *param)
{
- cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
+ cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid,
+ param->ece.attr_mod);
IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
be32_to_cpu(cm_id_priv->id.local_id));
IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
@@ -2140,6 +2242,10 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
}
+ IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id);
+ IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8);
+ IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16);
+
if (param->private_data && param->private_data_len)
IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
param->private_data_len);
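The 24-bit ECE vendor ID set above is carried in three byte-wide REP fields and rebuilt on the receive side; a hedged round-trip sketch (not part of the patch) of that packing:

	static void example_vendor_id_roundtrip(u32 vendor_id)
	{
		u8 l = vendor_id & 0xff;
		u8 m = (vendor_id >> 8) & 0xff;
		u8 h = (vendor_id >> 16) & 0xff;
		u32 rebuilt = ((u32)h << 16) | ((u32)m << 8) | l;

		WARN_ON(rebuilt != (vendor_id & 0xffffff));
	}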
@@ -2162,36 +2268,42 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto out;
+ }
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_REP_SENT;
- cm_id_priv->msg = msg;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ WARN_ONCE(param->qp_num & 0xFF000000,
+ "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
+ param->qp_num);
cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out_free:
+ cm_free_priv_msg(msg);
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rep);
@@ -2233,19 +2345,21 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto error;
+ }
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2284,6 +2398,11 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+ param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg);
+ param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod);
+
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
}
@@ -2302,8 +2421,8 @@ static void cm_dup_rep_handler(struct cm_work *work)
if (!cm_id_priv)
return;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REP_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
if (ret)
goto deref;
@@ -2322,13 +2441,14 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
goto deref;
unlock: spin_unlock_irq(&cm_id_priv->lock);
-free: cm_free_msg(msg);
+free: cm_free_response_msg(msg);
deref: cm_deref_id(cm_id_priv);
}
@@ -2338,7 +2458,6 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_rep_msg *rep_msg;
int ret;
struct cm_id_private *cur_cm_id_priv;
- struct ib_cm_id *cm_id;
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2346,7 +2465,7 @@ static int cm_rep_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ trace_icm_remote_no_priv_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2359,13 +2478,12 @@ static int cm_rep_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
break;
default:
- spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug(
- "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
+ trace_icm_rep_unknown_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
- IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
+ spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
@@ -2381,16 +2499,14 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ trace_icm_insert_failed_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
/* Check for a stale connection. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- rb_erase(&cm_id_priv->timewait_info->remote_id_node,
- &cm.remote_id_table);
- cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -2400,14 +2516,12 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug(
- "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
@@ -2434,18 +2548,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->alt_av.timeout - 1);
- /* todo: handle peer_to_peer */
-
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
error:
@@ -2456,7 +2560,6 @@ error:
static int cm_establish_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
- int ret;
/* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
@@ -2469,16 +2572,8 @@ static int cm_establish_handler(struct cm_work *work)
goto out;
}
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2489,7 +2584,6 @@ static int cm_rtu_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rtu_msg *rtu_msg;
- int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2505,22 +2599,14 @@ static int cm_rtu_handler(struct cm_work *work)
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
spin_unlock_irq(&cm_id_priv->lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_RTU_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_RTU_COUNTER]);
goto out;
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2546,35 +2632,30 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
private_data_len);
}
-int ib_send_cm_dreq(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
+ const void *private_data, u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED) {
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
- ret = -EINVAL;
- goto out;
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
+ trace_icm_dreq_skipped(&cm_id_priv->id);
+ return -EINVAL;
}
- if (cm_id->lap_state == IB_CM_LAP_SENT ||
- cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->msg);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret) {
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv);
- goto out;
+ return PTR_ERR(msg);
}
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
@@ -2582,17 +2663,29 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
+ cm_free_priv_msg(msg);
return ret;
}
- cm_id->state = IB_CM_DREQ_SENT;
- cm_id_priv->msg = msg;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_id_priv->id.state = IB_CM_DREQ_SENT;
+ return 0;
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_dreq);
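The DREQ path above now follows the locked/unlocked split used throughout this series; a simplified sketch of that pattern (illustrative only, function names are placeholders):

	static int example_send_locked(struct cm_id_private *cm_id_priv)
	{
		lockdep_assert_held(&cm_id_priv->lock);
		/* ...format and post the MAD while holding the lock... */
		return 0;
	}

	static int example_send(struct ib_cm_id *cm_id)
	{
		struct cm_id_private *cm_id_priv =
			container_of(cm_id, struct cm_id_private, id);
		unsigned long flags;
		int ret;

		spin_lock_irqsave(&cm_id_priv->lock, flags);
		ret = example_send_locked(cm_id_priv);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		return ret;
	}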
@@ -2613,51 +2706,58 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
private_data_len);
}
-int ib_send_cm_drep(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- unsigned long flags;
- void *data;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
return -EINVAL;
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_DREQ_RCVD) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
- __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
+ if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+ trace_icm_send_drep_err(&cm_id_priv->id);
+ kfree(private_data);
return -EINVAL;
}
- cm_set_private_data(cm_id_priv, data, private_data_len);
+ cm_set_private_data(cm_id_priv, private_data, private_data_len);
cm_enter_timewait(cm_id_priv);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
+ return 0;
+}
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ void *data;
+ int ret;
+
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_drep_locked(cm_id_priv, data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_drep);
@@ -2683,9 +2783,12 @@ static int cm_issue_drep(struct cm_port *port,
IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
return ret;
}
@@ -2695,19 +2798,17 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
if (!cm_id_priv) {
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug(
- "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
@@ -2723,18 +2824,17 @@ static int cm_dreq_handler(struct cm_work *work)
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- break;
- case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_TIMEWAIT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
if (IS_ERR(msg))
goto unlock;
@@ -2746,29 +2846,19 @@ static int cm_dreq_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL))
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
goto deref;
case IB_CM_DREQ_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
cm_id_priv->tid = dreq_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2780,7 +2870,6 @@ static int cm_drep_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_drep_msg *drep_msg;
- int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2800,81 +2889,82 @@ static int cm_drep_handler(struct cm_work *work)
}
cm_enter_timewait(cm_id_priv);
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
return -EINVAL;
}
-int ib_send_cm_rej(struct ib_cm_id *cm_id,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
+ enum ib_cm_state state = cm_id_priv->id.state;
struct ib_mad_send_buf *msg;
- unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
(ari && ari_length > IB_CM_REJ_ARI_LENGTH))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state) {
+ switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_reset_to_idle(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_enter_timewait(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
- ret = -EINVAL;
- goto out;
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
+ return -EINVAL;
}
- if (ret)
- goto out;
-
+ trace_icm_send_rej(&cm_id_priv->id, reason);
ret = ib_post_send_mad(msg, NULL);
- if (ret)
+ if (ret) {
cm_free_msg(msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
+ void *ari, u8 ari_length, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length,
+ private_data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rej);
@@ -2893,26 +2983,17 @@ static void cm_format_rej_event(struct cm_work *work)
IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
}
-static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
+static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
{
- struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
__be32 remote_id;
remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
- spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id(
+ cm_id_priv = cm_find_remote_id(
*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
remote_id);
- if (!timewait_info) {
- spin_unlock_irq(&cm.lock);
- return NULL;
- }
- cm_id_priv =
- cm_acquire_id(timewait_info->work.local_id, remote_id);
- spin_unlock_irq(&cm.lock);
} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
CM_MSG_RESPONSE_REQ)
cm_id_priv = cm_acquire_id(
@@ -2930,7 +3011,6 @@ static int cm_rej_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rej_msg *rej_msg;
- int ret;
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_rejected_id(rej_msg);
@@ -2945,8 +3025,8 @@ static int cm_rej_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
@@ -2955,8 +3035,8 @@ static int cm_rej_handler(struct cm_work *work)
cm_reset_to_idle(cm_id_priv);
break;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
cm_enter_timewait(cm_id_priv);
@@ -2965,30 +3045,18 @@ static int cm_rej_handler(struct cm_work *work)
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
break;
}
- /* fall through */
+ fallthrough;
default:
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
- ret = -EINVAL;
goto out;
}
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3019,7 +3087,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch(cm_id_priv->id.state) {
+ switch (cm_id_priv->id.state) {
case IB_CM_REQ_RCVD:
cm_state = IB_CM_MRA_REQ_SENT;
lap_state = cm_id->lap_state;
@@ -3037,26 +3105,27 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
msg_response = CM_MSG_RESPONSE_OTHER;
break;
}
- /* fall through */
+ fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
- goto error1;
+ goto error_unlock;
}
if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error1;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto error_unlock;
+ }
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
msg_response, service_timeout,
private_data, private_data_len);
+ trace_icm_send_mra(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
- goto error2;
+ goto error_free_msg;
}
cm_id->state = cm_state;
@@ -3066,18 +3135,16 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- return ret;
-
-error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
+error_free_msg:
cm_free_msg(msg);
+error_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ kfree(data);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_mra);
-static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
+static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
{
switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
case CM_MSG_RESPONSE_REQ:
@@ -3098,7 +3165,7 @@ static int cm_mra_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_mra_msg *mra_msg;
- int timeout, ret;
+ int timeout;
mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_mraed_id(mra_msg);
@@ -3117,16 +3184,14 @@ static int cm_mra_handler(struct cm_work *work)
case IB_CM_REQ_SENT:
if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
CM_MSG_RESPONSE_REQ ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
CM_MSG_RESPONSE_REP ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
@@ -3134,39 +3199,28 @@ static int cm_mra_handler(struct cm_work *work)
if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout)) {
+ ib_modify_mad(cm_id_priv->msg, timeout)) {
if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- atomic_long_inc(&work->port->
- counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
goto out;
}
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
case IB_CM_MRA_REQ_RCVD:
case IB_CM_MRA_REP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
- /* fall through */
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
+ fallthrough;
default:
- pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
cm_id_priv->msg->context[1] = (void *) (unsigned long)
cm_id_priv->id.state;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
spin_unlock_irq(&cm_id_priv->lock);
@@ -3226,6 +3280,8 @@ static int cm_lap_handler(struct cm_work *work)
struct cm_lap_msg *lap_msg;
struct ib_cm_lap_event_param *param;
struct ib_mad_send_buf *msg = NULL;
+ struct rdma_ah_attr ah_attr;
+ struct cm_av alt_av = {};
int ret;
/* Currently Alternate path messages are not supported for
@@ -3254,7 +3310,25 @@ static int cm_lap_handler(struct cm_work *work)
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
+ ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto deref;
+
+ ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
+ if (ret) {
+ rdma_destroy_ah_attr(&ah_attr);
+ goto deref;
+ }
+
spin_lock_irq(&cm_id_priv->lock);
+ cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+ &ah_attr, &cm_id_priv->av);
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
+
if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
goto unlock;
@@ -3263,8 +3337,8 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_LAP_IDLE:
break;
case IB_CM_MRA_LAP_SENT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
if (IS_ERR(msg))
goto unlock;
@@ -3278,38 +3352,19 @@ static int cm_lap_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL))
- cm_free_msg(msg);
+ cm_free_response_msg(msg);
goto deref;
case IB_CM_LAP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
goto unlock;
default:
goto unlock;
}
- ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
- if (ret)
- goto unlock;
-
- ret = cm_init_av_by_path(param->alternate_path, NULL,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto unlock;
-
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -3321,7 +3376,6 @@ static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_apr_msg *apr_msg;
- int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
@@ -3354,18 +3408,8 @@ static int cm_apr_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- cm_id_priv->msg = NULL;
-
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3376,7 +3420,6 @@ static int cm_timewait_handler(struct cm_work *work)
{
struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
- int ret;
timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
@@ -3395,15 +3438,7 @@ static int cm_timewait_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3433,6 +3468,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
+ struct cm_av av = {};
unsigned long flags;
int ret;
@@ -3441,40 +3477,42 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, param->sgid_attr,
- &cm_id_priv->av,
- cm_id_priv);
+ ret = cm_init_av_by_path(param->path, param->sgid_attr, &av);
if (ret)
- goto out;
+ return ret;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ cm_move_av_from_path(&cm_id_priv->av, &av);
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
cm_id_priv->max_cm_retries = param->max_cm_retries;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
+ if (cm_id->state != IB_CM_IDLE) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ msg = cm_alloc_priv_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
+ }
- cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
+ cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
param);
msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
+ msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
- ret = ib_post_send_mad(msg, NULL);
- else
- ret = -EINVAL;
-
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- goto out;
- }
+ trace_icm_send_sidr_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_SIDR_REQ_SENT;
- cm_id_priv->msg = msg;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-out:
+ return 0;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
@@ -3502,64 +3540,73 @@ static void cm_format_sidr_req_event(struct cm_work *work,
static int cm_sidr_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+ struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
int ret;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
/* Record SGID/SLID and request ID for lookup. */
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
+
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
+ cm_id_priv->tid = sidr_req_msg->hdr.tid;
+
wc = work->mad_recv_wc->wc;
- cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
- cm_id_priv->av.dgid.global.interface_id = 0;
+ cm_id_priv->sidr_slid = wc->slid;
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
if (ret)
goto out;
- cm_id_priv->id.remote_id =
- cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
- cm_id_priv->tid = sidr_req_msg->hdr.tid;
- atomic_inc(&cm_id_priv->work_count);
-
spin_lock_irq(&cm.lock);
- cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
- if (cur_cm_id_priv) {
+ listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+ if (listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_SIDR_REQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_SIDR_REQ_COUNTER]);
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
- cur_cm_id_priv = cm_find_listen(
- cm_id->device,
- cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
- if (!cur_cm_id_priv) {
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ cm_id_priv->id.service_id);
+ if (!listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
+ ib_send_cm_sidr_rep(&cm_id_priv->id,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_UNSUPPORTED });
goto out; /* No match. */
}
- refcount_inc(&cur_cm_id_priv->refcount);
- refcount_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
- cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = cur_cm_id_priv->id.context;
- cm_id_priv->id.service_id =
- cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
- cm_deref_id(cur_cm_id_priv);
+ /*
+ * A SIDR ID does not need to be in the xarray since it does not receive
+ * mads, is not placed in the remote_id or remote_qpn rbtree, and does
+ * not enter timewait.
+ */
+
+ cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+ cm_free_work(work);
+ /*
+ * A pointer to the listen_cm_id is held in the event, so this deref
+ * must be after the event is delivered above.
+ */
+ cm_deref_id(listen_cm_id_priv);
+ if (ret)
+ cm_destroy_id(&cm_id_priv->id, ret);
return 0;
out:
ib_destroy_cm_id(&cm_id_priv->id);
@@ -3570,8 +3617,8 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
- cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
- cm_id_priv->tid);
+ cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+ cm_id_priv->tid, param->ece.attr_mod);
IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
be32_to_cpu(cm_id_priv->id.remote_id));
IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
@@ -3579,6 +3626,10 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
be64_to_cpu(cm_id_priv->id.service_id));
IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg,
+ param->ece.vendor_id & 0xFF);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg,
+ (param->ece.vendor_id >> 8) & 0xFF);
if (param->info && param->info_length)
IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
@@ -3589,41 +3640,36 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
param->private_data, param->private_data_len);
}
-int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
- struct ib_cm_sidr_rep_param *param)
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
(param->private_data &&
param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
- ret = -EINVAL;
- goto error;
- }
+ if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
+ return -EINVAL;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
- cm_id->state = IB_CM_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
+ cm_id_priv->id.state = IB_CM_IDLE;
spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
@@ -3631,8 +3677,19 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
}
spin_unlock_irqrestore(&cm.lock, flags);
return 0;
+}
-error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_rep_param *param)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_sidr_rep_locked(cm_id_priv, param);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);
@@ -3676,7 +3733,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
spin_unlock_irq(&cm_id_priv->lock);
cm_format_sidr_rep_event(work, cm_id_priv);
@@ -3687,25 +3744,28 @@ out:
return -EINVAL;
}
-static void cm_process_send_error(struct ib_mad_send_buf *msg,
+static void cm_process_send_error(struct cm_id_private *cm_id_priv,
+ struct ib_mad_send_buf *msg,
+ enum ib_cm_state state,
enum ib_wc_status wc_status)
{
- struct cm_id_private *cm_id_priv;
- struct ib_cm_event cm_event;
- enum ib_cm_state state;
+ struct ib_cm_event cm_event = {};
int ret;
- memset(&cm_event, 0, sizeof cm_event);
- cm_id_priv = msg->context[0];
-
/* Discard old sends or ones without a response. */
spin_lock_irq(&cm_id_priv->lock);
- state = (enum ib_cm_state) (unsigned long) msg->context[1];
- if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
- goto discard;
+ if (msg != cm_id_priv->msg) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_free_msg(msg);
+ return;
+ }
+ cm_free_priv_msg(msg);
+
+ if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS ||
+ wc_status == IB_WC_WR_FLUSH_ERR)
+ goto out_unlock;
- pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
- state, ib_wc_status_msg(wc_status));
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3726,26 +3786,27 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
cm_event.event = IB_CM_SIDR_REQ_ERROR;
break;
default:
- goto discard;
+ goto out_unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
cm_event.param.send_status = wc_status;
/* No other events can occur on the cm_id at this point. */
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
- cm_free_msg(msg);
if (ret)
ib_destroy_cm_id(&cm_id_priv->id);
return;
-discard:
+out_unlock:
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_msg(msg);
}
static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_id_private *cm_id_priv = msg->context[0];
+ enum ib_cm_state state =
+ (enum ib_cm_state)(unsigned long)msg->context[1];
struct cm_port *port;
u16 attr_index;
@@ -3758,28 +3819,19 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
* set to a cm_id), and is not a REJ, then it is a send that was
* manually retried.
*/
- if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ if (!cm_id_priv && (attr_index != CM_REJ_COUNTER))
msg->retries = 1;
- atomic_long_add(1 + msg->retries,
- &port->counter_group[CM_XMIT].counter[attr_index]);
+ atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]);
if (msg->retries)
atomic_long_add(msg->retries,
- &port->counter_group[CM_XMIT_RETRIES].
- counter[attr_index]);
+ &port->counters[CM_XMIT_RETRIES][attr_index]);
- switch (mad_send_wc->status) {
- case IB_WC_SUCCESS:
- case IB_WC_WR_FLUSH_ERR:
- cm_free_msg(msg);
- break;
- default:
- if (msg->context[0] && msg->context[1])
- cm_process_send_error(msg, mad_send_wc->status);
- else
- cm_free_msg(msg);
- break;
- }
+ if (cm_id_priv)
+ cm_process_send_error(cm_id_priv, msg, state,
+ mad_send_wc->status);
+ else
+ cm_free_response_msg(msg);
}
static void cm_work_handler(struct work_struct *_work)
@@ -3828,7 +3880,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3854,8 +3906,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state)
- {
+ switch (cm_id->state) {
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
cm_id->state = IB_CM_ESTABLISHED;
@@ -3864,8 +3915,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@@ -3905,9 +3955,7 @@ out:
static int cm_migrate(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct cm_av tmp_av;
unsigned long flags;
- int tmp_send_port_not_ready;
int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3916,14 +3964,7 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE;
- /* Swap address vector */
- tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av;
- cm_id_priv->alt_av = tmp_av;
- /* Swap port send ready state */
- tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
- cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
- cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else
ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -4005,8 +4046,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
}
attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
- atomic_long_inc(&port->counter_group[CM_RECV].
- counter[attr_id - CM_ATTR_ID_OFFSET]);
+ atomic_long_inc(&port->counters[CM_RECV][attr_id - CM_ATTR_ID_OFFSET]);
work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
if (!work) {
@@ -4058,13 +4098,12 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_ATOMIC;
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
- qp_attr->port_num = cm_id_priv->av.port->port_num;
+ if (cm_id_priv->av.port)
+ qp_attr->port_num = cm_id_priv->av.port->port_num;
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4091,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) &&
+ cm_id_priv->av.dlid_datapath &&
+ (cm_id_priv->av.dlid_datapath != 0xffff))
+ qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
@@ -4102,7 +4145,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) &&
+ cm_id_priv->alt_av.port) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -4112,9 +4156,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4151,7 +4193,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
- /* fall through */
+ fallthrough;
case IB_QPT_XRC_TGT:
*qp_attr_mask |= IB_QP_TIMEOUT;
qp_attr->timeout = cm_id_priv->av.timeout;
@@ -4165,7 +4207,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
}
} else {
*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
- qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+ if (cm_id_priv->alt_av.port)
+ qp_attr->alt_port_num =
+ cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4174,9 +4218,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4210,75 +4252,76 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
- char *buf)
+static ssize_t cm_show_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
- struct cm_counter_group *group;
- struct cm_counter_attribute *cm_attr;
-
- group = container_of(obj, struct cm_counter_group, obj);
- cm_attr = container_of(attr, struct cm_counter_attribute, attr);
-
- return sprintf(buf, "%ld\n",
- atomic_long_read(&group->counter[cm_attr->index]));
-}
-
-static const struct sysfs_ops cm_counter_ops = {
- .show = cm_show_counter
-};
+ struct cm_counter_attribute *cm_attr =
+ container_of(attr, struct cm_counter_attribute, attr);
+ struct cm_device *cm_dev = ib_get_client_data(ibdev, &cm_client);
-static struct kobj_type cm_counter_obj_type = {
- .sysfs_ops = &cm_counter_ops,
- .default_attrs = cm_counter_default_attrs
-};
+ if (WARN_ON(!cm_dev))
+ return -EINVAL;
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ return sysfs_emit(
+ buf, "%ld\n",
+ atomic_long_read(
+ &cm_dev->port[port_num - 1]
+ ->counters[cm_attr->group][cm_attr->index]));
}
-struct class cm_class = {
- .owner = THIS_MODULE,
- .name = "infiniband_cm",
- .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
-static int cm_create_port_fs(struct cm_port *port)
-{
- int i, ret;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++) {
- ret = ib_port_register_module_stat(port->cm_dev->ib_device,
- port->port_num,
- &port->counter_group[i].obj,
- &cm_counter_obj_type,
- counter_group_names[i]);
- if (ret)
- goto error;
+#define CM_COUNTER_ATTR(_name, _group, _index) \
+ { \
+ .attr = __ATTR(_name, 0444, cm_show_counter, NULL), \
+ .group = _group, .index = _index \
}
- return 0;
-
-error:
- while (i--)
- ib_port_unregister_module_stat(&port->counter_group[i].obj);
- return ret;
-
-}
-
-static void cm_remove_port_fs(struct cm_port *port)
-{
- int i;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++)
- ib_port_unregister_module_stat(&port->counter_group[i].obj);
+#define CM_COUNTER_GROUP(_group, _name) \
+ static struct cm_counter_attribute cm_counter_attr_##_group[] = { \
+ CM_COUNTER_ATTR(req, _group, CM_REQ_COUNTER), \
+ CM_COUNTER_ATTR(mra, _group, CM_MRA_COUNTER), \
+ CM_COUNTER_ATTR(rej, _group, CM_REJ_COUNTER), \
+ CM_COUNTER_ATTR(rep, _group, CM_REP_COUNTER), \
+ CM_COUNTER_ATTR(rtu, _group, CM_RTU_COUNTER), \
+ CM_COUNTER_ATTR(dreq, _group, CM_DREQ_COUNTER), \
+ CM_COUNTER_ATTR(drep, _group, CM_DREP_COUNTER), \
+ CM_COUNTER_ATTR(sidr_req, _group, CM_SIDR_REQ_COUNTER), \
+ CM_COUNTER_ATTR(sidr_rep, _group, CM_SIDR_REP_COUNTER), \
+ CM_COUNTER_ATTR(lap, _group, CM_LAP_COUNTER), \
+ CM_COUNTER_ATTR(apr, _group, CM_APR_COUNTER), \
+ }; \
+ static struct attribute *cm_counter_attrs_##_group[] = { \
+ &cm_counter_attr_##_group[0].attr.attr, \
+ &cm_counter_attr_##_group[1].attr.attr, \
+ &cm_counter_attr_##_group[2].attr.attr, \
+ &cm_counter_attr_##_group[3].attr.attr, \
+ &cm_counter_attr_##_group[4].attr.attr, \
+ &cm_counter_attr_##_group[5].attr.attr, \
+ &cm_counter_attr_##_group[6].attr.attr, \
+ &cm_counter_attr_##_group[7].attr.attr, \
+ &cm_counter_attr_##_group[8].attr.attr, \
+ &cm_counter_attr_##_group[9].attr.attr, \
+ &cm_counter_attr_##_group[10].attr.attr, \
+ NULL, \
+ }; \
+ static const struct attribute_group cm_counter_group_##_group = { \
+ .name = _name, \
+ .attrs = cm_counter_attrs_##_group, \
+ };
-}
+CM_COUNTER_GROUP(CM_XMIT, "cm_tx_msgs")
+CM_COUNTER_GROUP(CM_XMIT_RETRIES, "cm_tx_retries")
+CM_COUNTER_GROUP(CM_RECV, "cm_rx_msgs")
+CM_COUNTER_GROUP(CM_RECV_DUPLICATES, "cm_rx_duplicates")
+
+static const struct attribute_group *cm_counter_groups[] = {
+ &cm_counter_group_CM_XMIT,
+ &cm_counter_group_CM_XMIT_RETRIES,
+ &cm_counter_group_CM_RECV,
+ &cm_counter_group_CM_RECV_DUPLICATES,
+ NULL,
+};
-static void cm_add_one(struct ib_device *ib_device)
+static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
@@ -4292,34 +4335,38 @@ static void cm_add_one(struct ib_device *ib_device)
unsigned long flags;
int ret;
int count = 0;
- u8 i;
+ u32 i;
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
if (!cm_dev)
- return;
+ return -ENOMEM;
+ kref_init(&cm_dev->kref);
+ spin_lock_init(&cm_dev->mad_agent_lock);
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
+ ib_set_client_data(ib_device, &cm_client, cm_dev);
+
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = kzalloc(sizeof *port, GFP_KERNEL);
- if (!port)
+ if (!port) {
+ ret = -ENOMEM;
goto error1;
+ }
cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
port->port_num = i;
- INIT_LIST_HEAD(&port->cm_priv_prim_list);
- INIT_LIST_HEAD(&port->cm_priv_altr_list);
-
- ret = cm_create_port_fs(port);
+ ret = ib_port_register_client_groups(ib_device, i,
+ cm_counter_groups);
if (ret)
goto error1;
@@ -4331,8 +4378,10 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
- if (IS_ERR(port->mad_agent))
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
goto error2;
+ }
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
@@ -4341,24 +4390,23 @@ static void cm_add_one(struct ib_device *ib_device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
-
- ib_set_client_data(ib_device, &cm_client, cm_dev);
+ }
write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
- return;
+ return 0;
error3:
ib_unregister_mad_agent(port->mad_agent);
error2:
- cm_remove_port_fs(port);
+ ib_port_unregister_client_groups(ib_device, i, cm_counter_groups);
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
- kfree(port);
while (--i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4366,27 +4414,23 @@ error1:
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
- cm_remove_port_fs(port);
- kfree(port);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
free:
- kfree(cm_dev);
+ cm_device_put(cm_dev);
+ return ret;
}
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev = client_data;
struct cm_port *port;
- struct cm_id_private *cm_id_priv;
- struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
};
unsigned long flags;
- int i;
-
- if (!cm_dev)
- return;
+ u32 i;
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
@@ -4396,35 +4440,34 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_dev->going_down = 1;
spin_unlock_irq(&cm.lock);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
+ struct ib_mad_agent *mad_agent;
+
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = cm_dev->port[i-1];
+ mad_agent = port->mad_agent;
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
- /* Mark all the cm_id's as not valid */
- spin_lock_irq(&cm.lock);
- list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
- cm_id_priv->altr_send_port_not_ready = 1;
- list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
- cm_id_priv->prim_send_port_not_ready = 1;
- spin_unlock_irq(&cm.lock);
/*
* We flush the queue here after the going_down set, this
* verify that no new works will be queued in the recv handler,
* after that we can call the unregister_mad_agent
*/
flush_workqueue(cm.wq);
- spin_lock_irq(&cm.state_lock);
- cur_mad_agent = port->mad_agent;
+ /*
+ * The above ensures no call paths from the work are running,
+ * the remaining paths all take the mad_agent_lock.
+ */
+ spin_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
- spin_unlock_irq(&cm.state_lock);
- ib_unregister_mad_agent(cur_mad_agent);
- cm_remove_port_fs(port);
- kfree(port);
+ spin_unlock(&cm_dev->mad_agent_lock);
+ ib_unregister_mad_agent(mad_agent);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
- kfree(cm_dev);
+ cm_device_put(cm_dev);
}
static int __init ib_cm_init(void)
@@ -4434,22 +4477,15 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
- spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
cm.remote_qp_table = RB_ROOT;
cm.remote_sidr_table = RB_ROOT;
- xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
- ret = class_register(&cm_class);
- if (ret) {
- ret = -ENOMEM;
- goto error1;
- }
-
cm.wq = alloc_workqueue("ib_cm", 0, 1);
if (!cm.wq) {
ret = -ENOMEM;
@@ -4464,8 +4500,6 @@ static int __init ib_cm_init(void)
error3:
destroy_workqueue(cm.wq);
error2:
- class_unregister(&cm_class);
-error1:
return ret;
}
@@ -4486,7 +4520,6 @@ static void __exit ib_cm_cleanup(void)
kfree(timewait_info);
}
- class_unregister(&cm_class);
WARN_ON(!xa_empty(&cm.local_id_table));
}
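
The counter conversion above replaces the old kobject-based cm_counter_group sysfs code with ib_port_register_client_groups(), so the same per-port CM statistics stay visible through the port's attribute groups. A minimal userspace sketch of reading one of them follows; the device name "mlx5_0", port number 1, and the /sys/class/infiniband/<dev>/ports/<N>/ path layout are illustrative assumptions, not something stated in this patch.

/*
 * Read the per-port "req" counter from the cm_rx_msgs group registered by
 * cm_add_one() above. Device name, port number and sysfs path layout are
 * assumptions for illustration only.
 */
#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/class/infiniband/mlx5_0/ports/1/cm_rx_msgs/req",
			"r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("CM REQs received: %s", buf);
	fclose(f);
	return 0;
}
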
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 0cc40656b5c5..8462de7ca26e 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -22,7 +22,7 @@
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg);
- switch(transport_type) {
+ switch (transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
@@ -37,7 +37,7 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
enum ib_qp_type qp_type)
{
- switch(qp_type) {
+ switch (qp_type) {
case IB_QPT_UC:
IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1);
break;
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 000000000000..8f3482f66338
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 000000000000..e9d282679ef1
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class, \
+ icm_dreq_skipped, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id) \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
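
The new cm_trace.h places its events under TRACE_SYSTEM "ib_cma", so once built they can be switched on through tracefs like any other tracepoint group. A minimal sketch, assuming the conventional /sys/kernel/tracing mount point (which this patch does not itself establish):

/*
 * Enable every icm_* event defined in cm_trace.h. The tracefs mount point
 * is an assumption; the "ib_cma" group name comes from TRACE_SYSTEM above.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/events/ib_cma/enable", "w");

	if (!f)
		return 1;
	fputs("1\n", f);
	fclose(f);
	return 0;
}

Individual events (icm_send_req, icm_mad_send_err, and so on) can be toggled the same way under events/ib_cma/<event>/enable.
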
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2dec3a02ab9f..26d1772179b8 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -43,7 +45,6 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
-#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
@@ -68,6 +69,9 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -91,7 +95,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_reject_msg);
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
if (rdma_ib_or_roce(id->device, id->port_num))
return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -102,7 +112,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
WARN_ON_ONCE(1);
return false;
}
-EXPORT_SYMBOL(rdma_is_consumer_reject);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len)
@@ -148,7 +157,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_res_to_id);
-static void cma_add_one(struct ib_device *device);
+static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
@@ -161,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -195,11 +207,16 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
u8 *default_roce_tos;
@@ -211,14 +228,6 @@ struct rdma_bind_list {
unsigned short port;
};
-struct class_port_info_context {
- struct ib_class_port_info *class_port_info;
- struct ib_device *device;
- struct completion done;
- struct ib_sa_query *sa_query;
- u8 port_num;
-};
-
static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
@@ -247,9 +256,15 @@ enum {
CMA_OPTION_AFONLY,
};
-void cma_ref_dev(struct cma_device *cma_dev)
+void cma_dev_get(struct cma_device *cma_dev)
+{
+ refcount_inc(&cma_dev->refcount);
+}
+
+void cma_dev_put(struct cma_device *cma_dev)
{
- atomic_inc(&cma_dev->refcount);
+ if (refcount_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
}
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
@@ -267,13 +282,13 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
}
if (found_cma_dev)
- cma_ref_dev(found_cma_dev);
+ cma_dev_get(found_cma_dev);
mutex_unlock(&lock);
return found_cma_dev;
}
int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port)
+ u32 port)
{
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
@@ -282,7 +297,7 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
}
int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
+ u32 port,
enum ib_gid_type default_gid_type)
{
unsigned long supported_gids;
@@ -290,6 +305,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;
+
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
@@ -301,7 +320,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+int cma_get_default_roce_tos(struct cma_device *cma_dev, u32 port)
{
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
@@ -309,7 +328,7 @@ int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}
-int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+int cma_set_default_roce_tos(struct cma_device *cma_dev, u32 port,
u8 default_roce_tos)
{
if (!rdma_is_port_valid(cma_dev->device, port))
@@ -335,12 +354,15 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
struct cma_multicast {
struct rdma_id_private *id_priv;
union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
+ struct {
+ struct work_struct work;
+ struct rdma_cm_event event;
+ } iboe_join;
+ };
struct list_head list;
void *context;
struct sockaddr_storage addr;
- struct kref mcref;
u8 join_state;
};
@@ -352,18 +374,6 @@ struct cma_work {
struct rdma_cm_event event;
};
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
-};
-
union cma_ip_addr {
struct in6_addr ip6;
struct {
@@ -393,23 +403,21 @@ struct cma_req_info {
u16 pkey;
};
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * to/from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
@@ -417,27 +425,24 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
- enum rdma_cm_state exch)
+static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
- unsigned long flags;
- enum rdma_cm_state old;
+ return hdr->ip_version >> 4;
+}
- spin_lock_irqsave(&id_priv->lock, flags);
- old = id_priv->state;
- id_priv->state = exch;
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return old;
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+{
+ hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
- return hdr->ip_version >> 4;
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
- hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
@@ -460,19 +465,128 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET)
+ return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
+ (char *)&((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
- cma_ref_dev(cma_dev);
+ cma_dev_get(cma_dev);
id_priv->cma_dev = cma_dev;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
- list_add_tail(&id_priv->list, &cma_dev->id_list);
- if (id_priv->res.kern_name)
- rdma_restrack_kadd(&id_priv->res);
- else
- rdma_restrack_uadd(&id_priv->res);
+ list_add_tail(&id_priv->device_item, &cma_dev->id_list);
+
+ trace_cm_id_attach(id_priv, cma_dev->device);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -484,39 +598,20 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
rdma_start_port(cma_dev->device)];
}
-void cma_deref_dev(struct cma_device *cma_dev)
-{
- if (atomic_dec_and_test(&cma_dev->refcount))
- complete(&cma_dev->comp);
-}
-
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
- list_del(&id_priv->list);
- cma_deref_dev(id_priv->cma_dev);
+ list_del_init(&id_priv->device_item);
+ cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ id_priv->id.device = NULL;
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
+ }
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
@@ -579,7 +674,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static const struct ib_gid_attr *
-cma_validate_port(struct ib_device *device, u8 port,
+cma_validate_port(struct ib_device *device, u32 port,
enum ib_gid_type gid_type,
union ib_gid *gid,
struct rdma_id_private *id_priv)
@@ -637,7 +732,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
- unsigned int port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@@ -717,6 +812,7 @@ static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
mutex_lock(&lock);
cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
mutex_unlock(&lock);
+ rdma_restrack_add(&id_priv->res);
return 0;
}
@@ -729,7 +825,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
enum ib_gid_type gid_type;
int ret = -ENODEV;
union ib_gid gid;
- u8 port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@@ -753,7 +849,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
}
list_for_each_entry(cma_dev, &dev_list, list) {
- for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ rdma_for_each_port (cma_dev->device, port) {
if (listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
@@ -771,8 +867,10 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
}
out:
- if (!ret)
+ if (!ret) {
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
+ }
mutex_unlock(&lock);
return ret;
@@ -786,9 +884,10 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
struct sockaddr_ib *addr;
union ib_gid gid, sgid, *dgid;
+ unsigned int p;
u16 pkey, index;
- u8 p;
enum ib_port_state port_state;
+ int ret;
int i;
cma_dev = NULL;
@@ -798,7 +897,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
mutex_lock(&lock);
list_for_each_entry(cur_dev, &dev_list, list) {
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!rdma_cap_af_ib(cur_dev->device, p))
continue;
@@ -807,9 +906,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !rdma_query_gid(cur_dev->device,
- p, i, &gid);
- i++) {
+
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(cur_dev->device, p, i,
+ &gid);
+ if (ret)
+ continue;
+
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -833,6 +937,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
found:
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
mutex_unlock(&lock);
addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
@@ -840,16 +945,21 @@ found:
return 0;
}
-static void cma_deref_id(struct rdma_id_private *id_priv)
+static void cma_id_get(struct rdma_id_private *id_priv)
{
- if (atomic_dec_and_test(&id_priv->refcount))
+ refcount_inc(&id_priv->refcount);
+}
+
+static void cma_id_put(struct rdma_id_private *id_priv)
+{
+ if (refcount_dec_and_test(&id_priv->refcount))
complete(&id_priv->comp);
}
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
{
struct rdma_id_private *id_priv;
@@ -857,8 +967,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- rdma_restrack_set_task(&id_priv->res, caller);
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -866,22 +974,60 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.qp_type = qp_type;
id_priv->tos_set = false;
id_priv->timeout_set = false;
+ id_priv->min_rnr_timer_set = false;
id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
- atomic_set(&id_priv->refcount, 1);
+ refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
+ INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
- trace_cm_id_create(id_priv);
- return &id_priv->id;
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+ if (parent)
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+ return id_priv;
+}
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, caller);
+ return &ret->id;
}
-EXPORT_SYMBOL(__rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+ ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, NULL);
+ return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
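The split above leaves two entry points: __rdma_create_kernel_id() for in-kernel ULPs (reached, if I recall the header correctly, through a rdma_create_id() wrapper macro that supplies KBUILD_MODNAME as the restrack caller name) and rdma_create_user_id() for the ucma character device. A minimal kernel-side sketch, assuming that wrapper and an illustrative handler name:

#include <rdma/rdma_cm.h>
#include <net/net_namespace.h>

static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *ev)
{
	/* returning non-zero asks the CM core to destroy the id */
	return 0;
}

static int example_create_and_destroy(void)
{
	struct rdma_cm_id *id;

	id = rdma_create_id(&init_net, my_cm_handler, NULL,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);

	rdma_destroy_id(id);
	return 0;
}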
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -1114,12 +1260,16 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
qp_attr_mask);
qp_attr->port_num = id_priv->id.port_num;
*qp_attr_mask |= IB_QP_PORT;
- } else
+ } else {
ret = -ENOSYS;
+ }
if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
qp_attr->timeout = id_priv->timeout;
+ if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+ qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
+
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -1406,7 +1556,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
return false;
memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_iif = net_dev->ifindex;
+ fl4.flowi4_oif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
@@ -1560,7 +1710,7 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
- const int port_num = id->port_num ?: rdma_start_port(device);
+ const u32 port_num = id->port_num ?: rdma_start_port(device);
return rdma_protocol_roce(device, port_num);
}
@@ -1614,6 +1764,8 @@ static struct rdma_id_private *cma_find_listener(
{
struct rdma_id_private *id_priv, *id_priv_dev;
+ lockdep_assert_held(&lock);
+
if (!bind_list)
return ERR_PTR(-EINVAL);
@@ -1624,7 +1776,7 @@ static struct rdma_id_private *cma_find_listener(
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
- listen_list) {
+ listen_item) {
if (id_priv_dev->id.device == cm_id->device &&
cma_match_net_dev(&id_priv_dev->id,
net_dev, req))
@@ -1660,6 +1812,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
}
+ mutex_lock(&lock);
/*
* Net namespace might be getting deleted while route lookup,
* cm_id lookup is in progress. Therefore, perform netdevice
@@ -1688,8 +1841,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
- (struct sockaddr *)&req->listen_addr_storage,
- (struct sockaddr *)&req->src_addr_storage)) {
+ (struct sockaddr *)&req->src_addr_storage,
+ (struct sockaddr *)&req->listen_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
@@ -1701,6 +1854,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
rcu_read_unlock();
+ mutex_unlock(&lock);
if (IS_ERR(id_priv) && *net_dev) {
dev_put(*net_dev);
*net_dev = NULL;
@@ -1721,28 +1875,36 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
+ lockdep_assert_held(&lock);
+
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
- mutex_lock(&lock);
- list_del(&id_priv->list);
+ list_del_init(&id_priv->listen_any_item);
while (!list_empty(&id_priv->listen_list)) {
- dev_id_priv = list_entry(id_priv->listen_list.next,
- struct rdma_id_private, listen_list);
+ dev_id_priv =
+ list_first_entry(&id_priv->listen_list,
+ struct rdma_id_private, listen_item);
/* sync with device removal to avoid duplicate destruction */
- list_del_init(&dev_id_priv->list);
- list_del(&dev_id_priv->listen_list);
+ list_del_init(&dev_id_priv->device_item);
+ list_del_init(&dev_id_priv->listen_item);
mutex_unlock(&lock);
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+ mutex_lock(&lock);
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
}
@@ -1751,6 +1913,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+	 * We can skip the rdma_addr_cancel() based on state: only
+	 * RDMA_CM_ADDR_QUERY has outstanding work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -1782,19 +1952,39 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
- struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
{
- struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+ ib_sa_free_multicast(mc->sa_mc);
+
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
+ union ib_gid mgid;
+
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
+ cma_igmp_send(ndev, &mgid, false);
+ }
dev_put(ndev);
+
+ cancel_work_sync(&mc->iboe_join.work);
}
- kref_put(&mc->mcref, release_mc);
+ kfree(mc);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -1802,37 +1992,20 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
- mc = container_of(id_priv->mc_list.next,
- struct cma_multicast, list);
+ mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+ list);
list_del(&mc->list);
- if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
- id_priv->id.port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else {
- cma_leave_roce_mc_group(id_priv, mc);
- }
+ destroy_mc(id_priv, mc);
}
}
-void rdma_destroy_id(struct rdma_cm_id *id)
+static void _destroy_id(struct rdma_id_private *id_priv,
+ enum rdma_cm_state state)
{
- struct rdma_id_private *id_priv;
- enum rdma_cm_state state;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- trace_cm_id_destroy(id_priv);
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- /*
- * Wait for any active callback to finish. New callbacks will find
- * the id_priv state set to destroying and abort.
- */
- mutex_lock(&id_priv->handler_mutex);
- mutex_unlock(&id_priv->handler_mutex);
-
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1846,20 +2019,55 @@ void rdma_destroy_id(struct rdma_cm_id *id)
}
cma_release_port(id_priv);
- cma_deref_id(id_priv);
+ cma_id_put(id_priv);
wait_for_completion(&id_priv->comp);
if (id_priv->internal_id)
- cma_deref_id(id_priv->id.context);
+ cma_id_put(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
-
- if (id_priv->id.route.addr.dev_addr.sgid_attr)
- rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ kfree(id_priv->id.route.path_rec_inbound);
+ kfree(id_priv->id.route.path_rec_outbound);
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
+
+/*
+ * Destroy an ID from within the handler_mutex. This ensures that no other
+ * handlers can start running concurrently.
+ */
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
+	__releases(&id_priv->handler_mutex)
+{
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ trace_cm_id_destroy(id_priv);
+
+ /*
+ * Setting the state to destroyed under the handler mutex provides a
+ * fence against calling handler callbacks. If this is invoked due to
+	 * the failure of a handler callback then it guarantees that no future
+ * handlers will be called.
+ */
+ lockdep_assert_held(&id_priv->handler_mutex);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ id_priv->state = RDMA_CM_DESTROYING;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ _destroy_id(id_priv, state);
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+ destroy_id_handler_unlock(id_priv);
+}
EXPORT_SYMBOL(rdma_destroy_id);
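One consequence of destroy_id_handler_unlock() worth spelling out for ULP authors: rdma_destroy_id() now takes the handler_mutex itself, so it must not be called on an id from inside that id's own event handler. Returning a non-zero value from the handler makes the core destroy the id instead, as the cma_*_handler() paths above do. A hedged sketch of that convention (handler name and chosen error codes are illustrative):

#include <linux/errno.h>
#include <rdma/rdma_cm.h>

static int err_aware_handler(struct rdma_cm_id *id, struct rdma_cm_event *ev)
{
	switch (ev->event) {
	case RDMA_CM_EVENT_REJECTED:
	case RDMA_CM_EVENT_UNREACHABLE:
		/* do NOT call rdma_destroy_id(id) here; a non-zero
		 * return asks the CM core to destroy the id for us */
		return -ECONNRESET;
	default:
		return 0;
	}
}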
static int cma_rep_recv(struct rdma_id_private *id_priv)
@@ -1901,6 +2109,9 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
event->param.conn.srq = rep_data->srq;
event->param.conn.qp_num = rep_data->remote_qpn;
+
+ event->ece.vendor_id = rep_data->ece.vendor_id;
+ event->ece.attr_mod = rep_data->ece.attr_mod;
}
static int cma_cm_event_handler(struct rdma_id_private *id_priv,
@@ -1908,6 +2119,8 @@ static int cma_cm_event_handler(struct rdma_id_private *id_priv,
{
int ret;
+ lockdep_assert_held(&id_priv->handler_mutex);
+
trace_cm_event_handler(id_priv, event);
ret = id_priv->id.event_handler(&id_priv->id, event);
trace_cm_event_done(id_priv, event, ret);
@@ -1919,13 +2132,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event = {};
- int ret = 0;
+ enum rdma_cm_state state;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
+ state = READ_ONCE(id_priv->state);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_CONNECT) ||
+ state != RDMA_CM_CONNECT) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_DISCONNECT))
+ state != RDMA_CM_DISCONNECT))
goto out;
switch (ib_event->event) {
@@ -1935,7 +2150,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ if (state == RDMA_CM_CONNECT &&
(id_priv->id.qp_type != IB_QPT_UD)) {
trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
@@ -1955,7 +2170,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.event = RDMA_CM_EVENT_ESTABLISHED;
break;
case IB_CM_DREQ_ERROR:
- event.status = -ETIMEDOUT; /* fall through */
+ event.status = -ETIMEDOUT;
+ fallthrough;
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
@@ -1988,14 +2204,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static struct rdma_id_private *
@@ -2014,28 +2228,29 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
- listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
+ listen_id->ps,
+ ib_event->param.req_rcvd.qp_type,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
goto err;
rt = &id->route;
- rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
- rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
- GFP_KERNEL);
+ rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc_array(rt->num_pri_alt_paths,
+ sizeof(*rt->path_rec), GFP_KERNEL);
if (!rt->path_rec)
goto err;
rt->path_rec[0] = *path;
- if (rt->num_paths == 2)
+ if (rt->num_pri_alt_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
@@ -2075,13 +2290,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
+ listen_id->context, listen_id->ps, IB_QPT_UD,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
@@ -2119,6 +2334,9 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
event->param.conn.srq = req_data->srq;
event->param.conn.qp_num = req_data->remote_qpn;
+
+ event->ece.vendor_id = req_data->ece.vendor_id;
+ event->ece.attr_mod = req_data->ece.attr_mod;
}
static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
@@ -2152,9 +2370,9 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN) {
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
- goto err1;
+ goto err_unlock;
}
offset = cma_user_data_offset(listen_id);
@@ -2171,55 +2389,38 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
if (!conn_id) {
ret = -ENOMEM;
- goto err1;
+ goto err_unlock;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
- if (ret)
- goto err2;
+ if (ret) {
+ destroy_id_handler_unlock(conn_id);
+ goto err_unlock;
+ }
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
ret = cma_cm_event_handler(conn_id, &event);
- if (ret)
- goto err3;
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD)) {
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.ib = NULL;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ goto net_dev_put;
+ }
+
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+ conn_id->id.qp_type != IB_QPT_UD) {
trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
- mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
- mutex_unlock(&listen_id->handler_mutex);
- cma_deref_id(conn_id);
- if (net_dev)
- dev_put(net_dev);
- return 0;
-err3:
- cma_deref_id(conn_id);
- /* Destroy the CM ID by returning a non-zero value. */
- conn_id->cm_id.ib = NULL;
-err2:
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
-err1:
+err_unlock:
mutex_unlock(&listen_id->handler_mutex);
- if (conn_id)
- rdma_destroy_id(&conn_id->id);
net_dev_put:
if (net_dev)
@@ -2273,7 +2474,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (iw_event->event) {
@@ -2319,9 +2520,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
@@ -2333,7 +2532,6 @@ out:
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
- struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
@@ -2349,35 +2547,33 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN)
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC,
- listen_id->res.kern_name);
- if (IS_ERR(new_cm_id)) {
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context, RDMA_PS_TCP,
+ IB_QPT_RC, listen_id);
+ if (IS_ERR(conn_id)) {
ret = -ENOMEM;
goto out;
}
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
conn_id->cm_id.iw = cm_id;
@@ -2387,25 +2583,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
- cma_deref_id(conn_id);
- rdma_destroy_id(&conn_id->id);
+ destroy_id_handler_unlock(conn_id);
return ret;
}
mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
out:
mutex_unlock(&listen_id->handler_mutex);
@@ -2440,8 +2627,11 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
if (IS_ERR(id))
return PTR_ERR(id);
+ mutex_lock(&id_priv->qp_mutex);
id->tos = id_priv->tos;
id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+ id->afonly = id_priv->afonly;
id_priv->cm_id.iw = id;
memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@@ -2462,57 +2652,88 @@ static int cma_listen_handler(struct rdma_cm_id *id,
{
struct rdma_id_private *id_priv = id->context;
+ /* Listening IDs are always destroyed on removal */
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ return -1;
+
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev,
+ struct rdma_id_private **to_destroy)
{
struct rdma_id_private *dev_id_priv;
- struct rdma_cm_id *id;
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
- if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
- return;
+ lockdep_assert_held(&lock);
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type, id_priv->res.kern_name);
- if (IS_ERR(id))
- return;
+ *to_destroy = NULL;
+ if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
+ return 0;
- dev_id_priv = container_of(id, struct rdma_id_private, id);
+ dev_id_priv =
+ __rdma_create_id(net, cma_listen_handler, id_priv,
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
+ if (IS_ERR(dev_id_priv))
+ return PTR_ERR(dev_id_priv);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
_cma_attach_to_dev(dev_id_priv, cma_dev);
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
- atomic_inc(&id_priv->refcount);
+ rdma_restrack_add(&dev_id_priv->res);
+ cma_id_get(id_priv);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
+ mutex_lock(&id_priv->qp_mutex);
dev_id_priv->tos_set = id_priv->tos_set;
dev_id_priv->tos = id_priv->tos;
+ mutex_unlock(&id_priv->qp_mutex);
- ret = rdma_listen(id, id_priv->backlog);
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
- dev_warn(&cma_dev->device->dev,
- "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
+ goto err_listen;
+ list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list);
+ return 0;
+err_listen:
+ /* Caller must destroy this after releasing lock */
+ *to_destroy = dev_id_priv;
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
+ return ret;
}
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
+ int ret;
mutex_lock(&lock);
- list_add_tail(&id_priv->list, &listen_any_list);
- list_for_each_entry(cma_dev, &dev_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_add_tail(&id_priv->listen_any_item, &listen_any_list);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret) {
+ /* Prevent racing with cma_process_remove() */
+ if (to_destroy)
+ list_del_init(&to_destroy->device_item);
+ goto err_listen;
+ }
+ }
mutex_unlock(&lock);
+ return 0;
+
+err_listen:
+ _cma_cancel_listens(id_priv);
+ mutex_unlock(&lock);
+ if (to_destroy)
+ rdma_destroy_id(&to_destroy->id);
+ return ret;
}
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
@@ -2520,8 +2741,10 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->tos = (u8) tos;
id_priv->tos_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_set_service_type);
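Since the tos/tos_set pair is now read under qp_mutex, a ULP simply sets it once before resolving a route and the value is picked up when the path record or iWARP cm_id is built. A small sketch; the DSCP value is an arbitrary example, not taken from this patch:

#include <rdma/rdma_cm.h>

static void example_mark_traffic(struct rdma_cm_id *id)
{
	/* ToS byte: DSCP CS3 (24) shifted into the upper six bits */
	rdma_set_service_type(id, 24 << 2);
}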
@@ -2544,37 +2767,124 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
struct rdma_id_private *id_priv;
- if (id->qp_type != IB_QPT_RC)
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->timeout = timeout;
id_priv->timeout_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
return 0;
}
EXPORT_SYMBOL(rdma_set_ack_timeout);
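With the XRC_INI relaxation above, an initiator-side ULP can tune the local ACK timeout on RC or XRC initiator QPs before connecting. Sketch only; the 5-bit value is the IBTA local-ack-timeout exponent (roughly 4.096 us * 2^val, figure quoted from memory) and 14 is just an illustrative choice:

#include <rdma/rdma_cm.h>

static int example_tune_timeout(struct rdma_cm_id *id)
{
	int ret;

	ret = rdma_set_ack_timeout(id, 14);	/* ~67 ms if memory serves */
	if (ret)
		return ret;	/* -EINVAL for non-RC/XRC_INI QP types */
	return 0;
}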
+/**
+ * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the
+ * QP associated with a connection identifier.
+ * @id: Communication identifier associated with the QP.
+ * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK
+ * Timer Field" in the IBTA specification.
+ *
+ * This function should be called before rdma_connect() on active
+ * side, and on passive side before rdma_accept(). The timer value
+ * will be associated with the local QP. When the QP receives a send that it
+ * is not ready to handle, typically because the receive queue is empty, an RNR
+ * Retry NAK is returned to the requester with the min_rnr_timer
+ * encoded. The requester will then wait at least the time specified
+ * in the NAK before retrying. The default is zero, which translates
+ * to a minimum RNR Timer value of 655 ms.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
+{
+ struct rdma_id_private *id_priv;
+
+ /* It is a five-bit value */
+ if (min_rnr_timer & 0xe0)
+ return -EINVAL;
+
+ if (WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT))
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ id_priv->min_rnr_timer = min_rnr_timer;
+ id_priv->min_rnr_timer_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_set_min_rnr_timer);
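A passive-side sketch of how the new call is meant to be used per the kerneldoc above: set the encoded value before rdma_accept() in the CONNECT_REQUEST handler. The handler name, connection parameters, and the encoded value (16, which I believe maps to 2.56 ms in IBTA Table 45) are illustrative assumptions:

#include <rdma/rdma_cm.h>

static int passive_handler(struct rdma_cm_id *id, struct rdma_cm_event *ev)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth = 1,
		.rnr_retry_count = 7,
	};

	if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 0;

	if (rdma_set_min_rnr_timer(id, 16))	/* 5-bit IBTA encoding */
		return -EINVAL;			/* id gets destroyed */
	return rdma_accept(id, &param);
}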
+
+static void route_set_path_rec_inbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_inbound) {
+ route->path_rec_inbound =
+ kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL);
+ if (!route->path_rec_inbound)
+ return;
+ }
+
+ *route->path_rec_inbound = *path_rec;
+}
+
+static void route_set_path_rec_outbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_outbound) {
+ route->path_rec_outbound =
+ kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL);
+ if (!route->path_rec_outbound)
+ return;
+ }
+
+ *route->path_rec_outbound = *path_rec;
+}
+
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
- void *context)
+ int num_prs, void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
+ int i;
route = &work->id->id.route;
- if (!status) {
- route->num_paths = 1;
- *route->path_rec = *path_rec;
- } else {
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
- work->event.status = status;
- pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
- status);
+ if (status)
+ goto fail;
+
+ for (i = 0; i < num_prs; i++) {
+ if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP))
+ *route->path_rec = path_rec[i];
+ else if (path_rec[i].flags & IB_PATH_INBOUND)
+ route_set_path_rec_inbound(work, &path_rec[i]);
+ else if (path_rec[i].flags & IB_PATH_OUTBOUND)
+ route_set_path_rec_outbound(work, &path_rec[i]);
+ }
+ if (!route->path_rec) {
+ status = -EINVAL;
+ goto fail;
}
+ route->num_pri_alt_paths = 1;
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
queue_work(cma_wq, &work->work);
}
@@ -2631,49 +2941,54 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv,
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
-static void cma_work_handler(struct work_struct *_work)
+static void cma_iboe_join_work_handler(struct work_struct *work)
{
- struct cma_work *work = container_of(_work, struct cma_work, work);
- struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
+ struct cma_multicast *mc =
+ container_of(work, struct cma_multicast, iboe_join.work);
+ struct rdma_cm_event *event = &mc->iboe_join.event;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
- if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
- }
-out:
+ ret = cma_cm_event_handler(id_priv, event);
+ WARN_ON(ret);
+
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
- kfree(work);
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
}
-static void cma_ndev_work_handler(struct work_struct *_work)
+static void cma_work_handler(struct work_struct *_work)
{
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+ struct cma_work *work = container_of(_work, struct cma_work, work);
struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == RDMA_CM_DESTROYING ||
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+ if (work->old_state != 0 || work->new_state != 0) {
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out_unlock;
+ }
if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ goto out_free;
}
-out:
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
+ cma_id_put(id_priv);
+out_free:
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
kfree(work);
}
@@ -2687,14 +3002,19 @@ static void cma_init_resolve_route_work(struct cma_work *work,
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
}
-static void cma_init_resolve_addr_work(struct cma_work *work,
- struct rdma_id_private *id_priv)
+static void enqueue_resolve_addr_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
{
+ /* Balances with cma_id_put() in cma_work_handler */
+ cma_id_get(id_priv);
+
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ADDR_QUERY;
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+
+ queue_work(cma_wq, &work->work);
}
static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
@@ -2710,7 +3030,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
cma_init_resolve_route_work(work, id_priv);
- route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec)
+ route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
@@ -2808,7 +3129,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
dev_put(ndev);
}
- id->route.num_paths = 1;
+ id->route.num_pri_alt_paths = 1;
return 0;
err_free:
@@ -2851,9 +3172,10 @@ struct iboe_prio_tc_map {
bool found;
};
-static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+static int get_lower_vlan_dev_tc(struct net_device *dev,
+ struct netdev_nested_priv *priv)
{
- struct iboe_prio_tc_map *map = data;
+ struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
if (is_vlan_dev(dev))
map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
@@ -2872,16 +3194,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
{
struct iboe_prio_tc_map prio_tc_map = {};
int prio = rt_tos2priority(tos);
+ struct netdev_nested_priv priv;
/* If VLAN device, get it directly from the VLAN netdev */
if (is_vlan_dev(ndev))
return get_vlan_ndev_tc(ndev, prio);
prio_tc_map.input_prio = prio;
+ priv.data = (void *)&prio_tc_map;
rcu_read_lock();
netdev_walk_all_lower_dev_rcu(ndev,
get_lower_vlan_dev_tc,
- &prio_tc_map);
+ &priv);
rcu_read_unlock();
/* If map is found from lower device, use it; Otherwise
* continue with the current netdevice to get priority to tc map.
@@ -2894,6 +3218,24 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
+}
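For the non-IPv6 (or zero-flowinfo) case the flow label is derived purely from the port pair. A tiny user-space reproduction of that arithmetic, assuming the 20-bit IB_GRH_FLOWLABEL_MASK, makes the mapping concrete (the ports are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t sport = 49152, dport = 4791;	/* example ports only */
	uint32_t hash = (uint32_t)sport * 31 + dport;
	uint32_t fl = hash & 0xFFFFF;		/* 20-bit flow label */

	printf("flow label = 0x%05x\n", fl);	/* 0x752b7 for these ports */
	return 0;
}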
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2904,8 +3246,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ u8 tos;
+ mutex_lock(&id_priv->qp_mutex);
+ tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ mutex_unlock(&id_priv->qp_mutex);
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
@@ -2917,7 +3262,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err1;
}
- route->num_paths = 1;
+ route->num_pri_alt_paths = 1;
ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
@@ -2952,14 +3297,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
* PacketLifeTime = local ACK timeout/2
* as a reasonable approximation for RoCE networks.
*/
- route->path_rec->packet_life_time = id_priv->timeout_set ?
- id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
+ mutex_lock(&id_priv->qp_mutex);
+ if (id_priv->timeout_set && id_priv->timeout)
+ route->path_rec->packet_life_time = id_priv->timeout - 1;
+ else
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ mutex_unlock(&id_priv->qp_mutex);
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
}
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+
cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
@@ -2968,6 +3322,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
+ route->num_pri_alt_paths = 0;
err1:
kfree(work);
return ret;
@@ -2978,15 +3333,21 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
struct rdma_id_private *id_priv;
int ret;
+ if (!timeout_ms)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
return -EINVAL;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -2998,7 +3359,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
- cma_deref_id(id_priv);
+ cma_id_put(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
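Given the new non-zero timeout check and the refcount rename, the usual kernel-ULP sequence is unchanged: kick off rdma_resolve_route() from the ADDR_RESOLVED event and continue from ROUTE_RESOLVED. A hedged sketch (handler name and the 2000 ms timeout are arbitrary):

#include <rdma/rdma_cm.h>

static int resolve_chain_handler(struct rdma_cm_id *id,
				 struct rdma_cm_event *ev)
{
	switch (ev->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		/* timeout must be non-zero now; an error return destroys id */
		return rdma_resolve_route(id, 2000);
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* connect from here with rdma_connect_locked(), see below */
		return 0;
	default:
		return 0;
	}
}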
@@ -3025,9 +3386,9 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
union ib_gid gid;
enum ib_port_state port_state;
+ unsigned int p;
u16 pkey;
int ret;
- u8 p;
cma_dev = NULL;
mutex_lock(&lock);
@@ -3039,7 +3400,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
if (!cma_dev)
cma_dev = cur_dev;
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
@@ -3072,6 +3433,7 @@ port_found:
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
cma_set_loopback(cma_src_addr(id_priv));
out:
mutex_unlock(&lock);
@@ -3104,6 +3466,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
+ rdma_restrack_add(&id_priv->res);
} else if (status) {
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
}
@@ -3120,9 +3483,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
if (cma_cm_event_handler(id_priv, &event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return;
}
out:
@@ -3148,9 +3509,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- atomic_inc(&id_priv->refcount);
- cma_init_resolve_addr_work(work, id_priv);
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
@@ -3175,9 +3534,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
- atomic_inc(&id_priv->refcount);
- cma_init_resolve_addr_work(work, id_priv);
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
@@ -3187,50 +3544,80 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
const struct sockaddr *dst_addr)
{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) &&
- dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr(id, src_addr);
+
+ /*
+	 * When the src_addr is not specified, automatically supply a wildcard (any) address
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
}
- return rdma_bind_addr(id, src_addr);
+ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
}
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by the ULP: a previously bound ID will not be re-bound, and
+ * src_addr is ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
{
- struct rdma_id_private *id_priv;
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (id_priv->state == RDMA_CM_IDLE) {
- ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret) {
- memset(cma_dst_addr(id_priv), 0,
- rdma_addr_size(dst_addr));
- return ret;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ goto err_dst;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY))) {
+ ret = -EINVAL;
+ goto err_dst;
}
}
if (cma_family(id_priv) != dst_addr->sa_family) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_state;
}
+ return 0;
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
- }
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+err_dst:
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
@@ -3238,6 +3625,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+		 * The FSM can return to RDMA_CM_ADDR_BOUND after
+		 * rdma_resolve_ip() is called, e.g. through the error
+		 * path in addr_handler(). If this happens the existing
+		 * request must be canceled before issuing a new one.
+		 * Since canceling a request is a bit slow and this
+		 * oddball path is rare, keep track of whether a request
+		 * has been issued. The flag is never cleared, since the
+		 * only cancel point is here, immediately before
+		 * rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
@@ -3262,7 +3664,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+ id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
@@ -3301,6 +3704,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
u64 sid, mask;
__be16 port;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
port = htons(bind_list->port);
@@ -3329,6 +3734,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list;
int ret;
+ lockdep_assert_held(&lock);
+
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
@@ -3355,6 +3762,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
struct sockaddr *saddr = cma_src_addr(id_priv);
__be16 dport = cma_port(daddr);
+ lockdep_assert_held(&lock);
+
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
struct sockaddr *cur_saddr = cma_src_addr(cur_id);
@@ -3394,9 +3803,11 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
unsigned int rover;
struct net *net = id_priv->id.route.addr.dev_addr.net;
+ lockdep_assert_held(&lock);
+
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = prandom_u32_max(remaining) + low;
retry:
if (last_used_port != rover) {
struct rdma_bind_list *bind_list;
@@ -3441,13 +3852,14 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
- cur_id->reuseaddr)
+ if (reuseaddr && cur_id->reuseaddr)
continue;
cur_addr = cma_src_addr(cur_id);
@@ -3471,6 +3883,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
unsigned short snum;
int ret;
+ lockdep_assert_held(&lock);
+
snum = ntohs(cma_port(cma_src_addr(id_priv)));
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -3486,18 +3900,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
return ret;
}
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
- int ret = 0;
-
- mutex_lock(&lock);
- if (bind_list->owners.first->next)
- ret = cma_check_port(bind_list, id_priv, 0);
- mutex_unlock(&lock);
- return ret;
-}
-
static enum rdma_ucm_port_space
cma_select_inet_ps(struct rdma_id_private *id_priv)
{
@@ -3591,28 +3993,41 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
- id->route.addr.src_addr.ss_family = AF_INET;
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ struct sockaddr_in any_in = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
if (ret)
return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN)))
+ return -EINVAL;
}
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
- return -EINVAL;
-
+ /*
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+ * any more, and has to be unique in the bind list.
+ */
if (id_priv->reuseaddr) {
- ret = cma_bind_listen(id_priv);
+ mutex_lock(&lock);
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+ if (!ret)
+ id_priv->reuseaddr = 0;
+ mutex_unlock(&lock);
if (ret)
goto err;
}
id_priv->backlog = backlog;
- if (id->device) {
+ if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id->device, 1)) {
ret = cma_ib_listen(id_priv);
if (ret)
@@ -3625,12 +4040,19 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
ret = -ENOSYS;
goto err;
}
- } else
- cma_listen_on_all(id_priv);
+ } else {
+ ret = cma_listen_on_all(id_priv);
+ if (ret)
+ goto err;
+ }
return 0;
err:
id_priv->backlog = 0;
+ /*
+	 * All the failure paths that lead here will not allow the req_handlers
+ * to have run.
+ */
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
@@ -3683,9 +4105,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ if (!cma_any_addr(addr))
+ rdma_restrack_add(&id_priv->res);
return 0;
err2:
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev)
cma_release_dev(id_priv);
err1:
@@ -3731,10 +4154,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct rdma_cm_event event = {};
const struct ib_cm_sidr_rep_event_param *rep =
&ib_event->param.sidr_rep_rcvd;
- int ret = 0;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (ib_event->event) {
@@ -3781,14 +4204,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
@@ -3802,8 +4223,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3862,8 +4282,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3894,7 +4313,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
}
req.primary_path = &route->path_rec[0];
- if (route->num_paths == 2)
+ req.primary_path_inbound = route->path_rec_inbound;
+ req.primary_path_outbound = route->path_rec_outbound;
+ if (route->num_pri_alt_paths == 2)
req.alternate_path = &route->path_rec[1];
req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
@@ -3912,6 +4333,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ req.ece.vendor_id = id_priv->ece.vendor_id;
+ req.ece.attr_mod = id_priv->ece.attr_mod;
trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
@@ -3936,8 +4359,11 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
+ mutex_lock(&id_priv->qp_mutex);
cm_id->tos = id_priv->tos;
cm_id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+
id_priv->cm_id.iw = cm_id;
memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
@@ -3968,12 +4394,21 @@ out:
return ret;
}
-int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+/**
+ * rdma_connect_locked - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Same as rdma_connect() but can only be called from the
+ * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback.
+ */
+int rdma_connect_locked(struct rdma_cm_id *id,
+ struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3987,20 +4422,66 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_connect_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
+ }
if (ret)
- goto err;
-
+ goto err_state;
return 0;
-err:
+err_state:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
+EXPORT_SYMBOL(rdma_connect_locked);
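A minimal sketch of the intended caller: the ROUTE_RESOLVED callback already runs under handler_mutex, so it must use rdma_connect_locked() rather than rdma_connect(). Connection parameters below are illustrative:

#include <rdma/rdma_cm.h>

static int route_resolved_handler(struct rdma_cm_id *id,
				  struct rdma_cm_event *ev)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth = 1,
		.retry_count = 7,
		.rnr_retry_count = 7,
	};

	if (ev->event != RDMA_CM_EVENT_ROUTE_RESOLVED)
		return 0;

	/* a failure here destroys the id via the non-zero return */
	return rdma_connect_locked(id, &param);
}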
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Users must have resolved a route to the destination address by calling
+ * rdma_resolve_route() before calling this routine.
+ *
+ * This call will either connect to a remote QP or obtain remote QP information
+ * for unconnected rdma_cm_id's. The actual operation is based on the
+ * rdma_cm_id's port space.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ mutex_lock(&id_priv->handler_mutex);
+ ret = rdma_connect_locked(id, conn_param);
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
EXPORT_SYMBOL(rdma_connect);
+/**
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ * @ece: ECE parameters
+ *
+ * See rdma_connect() explanation.
+ */
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_connect(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_connect_ece);
+
static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
@@ -4026,6 +4507,8 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
@@ -4050,9 +4533,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
- if (id_priv->id.qp) {
+ if (id_priv->id.qp)
iw_param.qpn = id_priv->qp_num;
- } else
+ else
iw_param.qpn = conn_param->qp_num;
return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
@@ -4073,7 +4556,11 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ret;
rep.qp_num = id_priv->qp_num;
rep.qkey = id_priv->qkey;
+
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
}
+
rep.private_data = private_data;
rep.private_data_len = private_data_len;
@@ -4081,17 +4568,33 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the QP associated with the id is moved to the error state, so that
+ * any previously posted receive buffers are flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
-
- rdma_restrack_set_task(&id_priv->res, caller);
+ lockdep_assert_held(&id_priv->handler_mutex);
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
return -EINVAL;
if (!id->qp && conn_param) {
@@ -4115,21 +4618,52 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
else
ret = cma_rep_recv(id_priv);
}
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_accept_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
-
+ }
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(id_priv);
- rdma_reject(id, NULL, 0);
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
+
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
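Because rdma_accept() now asserts lockdep ownership of the handler_mutex, a ULP that defers the accept out of the connect-request callback (to a workqueue, for example) is expected to bracket the call with the helpers above. A hedged sketch, with placeholder names:

/* Hypothetical deferred accept, e.g. from a target driver's workqueue. */
static int demo_deferred_accept(struct rdma_cm_id *id,
				struct rdma_conn_param *conn_param)
{
	int ret;

	rdma_lock_handler(id);			/* satisfies the lockdep assertion */
	ret = rdma_accept(id, conn_param);	/* id cannot be destroyed underneath us */
	rdma_unlock_handler(id);
	return ret;
}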
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -4153,7 +4687,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
EXPORT_SYMBOL(rdma_notify);
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
- u8 private_data_len)
+ u8 private_data_len, u8 reason)
{
struct rdma_id_private *id_priv;
int ret;
@@ -4168,15 +4702,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
private_data, private_data_len);
} else {
trace_cm_send_rej(id_priv);
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
- 0, private_data, private_data_len);
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
+ private_data, private_data_len);
}
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
- } else
+ } else {
ret = -ENOSYS;
+ }
return ret;
}
@@ -4213,70 +4747,68 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ struct ib_sa_multicast *multicast,
+ struct rdma_cm_event *event,
+ struct cma_multicast *mc)
{
- struct rdma_id_private *id_priv;
- struct cma_multicast *mc = multicast->context;
- struct rdma_cm_event event = {};
- int ret = 0;
-
- id_priv = mc->id_priv;
- mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
- goto out;
+ struct rdma_dev_addr *dev_addr;
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
else
pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
status);
- mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp) {
- status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- be16_to_cpu(multicast->rec.mlid));
- if (status)
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
- status);
+
+ event->status = status;
+ event->param.ud.private_data = mc->context;
+ if (status) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ return;
}
- mutex_unlock(&id_priv->qp_mutex);
- event.status = status;
- event.param.ud.private_data = mc->context;
- if (!status) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev =
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- enum ib_gid_type gid_type =
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
-
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num,
- &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
- if (ret)
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ gid_type =
+ id_priv->cma_dev
+ ->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+ &multicast->rec, ndev, gid_type,
+ &event->param.ud.ah_attr)) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ goto out;
+ }
- event.param.ud.qp_num = 0xFFFFFF;
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- if (ndev)
- dev_put(ndev);
- } else
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ event->param.ud.qp_num = 0xFFFFFF;
+ event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- ret = cma_cm_event_handler(id_priv, &event);
+out:
+ if (ndev)
+ dev_put(ndev);
+}
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ struct rdma_cm_event event = {};
+ int ret = 0;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+ goto out;
+
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
- if (ret) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
- return 0;
- }
+ WARN_ON(ret);
out:
mutex_unlock(&id_priv->handler_mutex);
@@ -4337,17 +4869,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
- (!ib_sa_sendonly_fullmem_support(&sa_client,
- id_priv->id.device,
- id_priv->id.port_num))) {
- dev_warn(
- &id_priv->id.device->dev,
- "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
- id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
-
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
@@ -4361,23 +4882,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
- id_priv->id.port_num, &rec,
- comp_mask, GFP_KERNEL,
- cma_ib_mc_handler, mc);
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
- struct cma_multicast *mc = mw->mc;
- struct ib_sa_multicast *m = mc->multicast.ib;
-
- mc->multicast.ib->context = mc;
- cma_ib_mc_handler(0, m);
- kref_put(&mc->mcref, release_mc);
- kfree(mw);
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec, comp_mask,
+ GFP_KERNEL, cma_ib_mc_handler, mc);
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
@@ -4412,52 +4920,41 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ struct ib_sa_multicast ib;
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ if (cma_zero_addr(addr))
return -EINVAL;
- work = kzalloc(sizeof *work, GFP_KERNEL);
- if (!work)
- return -ENOMEM;
-
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
- if (!mc->multicast.ib) {
- err = -ENOMEM;
- goto out1;
- }
-
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+ ib.rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (!ndev) {
- err = -ENODEV;
- goto out2;
- }
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
- mc->multicast.ib->rec.hop_limit = 1;
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ if (!ndev)
+ return -ENODEV;
+
+ ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
if (!send_only) {
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
true);
}
}
@@ -4466,44 +4963,35 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !mc->multicast.ib->rec.mtu) {
- if (!err)
- err = -EINVAL;
- goto out2;
- }
- rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
- work->id = id_priv;
- work->mc = mc;
- INIT_WORK(&work->work, iboe_mcast_work_handler);
- kref_get(&mc->mcref);
- queue_work(cma_wq, &work->work);
+ if (err || !ib.rec.mtu)
+ return err ?: -EINVAL;
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &ib.rec.port_gid);
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
+ queue_work(cma_wq, &mc->iboe_join.work);
return 0;
-
-out2:
- kfree(mc->multicast.ib);
-out1:
- kfree(work);
- return err;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
u8 join_state, void *context)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
struct cma_multicast *mc;
int ret;
- if (!id->device)
+ /* Not supported for kernel QPs */
+ if (WARN_ON(id->qp))
return -EINVAL;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+ /* ULP is calling this wrong. */
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
@@ -4513,7 +5001,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
mc->join_state = join_state;
if (rdma_protocol_roce(id->device, id->port_num)) {
- kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
if (ret)
goto out_err;
@@ -4545,25 +5032,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
-
- if (id->qp)
- ib_detach_mcast(id->qp,
- &mc->multicast.ib->rec.mgid,
- be16_to_cpu(mc->multicast.ib->rec.mlid));
-
- BUG_ON(id_priv->cma_dev->device != id->device);
-
- if (rdma_cap_ib_mcast(id->device, id->port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num)) {
- cma_leave_roce_mc_group(id_priv, mc);
- }
- return;
- }
+ if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+ continue;
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ WARN_ON(id_priv->cma_dev->device != id->device);
+ destroy_mc(id_priv, mc);
+ return;
}
spin_unlock_irq(&id_priv->lock);
}
@@ -4572,7 +5048,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
- struct cma_ndev_work *work;
+ struct cma_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -4585,10 +5061,10 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, cma_ndev_work_handler);
+ INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
}
@@ -4611,7 +5087,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ list_for_each_entry(id_priv, &cma_dev->id_list, device_item) {
ret = cma_netdev_change(ndev, id_priv);
if (ret)
goto out;
@@ -4622,33 +5098,192 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+ cma_id_get(current_id);
+ queue_work(cma_wq, &current_id->id.net_work);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
-static void cma_add_one(struct ib_device *device)
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
+{
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ mutex_lock(&id_priv->handler_mutex);
+ /* Record that we want to remove the device */
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+ return;
+ }
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ /*
+ * At this point the ULP promises it won't call
+ * rdma_destroy_id() concurrently
+ */
+ cma_id_put(id_priv);
+ mutex_unlock(&id_priv->handler_mutex);
+ trace_cm_id_destroy(id_priv);
+ _destroy_id(id_priv, state);
+ return;
+ }
+ mutex_unlock(&id_priv->handler_mutex);
+
+	/*
+	 * If this races with destroy then whichever thread first moves the
+	 * state to destroying performs the cancel.
+	 */
+ cma_cancel_operation(id_priv, state);
+ cma_id_put(id_priv);
+}
+
+static void cma_process_remove(struct cma_device *cma_dev)
+{
+ mutex_lock(&lock);
+ while (!list_empty(&cma_dev->id_list)) {
+ struct rdma_id_private *id_priv = list_first_entry(
+ &cma_dev->id_list, struct rdma_id_private, device_item);
+
+ list_del_init(&id_priv->listen_item);
+ list_del_init(&id_priv->device_item);
+ cma_id_get(id_priv);
+ mutex_unlock(&lock);
+
+ cma_send_device_removal_put(id_priv);
+
+ mutex_lock(&lock);
+ }
+ mutex_unlock(&lock);
+
+ cma_dev_put(cma_dev);
+ wait_for_completion(&cma_dev->comp);
+}
+
+static bool cma_supported(struct ib_device *device)
+{
+ u32 i;
+
+ rdma_for_each_port(device, i) {
+ if (rdma_cap_ib_cm(device, i) || rdma_cap_iw_cm(device, i))
+ return true;
+ }
+ return false;
+}
+
+static int cma_add_one(struct ib_device *device)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
- unsigned int i;
unsigned long supported_gids = 0;
+ int ret;
+ u32 i;
+
+ if (!cma_supported(device))
+ return -EOPNOTSUPP;
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
if (!cma_dev)
- return;
+ return -ENOMEM;
cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type)
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
goto free_cma_dev;
+ }
cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_roce_tos),
GFP_KERNEL);
- if (!cma_dev->default_roce_tos)
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
goto free_gid_type;
+ }
rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
@@ -4663,90 +5298,43 @@ static void cma_add_one(struct ib_device *device)
}
init_completion(&cma_dev->comp);
- atomic_set(&cma_dev->refcount, 1);
+ refcount_set(&cma_dev->refcount, 1);
INIT_LIST_HEAD(&cma_dev->id_list);
ib_set_client_data(device, &cma_client, cma_dev);
mutex_lock(&lock);
list_add_tail(&cma_dev->list, &dev_list);
- list_for_each_entry(id_priv, &listen_any_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_for_each_entry(id_priv, &listen_any_list, listen_any_item) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret)
+ goto free_listen;
+ }
mutex_unlock(&lock);
trace_cm_add_one(device);
- return;
+ return 0;
+
+free_listen:
+ list_del(&cma_dev->list);
+ mutex_unlock(&lock);
+ /* cma_process_remove() will delete to_destroy */
+ cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
free_gid_type:
kfree(cma_dev->default_gid_type);
free_cma_dev:
kfree(cma_dev);
-
- return;
-}
-
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
-{
- struct rdma_cm_event event = {};
- enum rdma_cm_state state;
- int ret = 0;
-
- /* Record that we want to remove the device */
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
- if (state == RDMA_CM_DESTROYING)
- return 0;
-
- cma_cancel_operation(id_priv, state);
- mutex_lock(&id_priv->handler_mutex);
-
- /* Check for destruction from another callback. */
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
- goto out;
-
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = cma_cm_event_handler(id_priv, &event);
-out:
- mutex_unlock(&id_priv->handler_mutex);
return ret;
}
-static void cma_process_remove(struct cma_device *cma_dev)
-{
- struct rdma_id_private *id_priv;
- int ret;
-
- mutex_lock(&lock);
- while (!list_empty(&cma_dev->id_list)) {
- id_priv = list_entry(cma_dev->id_list.next,
- struct rdma_id_private, list);
-
- list_del(&id_priv->listen_list);
- list_del_init(&id_priv->list);
- atomic_inc(&id_priv->refcount);
- mutex_unlock(&lock);
-
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
- cma_deref_id(id_priv);
- if (ret)
- rdma_destroy_id(&id_priv->id);
-
- mutex_lock(&lock);
- }
- mutex_unlock(&lock);
-
- cma_deref_dev(cma_dev);
- wait_for_completion(&cma_dev->comp);
-}
-
static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
trace_cm_remove_one(device);
- if (!cma_dev)
- return;
-
mutex_lock(&lock);
list_del(&cma_dev->list);
mutex_unlock(&lock);
@@ -4790,6 +5378,19 @@ static int __init cma_init(void)
{
int ret;
+ /*
+ * There is a rare lock ordering dependency in cma_netdev_callback()
+ * that only happens when bonding is enabled. Teach lockdep that rtnl
+ * must never be nested under lock so it can find these without having
+ * to test with bonding.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ rtnl_lock();
+ mutex_lock(&lock);
+ mutex_unlock(&lock);
+ rtnl_unlock();
+ }
+
cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
if (!cma_wq)
return -ENOMEM;
@@ -4800,6 +5401,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
@@ -4814,6 +5416,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -4826,6 +5429,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 8b0b5ae22e4c..7b68b3ea979f 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -43,7 +42,7 @@ struct cma_device;
struct cma_dev_group;
struct cma_dev_port_group {
- unsigned int port_num;
+ u32 port_num;
struct cma_dev_group *cma_dev_group;
struct config_group group;
};
@@ -94,7 +93,7 @@ static int cma_configfs_params_get(struct config_item *item,
static void cma_configfs_params_put(struct cma_device *cma_dev)
{
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
}
static ssize_t default_roce_mode_show(struct config_item *item,
@@ -115,7 +114,7 @@ static ssize_t default_roce_mode_show(struct config_item *item,
if (gid_type < 0)
return gid_type;
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+ return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type));
}
static ssize_t default_roce_mode_store(struct config_item *item,
@@ -123,16 +122,19 @@ static ssize_t default_roce_mode_store(struct config_item *item,
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
- int gid_type = ib_cache_gid_parse_type_str(buf);
+ int gid_type;
ssize_t ret;
- if (gid_type < 0)
- return -EINVAL;
-
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
+ gid_type = ib_cache_gid_parse_type_str(buf);
+ if (gid_type < 0) {
+ cma_configfs_params_put(cma_dev);
+ return -EINVAL;
+ }
+
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
@@ -156,7 +158,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
tos = cma_get_default_roce_tos(cma_dev, group->port_num);
cma_configfs_params_put(cma_dev);
- return sprintf(buf, "%u\n", tos);
+ return sysfs_emit(buf, "%u\n", tos);
}
static ssize_t default_roce_tos_store(struct config_item *item,
@@ -197,11 +199,10 @@ static const struct config_item_type cma_port_group_type = {
static int make_cma_ports(struct cma_dev_group *cma_dev_group,
struct cma_device *cma_dev)
{
- struct ib_device *ibdev;
- unsigned int i;
- unsigned int ports_num;
struct cma_dev_port_group *ports;
- int err;
+ struct ib_device *ibdev;
+ u32 ports_num;
+ u32 i;
ibdev = cma_get_ib_dev(cma_dev);
@@ -212,10 +213,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
- if (!ports) {
- err = -ENOMEM;
- goto free;
- }
+ if (!ports)
+ return -ENOMEM;
for (i = 0; i < ports_num; i++) {
char port_str[10];
@@ -231,12 +230,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
-
return 0;
-free:
- kfree(ports);
- cma_dev_group->ports = NULL;
- return err;
}
static void release_cma_dev(struct config_item *item)
@@ -298,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
@@ -312,18 +306,31 @@ static struct config_group *make_cma_dev(struct config_group *group,
configfs_add_default_group(&cma_dev_group->ports_group,
&cma_dev_group->device_group);
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
return &cma_dev_group->device_group;
fail:
if (cma_dev)
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
kfree(cma_dev_group);
return ERR_PTR(err);
}
+static void drop_cma_dev(struct config_group *cgroup, struct config_item *item)
+{
+ struct config_group *group =
+ container_of(item, struct config_group, cg_item);
+ struct cma_dev_group *cma_dev_group =
+ container_of(group, struct cma_dev_group, device_group);
+
+ configfs_remove_default_groups(&cma_dev_group->ports_group);
+ configfs_remove_default_groups(&cma_dev_group->device_group);
+ config_item_put(item);
+}
+
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
+ .drop_item = drop_cma_dev,
};
static const struct config_item_type cma_subsys_type = {
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index ca7307277518..b7354c94cf1b 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -55,8 +55,16 @@ struct rdma_id_private {
struct rdma_bind_list *bind_list;
struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
+ union {
+ struct list_head device_item; /* On cma_device->id_list */
+ struct list_head listen_any_item; /* On listen_any_list */
+ };
+ union {
+ /* On rdma_id_private->listen_list */
+ struct list_head listen_item;
+ struct list_head listen_list;
+ };
+ struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
@@ -66,7 +74,7 @@ struct rdma_id_private {
struct mutex qp_mutex;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct mutex handler_mutex;
int backlog;
@@ -86,15 +94,19 @@ struct rdma_id_private {
u8 tos;
u8 tos_set:1;
u8 timeout_set:1;
+ u8 min_rnr_timer_set:1;
u8 reuseaddr;
u8 afonly;
u8 timeout;
+ u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
* Internal to RDMA/core, don't use in the drivers
*/
struct rdma_restrack_entry res;
+ struct rdma_ucm_ece ece;
};
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
@@ -111,16 +123,16 @@ static inline void cma_configfs_exit(void)
}
#endif
-void cma_ref_dev(struct cma_device *dev);
-void cma_deref_dev(struct cma_device *dev);
+void cma_dev_get(struct cma_device *dev);
+void cma_dev_put(struct cma_device *dev);
typedef bool (*cma_device_filter)(struct ib_device *, void *);
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
void *cookie);
-int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
-int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+int cma_get_default_gid_type(struct cma_device *dev, u32 port);
+int cma_set_default_gid_type(struct cma_device *dev, u32 port,
enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+int cma_get_default_roce_tos(struct cma_device *dev, u32 port);
+int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *dev);
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index 81e36bf13159..e45264267bcc 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -17,46 +17,6 @@
#include <linux/tracepoint.h>
#include <trace/events/rdma.h>
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST \
- ib_cm_event(REQ_ERROR) \
- ib_cm_event(REQ_RECEIVED) \
- ib_cm_event(REP_ERROR) \
- ib_cm_event(REP_RECEIVED) \
- ib_cm_event(RTU_RECEIVED) \
- ib_cm_event(USER_ESTABLISHED) \
- ib_cm_event(DREQ_ERROR) \
- ib_cm_event(DREQ_RECEIVED) \
- ib_cm_event(DREP_RECEIVED) \
- ib_cm_event(TIMEWAIT_EXIT) \
- ib_cm_event(MRA_RECEIVED) \
- ib_cm_event(REJ_RECEIVED) \
- ib_cm_event(LAP_ERROR) \
- ib_cm_event(LAP_RECEIVED) \
- ib_cm_event(APR_RECEIVED) \
- ib_cm_event(SIDR_REQ_ERROR) \
- ib_cm_event(SIDR_REQ_RECEIVED) \
- ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) { IB_CM_##x, #x },
-#define ib_cm_event_end(x) { IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
- __print_symbolic(x, IB_CM_EVENT_LIST)
-
DECLARE_EVENT_CLASS(cma_fsm_class,
TP_PROTO(
@@ -103,23 +63,33 @@ DEFINE_CMA_FSM_EVENT(sent_drep);
DEFINE_CMA_FSM_EVENT(sent_dreq);
DEFINE_CMA_FSM_EVENT(id_destroy);
-TRACE_EVENT(cm_id_create,
+TRACE_EVENT(cm_id_attach,
TP_PROTO(
- const struct rdma_id_private *id_priv
+ const struct rdma_id_private *id_priv,
+ const struct ib_device *device
),
- TP_ARGS(id_priv),
+ TP_ARGS(id_priv, device),
TP_STRUCT__entry(
__field(u32, cm_id)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ __string(devname, device->name)
),
TP_fast_assign(
__entry->cm_id = id_priv->res.id;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ __assign_str(devname, device->name);
),
- TP_printk("cm.id=%u",
- __entry->cm_id
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __get_str(devname)
)
);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cf42acca4a3a..f66f48d860ec 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,6 +44,7 @@
#include <rdma/ib_mad.h>
#include <rdma/restrack.h>
#include "mad_priv.h"
+#include "restrack.h"
/* Total number of ports combined across all struct ib_devices's */
#define RDMA_MAX_PORTS 8192
@@ -77,19 +78,17 @@ static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
return net_generic(net, rdma_dev_net_id);
}
-int ib_device_register_sysfs(struct ib_device *device);
-void ib_device_unregister_sysfs(struct ib_device *device);
int ib_device_rename(struct ib_device *ibdev, const char *name);
int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim);
-typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
+typedef void (*roce_netdev_callback)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
-typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
+typedef bool (*roce_netdev_filter)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
- unsigned int port);
+ u32 port);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
@@ -112,7 +111,7 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
struct ib_client_nl_info {
struct sk_buff *nl_msg;
struct device *cdev;
- unsigned int port;
+ u32 port;
u64 abi;
};
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
@@ -127,24 +126,24 @@ int ib_cache_gid_parse_type_str(const char *buf);
const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev);
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
@@ -213,15 +212,15 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
-int ib_get_cached_subnet_prefix(struct ib_device *device,
- u8 port_num,
- u64 *sn_pfx);
+void ib_get_cached_subnet_prefix(struct ib_device *device,
+ u32 port_num,
+ u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
void ib_security_release_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix);
int ib_security_modify_qp(struct ib_qp *qp,
@@ -246,7 +245,7 @@ static inline void ib_security_release_port_pkey_list(struct ib_device *device)
}
static inline void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
}
@@ -317,58 +316,13 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
void nldev_init(void);
void nldev_exit(void);
-static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
- struct ib_pd *pd,
- struct ib_qp_init_attr *attr,
- struct ib_udata *udata,
- struct ib_uqp_object *uobj)
-{
- enum ib_qp_type qp_type = attr->qp_type;
- struct ib_qp *qp;
- bool is_xrc;
-
- if (!dev->ops.create_qp)
- return ERR_PTR(-EOPNOTSUPP);
-
- qp = dev->ops.create_qp(pd, attr, udata);
- if (IS_ERR(qp))
- return qp;
-
- qp->device = dev;
- qp->pd = pd;
- qp->uobject = uobj;
- qp->real_qp = qp;
-
- qp->qp_type = attr->qp_type;
- qp->rwq_ind_tbl = attr->rwq_ind_tbl;
- qp->send_cq = attr->send_cq;
- qp->recv_cq = attr->recv_cq;
- qp->srq = attr->srq;
- qp->rwq_ind_tbl = attr->rwq_ind_tbl;
- qp->event_handler = attr->event_handler;
-
- atomic_set(&qp->usecnt, 0);
- spin_lock_init(&qp->mr_lock);
- INIT_LIST_HEAD(&qp->rdma_mrs);
- INIT_LIST_HEAD(&qp->sig_mrs);
-
- /*
- * We don't track XRC QPs for now, because they don't have PD
- * and more importantly they are created internaly by driver,
- * see mlx5 create_dev_resources() as an example.
- */
- is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
- if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
- qp->res.type = RDMA_RESTRACK_QP;
- if (uobj)
- rdma_restrack_uadd(&qp->res);
- else
- rdma_restrack_kadd(&qp->res);
- } else
- qp->res.valid = false;
-
- return qp;
-}
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp);
+void ib_qp_usecnt_dec(struct ib_qp *qp);
struct rdma_dev_addr;
int rdma_resolve_ip_route(struct sockaddr *src_addr,
@@ -390,13 +344,16 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
void ib_free_port_attrs(struct ib_core_device *coredev);
int ib_setup_port_attrs(struct ib_core_device *coredev);
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev, u32 port_num);
+void ib_device_release_hw_stats(struct hw_stats_device_data *data);
+int ib_setup_device_attrs(struct ib_device *ibdev);
int rdma_compatdev_set(u8 enable);
-int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
- struct kobject *kobj, struct kobj_type *ktype,
- const char *name);
-void ib_port_unregister_module_stat(struct kobject *kobj);
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
int ib_device_set_netns_put(struct sk_buff *skb,
struct ib_device *dev, u32 ns_fd);
@@ -414,4 +371,6 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv,
struct vm_area_struct *vma,
struct rdma_user_mmap_entry *entry);
+void ib_cq_pool_cleanup(struct ib_device *dev);
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 2257d7f7810f..af59486fe418 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -8,33 +8,43 @@
#include "core_priv.h"
#include "restrack.h"
-#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)
+#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
-static int __counter_set_mode(struct rdma_counter_mode *curr,
+static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
enum rdma_nl_counter_mask new_mask)
{
- if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
- ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
- (curr->mode != RDMA_COUNTER_MODE_NONE)))
- return -EINVAL;
+ if (new_mode == RDMA_COUNTER_MODE_AUTO) {
+ if (new_mask & (~ALL_AUTO_MODE_MASKS))
+ return -EINVAL;
+ if (port_counter->num_counters)
+ return -EBUSY;
+ }
- curr->mode = new_mode;
- curr->mask = new_mask;
+ port_counter->mode.mode = new_mode;
+ port_counter->mode.mask = new_mask;
return 0;
}
-/**
+/*
* rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
*
- * When @on is true, the @mask must be set; When @on is false, it goes
- * into manual mode if there's any counter, so that the user is able to
- * manually access them.
+ * @dev: Device to operate
+ * @port: Port to use
+ * @mask: Mask to configure
+ * @extack: Message to the user
+ *
+ * Return 0 on success; leaving the counter mode unchanged is also treated as
+ * success.
+ * Return -EBUSY when changing to auto mode while there are still bound counters.
+ *
*/
-int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
- bool on, enum rdma_nl_counter_mask mask)
+int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mask mask,
+ struct netlink_ext_ack *extack)
{
struct rdma_port_counter *port_counter;
+ enum rdma_nl_counter_mode mode;
int ret;
port_counter = &dev->port_data[port].port_counter;
@@ -42,30 +52,95 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
return -EOPNOTSUPP;
mutex_lock(&port_counter->lock);
- if (on) {
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_AUTO, mask);
- } else {
- if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
- ret = -EINVAL;
- goto out;
- }
-
- if (port_counter->num_counters)
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
- else
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_NONE, 0);
+ if (mask)
+ mode = RDMA_COUNTER_MODE_AUTO;
+ else
+ mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
+ RDMA_COUNTER_MODE_NONE;
+
+ if (port_counter->mode.mode == mode &&
+ port_counter->mode.mask == mask) {
+ ret = 0;
+ goto out;
}
+ ret = __counter_set_mode(port_counter, mode, mask);
+
out:
mutex_unlock(&port_counter->lock);
+ if (ret == -EBUSY)
+ NL_SET_ERR_MSG(
+ extack,
+ "Modifying auto mode is not allowed when there is a bound QP");
+ return ret;
+}
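A short sketch of how the reworked entry point behaves for its caller (normally reached through the nldev netlink handler); the wrapper and its arguments are placeholders:

/* Hypothetical demonstration of the new mask-driven interface. */
static int demo_toggle_auto_mode(struct ib_device *dev, u32 port,
				 struct netlink_ext_ack *extack)
{
	int ret;

	/* Non-zero mask: enable auto mode (per QP type and per PID here);
	 * fails with -EBUSY while manually bound counters still exist.
	 */
	ret = rdma_counter_set_auto_mode(dev, port,
					 RDMA_COUNTER_MASK_QP_TYPE |
					 RDMA_COUNTER_MASK_PID, extack);
	if (ret)
		return ret;

	/* Zero mask: leave auto mode; the port falls back to MANUAL while
	 * counters remain allocated, otherwise to NONE. Requesting the mode
	 * the port already has counts as success.
	 */
	return rdma_counter_set_auto_mode(dev, port, 0, extack);
}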
+
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+ unsigned int index, bool enable)
+{
+ struct rdma_hw_stats *stats;
+ int ret = 0;
+
+ if (!dev->ops.modify_hw_stat)
+ return -EOPNOTSUPP;
+
+ stats = ib_get_hw_stats_port(dev, port);
+ if (!stats || index >= stats->num_counters ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ mutex_lock(&stats->lock);
+
+ if (enable != test_bit(index, stats->is_disabled))
+ goto out;
+
+ ret = dev->ops.modify_hw_stat(dev, port, index, enable);
+ if (ret)
+ goto out;
+
+ if (enable)
+ clear_bit(index, stats->is_disabled);
+ else
+ set_bit(index, stats->is_disabled);
+out:
+ mutex_unlock(&stats->lock);
return ret;
}
-static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
- enum rdma_nl_counter_mode mode)
+static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
+ struct ib_qp *qp,
+ enum rdma_nl_counter_mode mode)
{
struct rdma_port_counter *port_counter;
struct rdma_counter *counter;
@@ -80,18 +155,30 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
counter->device = dev;
counter->port = port;
- counter->res.type = RDMA_RESTRACK_COUNTER;
- counter->stats = dev->ops.counter_alloc_stats(counter);
+
+ rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+ counter->stats = dev->ops.counter_alloc_stats(counter);
if (!counter->stats)
goto err_stats;
port_counter = &dev->port_data[port].port_counter;
mutex_lock(&port_counter->lock);
- if (mode == RDMA_COUNTER_MODE_MANUAL) {
- ret = __counter_set_mode(&port_counter->mode,
- RDMA_COUNTER_MODE_MANUAL, 0);
- if (ret)
+ switch (mode) {
+ case RDMA_COUNTER_MODE_MANUAL:
+ ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
+ 0);
+ if (ret) {
+ mutex_unlock(&port_counter->lock);
goto err_mode;
+ }
+ break;
+ case RDMA_COUNTER_MODE_AUTO:
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
}
port_counter->num_counters++;
@@ -101,12 +188,18 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
kref_init(&counter->kref);
mutex_init(&counter->lock);
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret)
+ goto err_mode;
+
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
return counter;
err_mode:
- mutex_unlock(&port_counter->lock);
- kfree(counter->stats);
+ rdma_free_hw_stats_struct(counter->stats);
err_stats:
+ rdma_restrack_put(&counter->res);
kfree(counter);
return NULL;
}
@@ -120,71 +213,29 @@ static void rdma_counter_free(struct rdma_counter *counter)
port_counter->num_counters--;
if (!port_counter->num_counters &&
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
- __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
- 0);
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
mutex_unlock(&port_counter->lock);
rdma_restrack_del(&counter->res);
- kfree(counter->stats);
+ rdma_free_hw_stats_struct(counter->stats);
kfree(counter);
}
-static void auto_mode_init_counter(struct rdma_counter *counter,
- const struct ib_qp *qp,
- enum rdma_nl_counter_mask new_mask)
-{
- struct auto_mode_param *param = &counter->mode.param;
-
- counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
- counter->mode.mask = new_mask;
-
- if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
- param->qp_type = qp->qp_type;
-}
-
static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
enum rdma_nl_counter_mask auto_mask)
{
struct auto_mode_param *param = &counter->mode.param;
bool match = true;
- /*
- * Ensure that counter belongs to the right PID. This operation can
- * race with user space which kills the process and leaves QP and
- * counters orphans.
- *
- * It is not a big deal because exitted task will leave both QP and
- * counter in the same bucket of zombie process. Just ensure that
- * process is still alive before procedding.
- *
- */
- if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) ||
- !task_pid_nr(qp->res.task))
- return false;
-
if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
match &= (param->qp_type == qp->qp_type);
- return match;
-}
-
-static int __rdma_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- int ret;
-
- if (qp->counter)
- return -EINVAL;
+ if (auto_mask & RDMA_COUNTER_MASK_PID)
+ match &= (task_pid_nr(counter->res.task) ==
+ task_pid_nr(qp->res.task));
- if (!qp->device->ops.counter_bind_qp)
- return -EOPNOTSUPP;
-
- mutex_lock(&counter->lock);
- ret = qp->device->ops.counter_bind_qp(counter, qp);
- mutex_unlock(&counter->lock);
-
- return ret;
+ return match;
}
static int __rdma_counter_unbind_qp(struct ib_qp *qp)
@@ -202,7 +253,7 @@ static int __rdma_counter_unbind_qp(struct ib_qp *qp)
return ret;
}
-static void counter_history_stat_update(const struct rdma_counter *counter)
+static void counter_history_stat_update(struct rdma_counter *counter)
{
struct ib_device *dev = counter->device;
struct rdma_port_counter *port_counter;
@@ -212,18 +263,20 @@ static void counter_history_stat_update(const struct rdma_counter *counter)
if (!port_counter->hstats)
return;
+ rdma_counter_query_stats(counter);
+
for (i = 0; i < counter->stats->num_counters; i++)
port_counter->hstats->value[i] += counter->stats->value[i];
}
-/**
+/*
* rdma_get_counter_auto_mode - Find the counter that @qp should be bound
* with in auto mode
*
* Return: The counter (with ref-count increased) if found
*/
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
- u8 port)
+ u32 port)
{
struct rdma_port_counter *port_counter;
struct rdma_counter *counter = NULL;
@@ -253,18 +306,6 @@ next:
return counter;
}
-static void rdma_counter_res_add(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- if (rdma_is_kernel_res(&qp->res)) {
- rdma_restrack_set_task(&counter->res, qp->res.kern_name);
- rdma_restrack_kadd(&counter->res);
- } else {
- rdma_restrack_attach_task(&counter->res, qp->res.task);
- rdma_restrack_uadd(&counter->res);
- }
-}
-
static void counter_release(struct kref *kref)
{
struct rdma_counter *counter;
@@ -275,18 +316,18 @@ static void counter_release(struct kref *kref)
rdma_counter_free(counter);
}
-/**
+/*
* rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
* the auto-mode rule
*/
-int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
{
struct rdma_port_counter *port_counter;
struct ib_device *dev = qp->device;
struct rdma_counter *counter;
int ret;
- if (!qp->res.valid)
+ if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
return 0;
if (!rdma_is_port_valid(dev, port))
@@ -304,25 +345,15 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
return ret;
}
} else {
- counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
if (!counter)
return -ENOMEM;
-
- auto_mode_init_counter(counter, qp, port_counter->mode.mask);
-
- ret = __rdma_counter_bind_qp(counter, qp);
- if (ret) {
- rdma_counter_free(counter);
- return ret;
- }
-
- rdma_counter_res_add(counter, qp);
}
return 0;
}
-/**
+/*
* rdma_counter_unbind_qp - Unbind a qp from a counter
* @force:
* true - Decrease the counter ref-count anyway (e.g., qp destroy)
@@ -359,7 +390,7 @@ int rdma_counter_query_stats(struct rdma_counter *counter)
}
static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
- u8 port, u32 index)
+ u32 port, u32 index)
{
struct rdma_restrack_entry *res;
struct rdma_restrack_root *rt;
@@ -391,11 +422,11 @@ next:
return sum;
}
-/**
+/*
* rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
* specific port, including the running ones and history data
*/
-u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
{
struct rdma_port_counter *port_counter;
u64 sum;
@@ -430,15 +461,6 @@ err:
return NULL;
}
-static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- if ((counter->device != qp->device) || (counter->port != qp->port))
- return -EINVAL;
-
- return __rdma_counter_bind_qp(counter, qp);
-}
-
static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
u32 counter_id)
{
@@ -456,10 +478,10 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
return counter;
}
-/**
+/*
* rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
*/
-int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id)
{
struct rdma_port_counter *port_counter;
@@ -481,12 +503,17 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
goto err;
}
- if (counter->res.task != qp->res.task) {
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ if ((counter->device != qp->device) || (counter->port != qp->port)) {
ret = -EINVAL;
goto err_task;
}
- ret = rdma_counter_bind_qp_manual(counter, qp);
+ ret = __rdma_counter_bind_qp(counter, qp);
if (ret)
goto err_task;
@@ -500,11 +527,11 @@ err:
return ret;
}
-/**
+/*
* rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
* The id of new counter is returned in @counter_id
*/
-int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
u32 qp_num, u32 *counter_id)
{
struct rdma_port_counter *port_counter;
@@ -531,35 +558,27 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
goto err;
}
- counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
if (!counter) {
ret = -ENOMEM;
goto err;
}
- ret = rdma_counter_bind_qp_manual(counter, qp);
- if (ret)
- goto err_bind;
-
if (counter_id)
*counter_id = counter->id;
- rdma_counter_res_add(counter, qp);
-
rdma_restrack_put(&qp->res);
- return ret;
+ return 0;
-err_bind:
- rdma_counter_free(counter);
err:
rdma_restrack_put(&qp->res);
return ret;
}
-/**
+/*
* rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
*/
-int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id)
{
struct rdma_port_counter *port_counter;
@@ -592,7 +611,7 @@ out:
return ret;
}
-int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask)
{
@@ -618,10 +637,10 @@ void rdma_counter_init(struct ib_device *dev)
port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
mutex_init(&port_counter->lock);
- if (!dev->ops.alloc_hw_stats)
+ if (!dev->ops.alloc_hw_port_stats)
continue;
- port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
+ port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
if (!port_counter->hstats)
goto fail;
}
@@ -631,7 +650,7 @@ void rdma_counter_init(struct ib_device *dev)
fail:
for (i = port; i >= rdma_start_port(dev); i--) {
 		port_counter = &dev->port_data[i].port_counter;
- kfree(port_counter->hstats);
+ rdma_free_hw_stats_struct(port_counter->hstats);
port_counter->hstats = NULL;
mutex_destroy(&port_counter->lock);
}
@@ -644,7 +663,7 @@ void rdma_counter_release(struct ib_device *dev)
rdma_for_each_port(dev, port) {
port_counter = &dev->port_data[port].port_counter;
- kfree(port_counter->hstats);
+ rdma_free_hw_stats_struct(port_counter->hstats);
mutex_destroy(&port_counter->lock);
}
}
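
The counters.c hunks above split the old alloc_hw_stats callback into per-device and per-port ops and free the result with rdma_free_hw_stats_struct(). A minimal sketch of a provider implementing the per-port op follows; the foo_ names and the stat list are made up, while rdma_alloc_hw_stats_struct() and RDMA_HW_STATS_DEFAULT_LIFESPAN are assumed to be the core helpers of this era that pair with rdma_free_hw_stats_struct().

	/* Illustrative provider op; descriptors and the foo_ prefix are
	 * hypothetical. */
	static const struct rdma_stat_desc foo_port_descs[] = {
		{ .name = "rx_pkts" },
		{ .name = "tx_pkts" },
	};

	static struct rdma_hw_stats *
	foo_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
	{
		return rdma_alloc_hw_stats_struct(foo_port_descs,
						  ARRAY_SIZE(foo_port_descs),
						  RDMA_HW_STATS_DEFAULT_LIFESPAN);
	}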
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 4f25b2400694..a70876a0a231 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -2,12 +2,15 @@
/*
* Copyright (c) 2015 HGST, a Western Digital Company.
*/
-#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
#include <trace/events/rdma_core.h>
+/* Max size for shared CQ, may require tuning */
+#define IB_MAX_SHARED_CQ_SZ 4096U
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
@@ -68,6 +71,15 @@ static void rdma_dim_init(struct ib_cq *cq)
INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}
+static void rdma_dim_destroy(struct ib_cq *cq)
+{
+ if (!cq->dim)
+ return;
+
+ cancel_work_sync(&cq->dim->work);
+ kfree(cq->dim);
+}
+
static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
int rc;
@@ -110,7 +122,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
}
/**
- * ib_process_direct_cq - process a CQ in caller context
+ * ib_process_cq_direct - process a CQ in caller context
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
@@ -184,24 +196,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * __ib_alloc_cq_user - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
- * @udata: Valid user data or NULL for kernel object
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -218,20 +228,19 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
+ cq->comp_vector = comp_vector;
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
goto out_free_cq;
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = dev->ops.create_cq(cq, &cq_attr, NULL);
if (ret)
goto out_free_wc;
- rdma_restrack_kadd(&cq->res);
-
rdma_dim_init(cq);
switch (cq->poll_ctx) {
@@ -257,20 +266,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
goto out_destroy_cq;
}
+ rdma_restrack_add(&cq->res);
trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
out_destroy_cq:
- rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
+ rdma_dim_destroy(cq);
+ cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
+ rdma_restrack_put(&cq->res);
kfree(cq->wc);
out_free_cq:
kfree(cq);
trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
/**
* __ib_alloc_cq_any - allocate a completion queue
@@ -295,20 +306,23 @@ struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
atomic_inc_return(&counter) %
min_t(int, dev->num_comp_vectors, num_online_cpus());
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- caller, NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);
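
As the reworked kernel-doc above states, in-kernel users of these allocators must dispatch completions through wr->wr_cqe rather than wr->wr_id. A minimal sketch of that pattern, with hypothetical ulp_* names:

	/* Sketch of the wr_cqe completion pattern; ulp_* is made up. */
	struct ulp_req {
		struct ib_cqe cqe;	/* embedded completion cookie */
		/* ... request state ... */
	};

	static void ulp_send_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		struct ulp_req *req =
			container_of(wc->wr_cqe, struct ulp_req, cqe);

		/* wc->status tells whether the send succeeded */
	}

	static int ulp_post(struct ib_qp *qp, struct ulp_req *req)
	{
		struct ib_send_wr wr = {};

		req->cqe.done = ulp_send_done;	/* run by the core poller */
		wr.opcode = IB_WR_SEND;
		wr.wr_cqe = &req->cqe;		/* instead of wr.wr_id */
		return ib_post_send(qp, &wr, NULL);
	}

The CQ itself comes from ib_alloc_cq(ibdev, NULL, nr_cqe, comp_vector, IB_POLL_SOFTIRQ) or a similar poll context, so the ULP never polls it directly.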
/**
- * ib_free_cq_user - free a completion queue
+ * ib_free_cq - free a completion queue
* @cq: completion queue to free.
- * @udata: User data or NULL for kernel object
*/
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
{
+ int ret;
+
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
+ if (WARN_ON_ONCE(cq->cqe_used))
+ return;
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -324,13 +338,170 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
WARN_ON_ONCE(1);
}
+ rdma_dim_destroy(cq);
trace_cq_free(cq);
+ ret = cq->device->ops.destroy_cq(cq, NULL);
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
- if (cq->dim)
- cancel_work_sync(&cq->dim->work);
- kfree(cq->dim);
kfree(cq->wc);
kfree(cq);
}
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_free_cq);
+
+void ib_cq_pool_cleanup(struct ib_device *dev)
+{
+ struct ib_cq *cq, *n;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
+ list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
+ pool_entry) {
+ WARN_ON(cq->cqe_used);
+ list_del(&cq->pool_entry);
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ }
+}
+
+static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
+ enum ib_poll_context poll_ctx)
+{
+ LIST_HEAD(tmp_list);
+ unsigned int nr_cqs, i;
+ struct ib_cq *cq, *n;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate at least as many CQEs as requested, and otherwise
+ * a reasonable batch size so that we can share CQs between
+ * multiple users instead of allocating a larger number of CQs.
+ */
+ nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
+ max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
+ nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ for (i = 0; i < nr_cqs; i++) {
+ cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto out_free_cqs;
+ }
+ cq->shared = true;
+ list_add_tail(&cq->pool_entry, &tmp_list);
+ }
+
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return 0;
+
+out_free_cqs:
+ list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) {
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ return ret;
+}
+
+/**
+ * ib_cq_pool_get() - Find the least used completion queue that matches
+ * a given cpu hint (or least used for wild card affinity) and fits
+ * nr_cqe.
+ * @dev: rdma device
+ * @nr_cqe: number of needed cqe entries
+ * @comp_vector_hint: completion vector hint; pass -1 to let the driver assign
+ *   a comp vector based on an internal counter
+ * @poll_ctx: cq polling context
+ *
+ * Finds a CQ that satisfies @comp_vector_hint and @nr_cqe requirements and
+ * claims entries in it for us. If no CQ is available, allocate a new CQ
+ * with the requirements and add it to the device pool.
+ * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
+ * for @poll_ctx.
+ */
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx)
+{
+ static unsigned int default_comp_vector;
+ unsigned int vector, num_comp_vectors;
+ struct ib_cq *cq, *found = NULL;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return ERR_PTR(-EINVAL);
+ }
+
+ num_comp_vectors =
+ min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+	/* Project the affinity to the device completion vector range */
+ if (comp_vector_hint < 0) {
+ comp_vector_hint =
+ (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
+ WRITE_ONCE(default_comp_vector, comp_vector_hint);
+ }
+ vector = comp_vector_hint % num_comp_vectors;
+
+ /*
+ * Find the least used CQ with correct affinity and
+ * enough free CQ entries
+ */
+ while (!found) {
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
+ pool_entry) {
+ /*
+ * Check to see if we have found a CQ with the
+ * correct completion vector
+ */
+ if (vector != cq->comp_vector)
+ continue;
+ if (cq->cqe_used + nr_cqe > cq->cqe)
+ continue;
+ found = cq;
+ break;
+ }
+
+ if (found) {
+ found->cqe_used += nr_cqe;
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return found;
+ }
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ /*
+		 * Didn't find a match or ran out of CQs in the device
+		 * pool; allocate a new array of CQs.
+ */
+ ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return found;
+}
+EXPORT_SYMBOL(ib_cq_pool_get);
+
+/**
+ * ib_cq_pool_put - Return a CQ taken from a shared pool.
+ * @cq: The CQ to return.
+ * @nr_cqe: The max number of cqes that the user had requested.
+ */
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
+{
+ if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
+ return;
+
+ spin_lock_irq(&cq->device->cq_pools_lock);
+ cq->cqe_used -= nr_cqe;
+ spin_unlock_irq(&cq->device->cq_pools_lock);
+}
+EXPORT_SYMBOL(ib_cq_pool_put);
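
The shared-CQ pool added above is used through the ib_cq_pool_get()/ib_cq_pool_put() pair. A short usage sketch, with illustrative ulp_ wrappers:

	/* Sketch: claiming and returning shared-CQ entries. */
	static struct ib_cq *ulp_get_cq(struct ib_device *ibdev,
					unsigned int nr_cqe)
	{
		/* -1: let the core rotate across completion vectors */
		return ib_cq_pool_get(ibdev, nr_cqe, -1, IB_POLL_WORKQUEUE);
	}

	static void ulp_put_cq(struct ib_cq *cq, unsigned int nr_cqe)
	{
		/* must pass back the same nr_cqe that was claimed */
		ib_cq_pool_put(cq, nr_cqe);
	}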
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f6c255202d7f..b69e2c4e4d2a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unreg_wq;
/*
* Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -272,7 +273,6 @@ static void ib_device_check_mandatory(struct ib_device *device)
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
- IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_qp),
@@ -285,6 +285,7 @@ static void ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(reg_user_mr),
IB_MANDATORY_FUNC(dereg_mr),
IB_MANDATORY_FUNC(get_port_immutable)
};
@@ -421,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
return ret;
}
- strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+ strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
ret = rename_compat_devs(ibdev);
downgrade_write(&devices_rwsem);
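
The strlcpy() to strscpy() conversions in this file are behavior-preserving for valid names; the practical difference is that strscpy() reports truncation instead of returning strlen(src), and never has to walk a possibly unterminated source. An illustrative check (the code above intentionally keeps ignoring the return value):

	/* strscpy() returns the copied length, or -E2BIG on truncation. */
	if (strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX) < 0)
		return -EINVAL;	/* name did not fit */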
@@ -491,6 +492,8 @@ static void ib_device_release(struct device *device)
free_netdevs(dev);
WARN_ON(refcount_read(&dev->refcount));
+ if (dev->hw_stats_data)
+ ib_device_release_hw_stats(dev->hw_stats_data);
if (dev->port_data) {
ib_cache_release_one(dev);
ib_security_release_port_pkey_list(dev);
@@ -570,6 +573,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
struct ib_device *_ib_alloc_device(size_t size)
{
struct ib_device *device;
+ unsigned int i;
if (WARN_ON(size < sizeof(struct ib_device)))
return NULL;
@@ -583,7 +587,6 @@ struct ib_device *_ib_alloc_device(size_t size)
return NULL;
}
- device->groups[0] = &ib_dev_attr_group;
rdma_init_coredev(&device->coredev, device, &init_net);
INIT_LIST_HEAD(&device->event_handler_list);
@@ -601,6 +604,43 @@ struct ib_device *_ib_alloc_device(size_t size)
init_completion(&device->unreg_completion);
INIT_WORK(&device->unregistration_work, ib_unregister_work);
+ spin_lock_init(&device->cq_pools_lock);
+ for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
+ INIT_LIST_HEAD(&device->cq_pools[i]);
+
+ rwlock_init(&device->cache_lock);
+
+ device->uverbs_cmd_mask =
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
@@ -677,8 +717,20 @@ static int add_client_context(struct ib_device *device,
if (ret)
goto out;
downgrade_write(&device->client_data_rwsem);
- if (client->add)
- client->add(device);
+ if (client->add) {
+ if (client->add(device)) {
+ /*
+ * If a client fails to add then the error code is
+ * ignored, but we won't call any more ops on this
+ * client.
+ */
+ xa_erase(&device->client_data, client->client_id);
+ up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
+ return 0;
+ }
+ }
/* Readers shall not see a client until add has been completed */
xa_set_mark(&device->client_data, client->client_id,
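
With the hunk above, a client's add() callback returns int, and a failing client is silently dropped from the device rather than aborting registration. A sketch of a client under the new contract (foo_* names are hypothetical):

	static struct ib_client foo_client;

	static int foo_add_one(struct ib_device *ibdev)
	{
		struct foo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

		if (!ctx)
			return -ENOMEM;	/* core skips this client; remove()
					 * is never called for it */
		ib_set_client_data(ibdev, &foo_client, ctx);
		return 0;
	}

	static void foo_remove_one(struct ib_device *ibdev, void *client_data)
	{
		kfree(client_data);
	}

	static struct ib_client foo_client = {
		.name   = "foo",
		.add    = foo_add_one,
		.remove = foo_remove_one,
	};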
@@ -731,7 +783,7 @@ static void remove_client_context(struct ib_device *device,
static int alloc_port_data(struct ib_device *device)
{
struct ib_port_data_rcu *pdata_rcu;
- unsigned int port;
+ u32 port;
if (device->port_data)
return 0;
@@ -740,6 +792,10 @@ static int alloc_port_data(struct ib_device *device)
if (WARN_ON(!device->phys_port_cnt))
return -EINVAL;
+ /* Reserve U32_MAX so the logic to go over all the ports is sane */
+ if (WARN_ON(device->phys_port_cnt == U32_MAX))
+ return -EINVAL;
+
/*
* device->port_data is indexed directly by the port number to make
* access to this data as efficient as possible.
@@ -771,7 +827,7 @@ static int alloc_port_data(struct ib_device *device)
return 0;
}
-static int verify_immutable(const struct ib_device *dev, u8 port)
+static int verify_immutable(const struct ib_device *dev, u32 port)
{
return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
rdma_max_mad_size(dev, port) != 0);
@@ -779,7 +835,7 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
static int setup_port_data(struct ib_device *device)
{
- unsigned int port;
+ u32 port;
int ret;
ret = alloc_port_data(device);
@@ -800,6 +856,20 @@ static int setup_port_data(struct ib_device *device)
return 0;
}
+/**
+ * ib_port_immutable_read() - Read rdma port's immutable data
+ * @dev: IB device
+ * @port: port number whose immutable data to read. It starts at index 1 and
+ *        is valid up to and including rdma_end_port().
+ */
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port)
+{
+ WARN_ON(!rdma_is_port_valid(dev, port));
+ return &dev->port_data[port].immutable;
+}
+EXPORT_SYMBOL(ib_port_immutable_read);
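
ib_port_immutable_read() gives ULPs read-only access to per-port immutable data without reaching into port_data directly. A small sketch (the foo_ helper and the particular field use are illustrative):

	static bool foo_port_is_roce(struct ib_device *ibdev, u32 port)
	{
		const struct ib_port_immutable *immutable =
			ib_port_immutable_read(ibdev, port);

		/* pkey_tbl_len, gid_tbl_len etc. live here as well */
		return immutable->core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
	}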
+
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->ops.get_dev_fw_str)
@@ -820,15 +890,8 @@ static void ib_policy_change_task(struct work_struct *work)
rdma_for_each_port (dev, i) {
u64 sp;
- int ret = ib_get_cached_subnet_prefix(dev,
- i,
- &sp);
-
- WARN_ONCE(ret,
- "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
- ret);
- if (!ret)
- ib_security_cache_change(dev, i, sp);
+ ib_get_cached_subnet_prefix(dev, i, &sp);
+ ib_security_cache_change(dev, i, sp);
}
}
up_read(&devices_rwsem);
@@ -896,7 +959,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
- dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
ret = device_add(&cdev->dev);
if (ret)
@@ -1152,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name)
ret = -ENFILE;
goto out;
}
- strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+ strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
&last_id, GFP_KERNEL);
@@ -1164,56 +1229,6 @@ out:
return ret;
}
-static void setup_dma_device(struct ib_device *device)
-{
- struct device *parent = device->dev.parent;
-
- WARN_ON_ONCE(device->dma_device);
- if (device->dev.dma_ops) {
- /*
- * The caller provided custom DMA operations. Copy the
- * DMA-related fields that are used by e.g. dma_alloc_coherent()
- * into device->dev.
- */
- device->dma_device = &device->dev;
- if (!device->dev.dma_mask) {
- if (parent)
- device->dev.dma_mask = parent->dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- if (!device->dev.coherent_dma_mask) {
- if (parent)
- device->dev.coherent_dma_mask =
- parent->coherent_dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- } else {
- /*
- * The caller did not provide custom DMA operations. Use the
- * DMA mapping operations of the parent device.
- */
- WARN_ON_ONCE(!parent);
- device->dma_device = parent;
- }
-
- if (!device->dev.dma_parms) {
- if (parent) {
- /*
- * The caller did not provide DMA parameters, so
- * 'parent' probably represents a PCI device. The PCI
- * core sets the maximum segment size to 64
- * KB. Increase this parameter to 2 GB.
- */
- device->dev.dma_parms = parent->dma_parms;
- dma_set_max_seg_size(device->dma_device, SZ_2G);
- } else {
- WARN_ON_ONCE(true);
- }
- }
-}
-
/*
* setup_device() allocates memory and sets up data that requires calling the
* device ops, this is the only reason these actions are not done during
@@ -1224,7 +1239,6 @@ static int setup_device(struct ib_device *device)
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
int ret;
- setup_dma_device(device);
ib_device_check_mandatory(device);
ret = setup_port_data(device);
@@ -1268,6 +1282,8 @@ static void disable_device(struct ib_device *device)
remove_client_context(device, cid);
}
+ ib_cq_pool_cleanup(device);
+
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
wait_for_completion(&device->unreg_completion);
@@ -1325,11 +1341,18 @@ out:
return ret;
}
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device: Device to register
* @name: unique string device name. This may include a '%' which will
- * cause a unique index to be added to the passed device name.
+ * cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ * device will be used. In this case the caller should fully
+ *              set up the ibdev for DMA. This usually means using dma_virt_ops.
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
@@ -1340,7 +1363,8 @@ out:
* asynchronously then the device pointer may become freed as soon as this
* function returns.
*/
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device)
{
int ret;
@@ -1348,6 +1372,14 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
return ret;
+ /*
+ * If the caller does not provide a DMA capable device then the IB core
+ * will set up ib_sge and scatterlist structures that stash the kernel
+ * virtual address into the address field.
+ */
+ WARN_ON(dma_device && !dma_device->dma_parms);
+ device->dma_device = dma_device;
+
ret = setup_device(device);
if (ret)
return ret;
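
After this change the DMA device is passed explicitly at registration time instead of being derived from device->dev. A typical PCI-backed driver call might now look like the following; foo_dev, foo_alloc and foo_free are hypothetical:

	static int foo_probe(struct pci_dev *pdev)
	{
		struct foo_dev *dev = foo_alloc(pdev);	/* assumed helper */
		int ret;

		ret = ib_register_device(&dev->ib_dev, "foo%d", &pdev->dev);
		if (ret)
			foo_free(dev);			/* assumed helper */
		return ret;
	}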
@@ -1359,6 +1391,12 @@ int ib_register_device(struct ib_device *device, const char *name)
return ret;
}
+ device->groups[0] = &ib_dev_attr_group;
+ device->groups[1] = device->ops.device_group;
+ ret = ib_setup_device_attrs(device);
+ if (ret)
+ goto cache_cleanup;
+
ib_device_register_rdmacg(device);
rdma_counter_init(device);
@@ -1372,7 +1410,7 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
goto cg_cleanup;
- ret = ib_device_register_sysfs(device);
+ ret = ib_setup_port_attrs(&device->coredev);
if (ret) {
dev_warn(&device->dev,
"Couldn't register device with driver model\n");
@@ -1380,9 +1418,6 @@ int ib_register_device(struct ib_device *device, const char *name)
}
ret = enable_device_and_get(device);
- dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
if (ret) {
void (*dealloc_fn)(struct ib_device *);
@@ -1394,16 +1429,20 @@ int ib_register_device(struct ib_device *device, const char *name)
* possibility for a parallel unregistration along with this
* error flow. Since we have a refcount here we know any
* parallel flow is stopped in disable_device and will see the
- * NULL pointers, causing the responsibility to
+ * special dealloc_driver pointer, causing the responsibility to
* ib_dealloc_device() to revert back to this thread.
*/
dealloc_fn = device->ops.dealloc_driver;
- device->ops.dealloc_driver = NULL;
+ device->ops.dealloc_driver = prevent_dealloc_device;
ib_device_put(device);
__ib_unregister_device(device);
device->ops.dealloc_driver = dealloc_fn;
+ dev_set_uevent_suppress(&device->dev, false);
return ret;
}
+ dev_set_uevent_suppress(&device->dev, false);
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_put(device);
return 0;
@@ -1413,6 +1452,7 @@ dev_cleanup:
cg_cleanup:
dev_set_uevent_suppress(&device->dev, false);
ib_device_unregister_rdmacg(device);
+cache_cleanup:
ib_cache_cleanup_one(device);
return ret;
}
@@ -1437,7 +1477,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
/* Expedite removing unregistered pointers from the hash table */
free_netdevs(ib_dev);
- ib_device_unregister_sysfs(ib_dev);
+ ib_free_port_attrs(&ib_dev->coredev);
device_del(&ib_dev->dev);
ib_device_unregister_rdmacg(ib_dev);
ib_cache_cleanup_one(ib_dev);
@@ -1446,7 +1486,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
* Drivers using the new flow may not call ib_dealloc_device except
* in error unwind prior to registration success.
*/
- if (ib_dev->ops.dealloc_driver) {
+ if (ib_dev->ops.dealloc_driver &&
+ ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
ib_dealloc_device(ib_dev);
}
@@ -1562,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
WARN_ON(!refcount_read(&ib_dev->refcount));
WARN_ON(!ib_dev->ops.dealloc_driver);
get_device(&ib_dev->dev);
- if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+ if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -1654,13 +1695,11 @@ int ib_device_set_netns_put(struct sk_buff *skb,
}
/*
- * Currently supported only for those providers which support
- * disassociation and don't do port specific sysfs init. Once a
- * port_cleanup infrastructure is implemented, this limitation will be
- * removed.
+ * All the ib_clients, including uverbs, are reset when the namespace is
+ * changed and this cannot be blocked waiting for userspace to do
+ * something, so disassociation is mandatory.
*/
- if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
- ib_devices_shared_netns) {
+ if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
ret = -EOPNOTSUPP;
goto ns_err;
}
@@ -1868,9 +1907,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev,
/**
* ib_get_client_nl_info - Fetch the nl_info from a client
- * @device - IB device
- * @client_name - Name of the client
- * @res - Result of the query
+ * @ibdev: IB device
+ * @client_name: Name of the client
+ * @res: Result of the query
*/
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
struct ib_client_nl_info *res)
@@ -1972,7 +2011,7 @@ void ib_dispatch_event_clients(struct ib_event *event)
}
static int iw_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
struct in_device *inetdev;
@@ -2011,10 +2050,9 @@ static int iw_query_port(struct ib_device *device,
}
static int __ib_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
- union ib_gid gid = {};
int err;
memset(port_attr, 0, sizeof(*port_attr));
@@ -2027,11 +2065,8 @@ static int __ib_query_port(struct ib_device *device,
IB_LINK_LAYER_INFINIBAND)
return 0;
- err = device->ops.query_gid(device, port_num, 0, &gid);
- if (err)
- return err;
-
- port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+ ib_get_cached_subnet_prefix(device, port_num,
+ &port_attr->subnet_prefix);
return 0;
}
@@ -2045,7 +2080,7 @@ static int __ib_query_port(struct ib_device *device,
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
if (!rdma_is_port_valid(device, port_num))
@@ -2097,7 +2132,7 @@ static void add_ndev_hash(struct ib_port_data *pdata)
* NETDEV_UNREGISTER event.
*/
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
- unsigned int port)
+ u32 port)
{
struct net_device *old_ndev;
struct ib_port_data *pdata;
@@ -2140,7 +2175,7 @@ EXPORT_SYMBOL(ib_device_set_netdev);
static void free_netdevs(struct ib_device *ib_dev)
{
unsigned long flags;
- unsigned int port;
+ u32 port;
if (!ib_dev->port_data)
return;
@@ -2171,7 +2206,7 @@ static void free_netdevs(struct ib_device *ib_dev)
}
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
- unsigned int port)
+ u32 port)
{
struct ib_port_data *pdata;
struct net_device *res;
@@ -2258,7 +2293,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_callback cb,
void *cookie)
{
- unsigned int port;
+ u32 port;
rdma_for_each_port (ib_dev, port)
if (rdma_protocol_roce(ib_dev, port)) {
@@ -2298,7 +2333,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
up_read(&devices_rwsem);
}
-/**
+/*
* ib_enum_all_devs - enumerate all ib_devices
* @cb: Callback to call for each found ib_device
*
@@ -2336,11 +2371,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey)
+ u32 port_num, u16 index, u16 *pkey)
{
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ if (!device->ops.query_pkey)
+ return -EOPNOTSUPP;
+
return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
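
Since query_pkey was dropped from the mandatory table earlier in this file, callers must now be ready for -EOPNOTSUPP from devices that have no P_Key table. A hedged sketch of tolerant handling; the ulp_ wrapper and the fallback choice are illustrative:

	static int ulp_get_pkey(struct ib_device *ibdev, u32 port, u16 *pkey)
	{
		int ret = ib_query_pkey(ibdev, port, 0, pkey);

		if (ret == -EOPNOTSUPP) {
			*pkey = 0xffff;	/* assume the default full-membership
					 * P_Key */
			return 0;
		}
		return ret;
	}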
@@ -2378,7 +2416,7 @@ EXPORT_SYMBOL(ib_modify_device);
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
- u8 port_num, int port_modify_mask,
+ u32 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
int rc;
@@ -2410,10 +2448,10 @@ EXPORT_SYMBOL(ib_modify_port);
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index)
+ u32 *port_num, u16 *index)
{
union ib_gid tmp_gid;
- unsigned int port;
+ u32 port;
int ret, i;
rdma_for_each_port (device, port) {
@@ -2424,7 +2462,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
++i) {
ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
- return ret;
+ continue;
+
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
@@ -2447,7 +2486,7 @@ EXPORT_SYMBOL(ib_find_gid);
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
- u8 port_num, u16 pkey, u16 *index)
+ u32 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
@@ -2490,7 +2529,7 @@ EXPORT_SYMBOL(ib_find_pkey);
*
*/
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
- u8 port,
+ u32 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr)
@@ -2555,8 +2594,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, add_gid);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
- SET_DEVICE_OP(dev_ops, alloc_fmr);
- SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+ SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
+ SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
SET_DEVICE_OP(dev_ops, alloc_mw);
@@ -2575,14 +2614,13 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
SET_DEVICE_OP(dev_ops, create_flow);
- SET_DEVICE_OP(dev_ops, create_flow_action_esp);
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_user_ah);
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
SET_DEVICE_OP(dev_ops, dealloc_driver);
- SET_DEVICE_OP(dev_ops, dealloc_fmr);
SET_DEVICE_OP(dev_ops, dealloc_mw);
SET_DEVICE_OP(dev_ops, dealloc_pd);
SET_DEVICE_OP(dev_ops, dealloc_ucontext);
@@ -2598,24 +2636,31 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
SET_DEVICE_OP(dev_ops, destroy_srq);
SET_DEVICE_OP(dev_ops, destroy_wq);
+ SET_DEVICE_OP(dev_ops, device_group);
SET_DEVICE_OP(dev_ops, detach_mcast);
SET_DEVICE_OP(dev_ops, disassociate_ucontext);
SET_DEVICE_OP(dev_ops, drain_rq);
SET_DEVICE_OP(dev_ops, drain_sq);
SET_DEVICE_OP(dev_ops, enable_driver);
- SET_DEVICE_OP(dev_ops, fill_res_entry);
- SET_DEVICE_OP(dev_ops, fill_stat_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
SET_DEVICE_OP(dev_ops, get_dev_fw_str);
SET_DEVICE_OP(dev_ops, get_dma_mr);
SET_DEVICE_OP(dev_ops, get_hw_stats);
SET_DEVICE_OP(dev_ops, get_link_layer);
SET_DEVICE_OP(dev_ops, get_netdev);
+ SET_DEVICE_OP(dev_ops, get_numa_node);
SET_DEVICE_OP(dev_ops, get_port_immutable);
SET_DEVICE_OP(dev_ops, get_vector_affinity);
SET_DEVICE_OP(dev_ops, get_vf_config);
SET_DEVICE_OP(dev_ops, get_vf_guid);
SET_DEVICE_OP(dev_ops, get_vf_stats);
- SET_DEVICE_OP(dev_ops, init_port);
SET_DEVICE_OP(dev_ops, iw_accept);
SET_DEVICE_OP(dev_ops, iw_add_ref);
SET_DEVICE_OP(dev_ops, iw_connect);
@@ -2626,19 +2671,19 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, iw_rem_ref);
SET_DEVICE_OP(dev_ops, map_mr_sg);
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
- SET_DEVICE_OP(dev_ops, map_phys_fmr);
SET_DEVICE_OP(dev_ops, mmap);
SET_DEVICE_OP(dev_ops, mmap_free);
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
- SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
SET_DEVICE_OP(dev_ops, modify_srq);
SET_DEVICE_OP(dev_ops, modify_wq);
SET_DEVICE_OP(dev_ops, peek_cq);
SET_DEVICE_OP(dev_ops, poll_cq);
+ SET_DEVICE_OP(dev_ops, port_groups);
SET_DEVICE_OP(dev_ops, post_recv);
SET_DEVICE_OP(dev_ops, post_send);
SET_DEVICE_OP(dev_ops, post_srq_recv);
@@ -2650,26 +2695,46 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, query_port);
SET_DEVICE_OP(dev_ops, query_qp);
SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, query_ucontext);
SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
SET_DEVICE_OP(dev_ops, read_counters);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
- SET_DEVICE_OP(dev_ops, req_ncomp_notif);
+ SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
SET_DEVICE_OP(dev_ops, resize_cq);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
- SET_DEVICE_OP(dev_ops, unmap_fmr);
SET_OBJ_SIZE(dev_ops, ib_ah);
+ SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
+ SET_OBJ_SIZE(dev_ops, ib_qp);
+ SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
+ SET_OBJ_SIZE(dev_ops, ib_xrcd);
}
EXPORT_SYMBOL(ib_set_device_ops);
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ sg_dma_address(s) = (uintptr_t)sg_virt(s);
+ sg_dma_len(s) = s->length;
+ }
+ return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
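
ib_dma_virt_map_sg() backs the NULL-dma_device registration mode: software transports (the in-tree rxe and siw drivers select CONFIG_INFINIBAND_VIRT_DMA) register without a DMA-capable device, and the core then stores kernel virtual addresses in the sg list as shown above. A one-line sketch of such a registration, with a hypothetical swdev/"swfoo":

	/* No DMA device: sg entries get kernel virtual addresses via
	 * ib_dma_virt_map_sg() instead of the DMA API. */
	ret = ib_register_device(&swdev->ib_dev, "swfoo%d", NULL);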
+
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
@@ -2687,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
static int __init ib_core_init(void)
{
- int ret;
+ int ret = -ENOMEM;
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
return -ENOMEM;
+ ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_unreg_wq)
+ goto err;
+
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
- if (!ib_comp_wq) {
- ret = -ENOMEM;
- goto err;
- }
+ if (!ib_comp_wq)
+ goto err_unbound;
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
- if (!ib_comp_unbound_wq) {
- ret = -ENOMEM;
+ if (!ib_comp_unbound_wq)
goto err_comp;
- }
ret = class_register(&ib_class);
if (ret) {
@@ -2719,7 +2785,7 @@ static int __init ib_core_init(void)
ret = addr_init();
if (ret) {
- pr_warn("Could't init IB address resolution\n");
+ pr_warn("Couldn't init IB address resolution\n");
goto err_ibnl;
}
@@ -2749,10 +2815,18 @@ static int __init ib_core_init(void)
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
- roce_gid_mgmt_init();
+ ret = roce_gid_mgmt_init();
+ if (ret) {
+ pr_warn("Couldn't init RoCE GID management\n");
+ goto err_parent;
+ }
return 0;
+err_parent:
+ rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
err_compat:
unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
@@ -2767,6 +2841,8 @@ err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
+err_unbound:
+ destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -2788,7 +2864,7 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
- flush_workqueue(system_unbound_wq);
+ destroy_workqueue(ib_unreg_wq);
WARN_ON(!xa_empty(&clients));
WARN_ON(!xa_empty(&devices));
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
deleted file mode 100644
index e08aec427027..000000000000
--- a/drivers/infiniband/core/fmr_pool.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/jhash.h>
-#include <linux/kthread.h>
-
-#include <rdma/ib_fmr_pool.h>
-
-#include "core_priv.h"
-
-#define PFX "fmr_pool: "
-
-enum {
- IB_FMR_MAX_REMAPS = 32,
-
- IB_FMR_HASH_BITS = 8,
- IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
- IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
-};
-
-/*
- * If an FMR is not in use, then the list member will point to either
- * its pool's free_list (if the FMR can be mapped again; that is,
- * remap_count < pool->max_remaps) or its pool's dirty_list (if the
- * FMR needs to be unmapped before being remapped). In either of
- * these cases it is a bug if the ref_count is not 0. In other words,
- * if ref_count is > 0, then the list member must not be linked into
- * either free_list or dirty_list.
- *
- * The cache_node member is used to link the FMR into a cache bucket
- * (if caching is enabled). This is independent of the reference
- * count of the FMR. When a valid FMR is released, its ref_count is
- * decremented, and if ref_count reaches 0, the FMR is placed in
- * either free_list or dirty_list as appropriate. However, it is not
- * removed from the cache and may be "revived" if a call to
- * ib_fmr_register_physical() occurs before the FMR is remapped. In
- * this case we just increment the ref_count and remove the FMR from
- * free_list/dirty_list.
- *
- * Before we remap an FMR from free_list, we remove it from the cache
- * (to prevent another user from obtaining a stale FMR). When an FMR
- * is released, we add it to the tail of the free list, so that our
- * cache eviction policy is "least recently used."
- *
- * All manipulation of ref_count, list and cache_node is protected by
- * pool_lock to maintain consistency.
- */
-
-struct ib_fmr_pool {
- spinlock_t pool_lock;
-
- int pool_size;
- int max_pages;
- int max_remaps;
- int dirty_watermark;
- int dirty_len;
- struct list_head free_list;
- struct list_head dirty_list;
- struct hlist_head *cache_bucket;
-
- void (*flush_function)(struct ib_fmr_pool *pool,
- void * arg);
- void *flush_arg;
-
- struct kthread_worker *worker;
- struct kthread_work work;
-
- atomic_t req_ser;
- atomic_t flush_ser;
-
- wait_queue_head_t force_wait;
-};
-
-static inline u32 ib_fmr_hash(u64 first_page)
-{
- return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
- (IB_FMR_HASH_SIZE - 1);
-}
-
-/* Caller must hold pool_lock */
-static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
- u64 *page_list,
- int page_list_len,
- u64 io_virtual_address)
-{
- struct hlist_head *bucket;
- struct ib_pool_fmr *fmr;
-
- if (!pool->cache_bucket)
- return NULL;
-
- bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
-
- hlist_for_each_entry(fmr, bucket, cache_node)
- if (io_virtual_address == fmr->io_virtual_address &&
- page_list_len == fmr->page_list_len &&
- !memcmp(page_list, fmr->page_list,
- page_list_len * sizeof *page_list))
- return fmr;
-
- return NULL;
-}
-
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
-{
- int ret;
- struct ib_pool_fmr *fmr;
- LIST_HEAD(unmap_list);
- LIST_HEAD(fmr_list);
-
- spin_lock_irq(&pool->pool_lock);
-
- list_for_each_entry(fmr, &pool->dirty_list, list) {
- hlist_del_init(&fmr->cache_node);
- fmr->remap_count = 0;
- list_add_tail(&fmr->fmr->list, &fmr_list);
- }
-
- list_splice_init(&pool->dirty_list, &unmap_list);
- pool->dirty_len = 0;
-
- spin_unlock_irq(&pool->pool_lock);
-
- if (list_empty(&unmap_list)) {
- return;
- }
-
- ret = ib_unmap_fmr(&fmr_list);
- if (ret)
- pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
-
- spin_lock_irq(&pool->pool_lock);
- list_splice(&unmap_list, &pool->free_list);
- spin_unlock_irq(&pool->pool_lock);
-}
-
-static void ib_fmr_cleanup_func(struct kthread_work *work)
-{
- struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
-
- ib_fmr_batch_release(pool);
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
-
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
- kthread_queue_work(pool->worker, &pool->work);
-}
-
-/**
- * ib_create_fmr_pool - Create an FMR pool
- * @pd:Protection domain for FMRs
- * @params:FMR pool parameters
- *
- * Create a pool of FMRs. Return value is pointer to new pool or
- * error code if creation failed.
- */
-struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
- struct ib_fmr_pool_param *params)
-{
- struct ib_device *device;
- struct ib_fmr_pool *pool;
- int i;
- int ret;
- int max_remaps;
-
- if (!params)
- return ERR_PTR(-EINVAL);
-
- device = pd->device;
- if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr ||
- !device->ops.map_phys_fmr || !device->ops.unmap_fmr) {
- dev_info(&device->dev, "Device does not support FMRs\n");
- return ERR_PTR(-ENOSYS);
- }
-
- if (!device->attrs.max_map_per_fmr)
- max_remaps = IB_FMR_MAX_REMAPS;
- else
- max_remaps = device->attrs.max_map_per_fmr;
-
- pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool)
- return ERR_PTR(-ENOMEM);
-
- pool->cache_bucket = NULL;
- pool->flush_function = params->flush_function;
- pool->flush_arg = params->flush_arg;
-
- INIT_LIST_HEAD(&pool->free_list);
- INIT_LIST_HEAD(&pool->dirty_list);
-
- if (params->cache) {
- pool->cache_bucket =
- kmalloc_array(IB_FMR_HASH_SIZE,
- sizeof(*pool->cache_bucket),
- GFP_KERNEL);
- if (!pool->cache_bucket) {
- ret = -ENOMEM;
- goto out_free_pool;
- }
-
- for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
- INIT_HLIST_HEAD(pool->cache_bucket + i);
- }
-
- pool->pool_size = 0;
- pool->max_pages = params->max_pages_per_fmr;
- pool->max_remaps = max_remaps;
- pool->dirty_watermark = params->dirty_watermark;
- pool->dirty_len = 0;
- spin_lock_init(&pool->pool_lock);
- atomic_set(&pool->req_ser, 0);
- atomic_set(&pool->flush_ser, 0);
- init_waitqueue_head(&pool->force_wait);
-
- pool->worker =
- kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
- if (IS_ERR(pool->worker)) {
- pr_warn(PFX "couldn't start cleanup kthread worker\n");
- ret = PTR_ERR(pool->worker);
- goto out_free_pool;
- }
- kthread_init_work(&pool->work, ib_fmr_cleanup_func);
-
- {
- struct ib_pool_fmr *fmr;
- struct ib_fmr_attr fmr_attr = {
- .max_pages = params->max_pages_per_fmr,
- .max_maps = pool->max_remaps,
- .page_shift = params->page_shift
- };
- int bytes_per_fmr = sizeof *fmr;
-
- if (pool->cache_bucket)
- bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
-
- for (i = 0; i < params->pool_size; ++i) {
- fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
- if (!fmr)
- goto out_fail;
-
- fmr->pool = pool;
- fmr->remap_count = 0;
- fmr->ref_count = 0;
- INIT_HLIST_NODE(&fmr->cache_node);
-
- fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
- if (IS_ERR(fmr->fmr)) {
- pr_warn(PFX "fmr_create failed for FMR %d\n",
- i);
- kfree(fmr);
- goto out_fail;
- }
-
- list_add_tail(&fmr->list, &pool->free_list);
- ++pool->pool_size;
- }
- }
-
- return pool;
-
- out_free_pool:
- kfree(pool->cache_bucket);
- kfree(pool);
-
- return ERR_PTR(ret);
-
- out_fail:
- ib_destroy_fmr_pool(pool);
-
- return ERR_PTR(-ENOMEM);
-}
-EXPORT_SYMBOL(ib_create_fmr_pool);
-
-/**
- * ib_destroy_fmr_pool - Free FMR pool
- * @pool:FMR pool to free
- *
- * Destroy an FMR pool and free all associated resources.
- */
-void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
-{
- struct ib_pool_fmr *fmr;
- struct ib_pool_fmr *tmp;
- LIST_HEAD(fmr_list);
- int i;
-
- kthread_destroy_worker(pool->worker);
- ib_fmr_batch_release(pool);
-
- i = 0;
- list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
- if (fmr->remap_count) {
- INIT_LIST_HEAD(&fmr_list);
- list_add_tail(&fmr->fmr->list, &fmr_list);
- ib_unmap_fmr(&fmr_list);
- }
- ib_dealloc_fmr(fmr->fmr);
- list_del(&fmr->list);
- kfree(fmr);
- ++i;
- }
-
- if (i < pool->pool_size)
- pr_warn(PFX "pool still has %d regions registered\n",
- pool->pool_size - i);
-
- kfree(pool->cache_bucket);
- kfree(pool);
-}
-EXPORT_SYMBOL(ib_destroy_fmr_pool);
-
-/**
- * ib_flush_fmr_pool - Invalidate all unmapped FMRs
- * @pool:FMR pool to flush
- *
- * Ensure that all unmapped FMRs are fully invalidated.
- */
-int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
-{
- int serial;
- struct ib_pool_fmr *fmr, *next;
-
- /*
- * The free_list holds FMRs that may have been used
- * but have not been remapped enough times to be dirty.
- * Put them on the dirty list now so that the cleanup
- * thread will reap them too.
- */
- spin_lock_irq(&pool->pool_lock);
- list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
- if (fmr->remap_count > 0)
- list_move(&fmr->list, &pool->dirty_list);
- }
- spin_unlock_irq(&pool->pool_lock);
-
- serial = atomic_inc_return(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
-
- if (wait_event_interruptible(pool->force_wait,
- atomic_read(&pool->flush_ser) - serial >= 0))
- return -EINTR;
-
- return 0;
-}
-EXPORT_SYMBOL(ib_flush_fmr_pool);
-
-/**
- * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
- * @pool_handle: FMR pool to allocate FMR from
- * @page_list: List of pages to map
- * @list_len: Number of pages in @page_list
- * @io_virtual_address: I/O virtual address for new FMR
- */
-struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
- u64 *page_list,
- int list_len,
- u64 io_virtual_address)
-{
- struct ib_fmr_pool *pool = pool_handle;
- struct ib_pool_fmr *fmr;
- unsigned long flags;
- int result;
-
- if (list_len < 1 || list_len > pool->max_pages)
- return ERR_PTR(-EINVAL);
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- fmr = ib_fmr_cache_lookup(pool,
- page_list,
- list_len,
- io_virtual_address);
- if (fmr) {
- /* found in cache */
- ++fmr->ref_count;
- if (fmr->ref_count == 1) {
- list_del(&fmr->list);
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return fmr;
- }
-
- if (list_empty(&pool->free_list)) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- return ERR_PTR(-EAGAIN);
- }
-
- fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
- list_del(&fmr->list);
- hlist_del_init(&fmr->cache_node);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
- io_virtual_address);
-
- if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- pr_warn(PFX "fmr_map returns %d\n", result);
-
- return ERR_PTR(result);
- }
-
- ++fmr->remap_count;
- fmr->ref_count = 1;
-
- if (pool->cache_bucket) {
- fmr->io_virtual_address = io_virtual_address;
- fmr->page_list_len = list_len;
- memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- hlist_add_head(&fmr->cache_node,
- pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_fmr_pool_map_phys);
-
-/**
- * ib_fmr_pool_unmap - Unmap FMR
- * @fmr:FMR to unmap
- *
- * Unmap an FMR. The FMR mapping may remain valid until the FMR is
- * reused (or until ib_flush_fmr_pool() is called).
- */
-void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
-{
- struct ib_fmr_pool *pool;
- unsigned long flags;
-
- pool = fmr->pool;
-
- spin_lock_irqsave(&pool->pool_lock, flags);
-
- --fmr->ref_count;
- if (!fmr->ref_count) {
- if (fmr->remap_count < pool->max_remaps) {
- list_add_tail(&fmr->list, &pool->free_list);
- } else {
- list_add_tail(&fmr->list, &pool->dirty_list);
- if (++pool->dirty_len >= pool->dirty_watermark) {
- atomic_inc(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
- }
- }
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-}
-EXPORT_SYMBOL(ib_fmr_pool_unmap);
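
With fmr_pool.c deleted, the last of the FMR interfaces leaves the tree; in-kernel ULPs were converted to fast-registration MRs. A condensed sketch of the replacement flow, with error handling elided and the ulp_ name hypothetical:

	/* Sketch: FR-MR replacement for an FMR-pool user. */
	static u32 ulp_fast_reg(struct ib_pd *pd, struct ib_qp *qp,
				struct scatterlist *sg, int nents)
	{
		struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
		struct ib_reg_wr reg_wr = {};

		/* load the page list; returns how many sg entries mapped */
		ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);

		reg_wr.wr.opcode = IB_WR_REG_MR;
		reg_wr.mr = mr;
		reg_wr.key = mr->rkey;
		reg_wr.access = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_READ;
		ib_post_send(qp, &reg_wr.wr, NULL); /* registration is a WR */
		return mr->rkey;
	}

Unlike FMRs, the registration and invalidation are ordinary send work requests on the QP, so remapping no longer requires the pool's deferred-flush machinery.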
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index da8adadf4755..2b47073c61a6 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -211,8 +211,7 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+ if (refcount_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
return 1;
@@ -225,7 +224,7 @@ static void add_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
}
static void rem_ref(struct iw_cm_id *cm_id)
@@ -257,7 +256,7 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
cm_id_priv->id.add_ref = add_ref;
cm_id_priv->id.rem_ref = rem_ref;
spin_lock_init(&cm_id_priv->lock);
- atomic_set(&cm_id_priv->refcount, 1);
+ refcount_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
init_completion(&cm_id_priv->destroy_comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
@@ -1094,7 +1093,7 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}
}
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
if (list_empty(&cm_id_priv->work_list)) {
list_add_tail(&work->list, &cm_id_priv->work_list);
queue_work(iwcm_wq, &work->work);
@@ -1187,29 +1186,34 @@ static int __init iw_cm_init(void)
ret = iwpm_init(RDMA_NL_IWCM);
if (ret)
- pr_err("iw_cm: couldn't init iwpm\n");
- else
- rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
+ return ret;
+
iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
if (!iwcm_wq)
- return -ENOMEM;
+ goto err_alloc;
iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
iwcm_ctl_table);
if (!iwcm_ctl_table_hdr) {
pr_err("iw_cm: couldn't register sysctl paths\n");
- destroy_workqueue(iwcm_wq);
- return -ENOMEM;
+ goto err_sysctl;
}
+ rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
return 0;
+
+err_sysctl:
+ destroy_workqueue(iwcm_wq);
+err_alloc:
+ iwpm_exit(RDMA_NL_IWCM);
+ return -ENOMEM;
}
static void __exit iw_cm_cleanup(void)
{
+ rdma_nl_unregister(RDMA_NL_IWCM);
unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
- rdma_nl_unregister(RDMA_NL_IWCM);
iwpm_exit(RDMA_NL_IWCM);
}
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 82c2cd1b0a80..bf74639be128 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -52,7 +52,7 @@ struct iwcm_id_private {
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head work_free_list;
};
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 46686990a827..3c9a9869212b 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -69,10 +69,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto pid_query_error;
- }
if (iwpm_check_registration(nl_client, IWPM_REG_VALID) ||
iwpm_user_pid == IWPM_PID_UNAVAILABLE)
return 0;
@@ -123,7 +119,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
pid_query_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
@@ -153,10 +149,6 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto add_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -211,7 +203,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
add_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
add_mapping_error_nowarn:
dev_kfree_skb(skb);
if (nlmsg_request)
@@ -240,10 +232,6 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto query_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -304,7 +292,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
query_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
query_mapping_error_nowarn:
dev_kfree_skb(skb);
if (nlmsg_request)
@@ -331,10 +319,6 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto remove_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) {
@@ -372,7 +356,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
"remove_mapping: Local sockaddr:");
return 0;
remove_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb_any(skb);
return ret;
@@ -392,7 +376,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
/**
* iwpm_register_pid_cb - Process the port mapper response to
* iwpm_register_pid query
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* If successful, the function receives the userspace port mapper pid
@@ -431,7 +415,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+ pr_info("%s: Incorrect info (dev = %s name = %s version = %u)\n",
__func__, dev_name, iwpm_name, iwpm_version);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto register_pid_response_exit;
@@ -439,13 +423,12 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_user_pid = cb->nlh->nlmsg_pid;
iwpm_ulib_version = iwpm_version;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
- pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
__func__, iwpm_user_pid);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_user_pid);
- if (iwpm_valid_client(nl_client))
- iwpm_set_registration(nl_client, IWPM_REG_VALID);
+ iwpm_set_registration(nl_client, IWPM_REG_VALID);
register_pid_response_exit:
nlmsg_request->request_done = 1;
/* always for found nlmsg_request */
@@ -468,7 +451,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
/**
* iwpm_add_mapping_cb - Process the port mapper response to
* iwpm_add_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -528,7 +511,8 @@ add_mapping_response_exit:
}
/* netlink attribute policy for the response to add and query mapping request
- * and response with remote address info */
+ * and response with remote address info
+ */
static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
[IWPM_NLA_RQUERY_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_RQUERY_LOCAL_ADDR] = {
@@ -545,7 +529,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] =
/**
* iwpm_add_and_query_mapping_cb - Process the port mapper response to
* iwpm_add_and_query_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
@@ -627,7 +611,7 @@ query_mapping_response_exit:
/**
* iwpm_remote_info_cb - Process remote connecting peer address info, which
* the port mapper has received from the connecting peer
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Stores the IPv4/IPv6 address info in a hash table
@@ -648,11 +632,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
local_sockaddr = (struct sockaddr_storage *)
@@ -706,7 +685,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
/**
* iwpm_mapping_info_cb - Process a notification that the userspace
* port mapper daemon is started
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send all the local mapping
@@ -730,22 +709,17 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
if (strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Invalid port mapper name = %s version = %d\n",
+ pr_info("%s: Invalid port mapper name = %s version = %u\n",
__func__, iwpm_name, iwpm_version);
return ret;
}
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_user_pid = cb->nlh->nlmsg_pid;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
- pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
__func__, iwpm_user_pid);
if (!iwpm_mapinfo_available())
@@ -766,7 +740,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
/**
* iwpm_ack_mapping_info_cb - Process the port mapper ack for
* the provided local mapping info records
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -796,7 +770,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
/**
* iwpm_mapping_error_cb - Process port mapper notification for error
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -841,7 +815,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
/**
* iwpm_hello_cb - Process a hello message from iwpmd
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send the kernel's abi_version
@@ -862,11 +836,6 @@ int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 13495b43dbc1..358a2db38d23 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -48,7 +48,6 @@ static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
static struct hlist_head *iwpm_reminfo_bucket;
static DEFINE_SPINLOCK(iwpm_reminfo_lock);
-static DEFINE_MUTEX(iwpm_admin_lock);
static struct iwpm_admin_data iwpm_admin;
/**
@@ -59,35 +58,21 @@ static struct iwpm_admin_data iwpm_admin;
*/
int iwpm_init(u8 nl_client)
{
- int ret = 0;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!iwpm_hash_bucket) {
- ret = -ENOMEM;
- goto init_exit;
- }
- iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!iwpm_reminfo_bucket) {
- kfree(iwpm_hash_bucket);
- ret = -ENOMEM;
- goto init_exit;
- }
- }
- atomic_inc(&iwpm_admin.refcount);
-init_exit:
- mutex_unlock(&iwpm_admin_lock);
- if (!ret) {
- iwpm_set_valid(nl_client, 1);
- iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
- pr_debug("%s: Mapinfo and reminfo tables are created\n",
- __func__);
+ iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_hash_bucket)
+ return -ENOMEM;
+
+ iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_reminfo_bucket) {
+ kfree(iwpm_hash_bucket);
+ return -ENOMEM;
}
- return ret;
+
+ iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
+ pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__);
+ return 0;
}
static void free_hash_bucket(void);
@@ -101,22 +86,9 @@ static void free_reminfo_bucket(void);
*/
int iwpm_exit(u8 nl_client)
{
-
- if (!iwpm_valid_client(nl_client))
- return -EINVAL;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- mutex_unlock(&iwpm_admin_lock);
- pr_err("%s Incorrect usage - negative refcount\n", __func__);
- return -EINVAL;
- }
- if (atomic_dec_and_test(&iwpm_admin.refcount)) {
- free_hash_bucket();
- free_reminfo_bucket();
- pr_debug("%s: Resources are destroyed\n", __func__);
- }
- mutex_unlock(&iwpm_admin_lock);
- iwpm_set_valid(nl_client, 0);
+ free_hash_bucket();
+ free_reminfo_bucket();
+ pr_debug("%s: Resources are destroyed\n", __func__);
iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
return 0;
}
@@ -127,8 +99,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
/**
* iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
* info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
+ * @mapped_sockaddr: Mapped local ip/tcp address
* @nl_client: The index of the netlink client
* @map_flags: IWPM mapping flags
*/
@@ -141,8 +113,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client))
- return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
if (!map_info)
return -ENOMEM;
@@ -174,7 +144,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
/**
* iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
* info from the hash table
- * @local_addr: Local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
* @mapped_local_addr: Mapped local ip/tcp address
*
* Returns err code if mapping info is not found in the hash table,
@@ -302,10 +272,6 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid client = %d\n", __func__, nl_client);
- return ret;
- }
spin_lock_irqsave(&iwpm_reminfo_lock, flags);
if (iwpm_reminfo_bucket) {
hash_bucket_head = get_reminfo_hash_bucket(
@@ -420,16 +386,6 @@ int iwpm_get_nlmsg_seq(void)
return atomic_inc_return(&iwpm_admin.nlmsg_seq);
}
-int iwpm_valid_client(u8 nl_client)
-{
- return iwpm_admin.client_list[nl_client];
-}
-
-void iwpm_set_valid(u8 nl_client, int valid)
-{
- iwpm_admin.client_list[nl_client] = valid;
-}
-
/* valid client */
u32 iwpm_get_registration(u8 nl_client)
{
@@ -651,7 +607,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
err_str = "Unable to send a nlmsg";
goto mapinfo_num_error;
}
- pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+ pr_debug("%s: Sent mapping number = %u\n", __func__, mapping_num);
return 0;
mapinfo_num_error:
pr_info("%s: %s\n", __func__, err_str);
@@ -806,7 +762,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
- const char *err_str = "";
+ const char *err_str;
int ret = -EINVAL;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
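With the client_list/refcount bookkeeping gone, iwpm_init() and iwpm_exit() reduce to allocating and freeing the two hash tables for the single iWarp CM user. An illustrative pairing, not part of the patch (the wrapper names are made up; RDMA_NL_IWCM is the existing client index):

/* Illustrative sketch only: pair iwpm_init()/iwpm_exit() once around
 * the lifetime of the lone iWarp port-mapper user.
 */
static int example_iwpm_start(void)
{
	int ret;

	ret = iwpm_init(RDMA_NL_IWCM);	/* allocates mapinfo/reminfo tables */
	if (ret)
		return ret;
	/* ... iwpm_add_mapping(), iwpm_remove_mapping(), ... */
	return 0;
}

static void example_iwpm_stop(void)
{
	iwpm_exit(RDMA_NL_IWCM);	/* frees both hash tables */
}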
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 1bf87d9fd0bd..d6fc8402158a 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -33,7 +33,6 @@
#ifndef _IWPM_UTIL_H
#define _IWPM_UTIL_H
-#include <linux/module.h>
#include <linux/io.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -90,9 +89,7 @@ struct iwpm_remote_info {
};
struct iwpm_admin_data {
- atomic_t refcount;
atomic_t nlmsg_seq;
- int client_list[RDMA_NL_NUM_CLIENTS];
u32 reg_list[RDMA_NL_NUM_CLIENTS];
};
@@ -141,29 +138,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
int iwpm_get_nlmsg_seq(void);
/**
- * iwpm_add_reminfo - Add remote address info of the connecting peer
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
* to the remote info hash table
* @reminfo: The remote info to be added
*/
void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
/**
- * iwpm_valid_client - Check if the port mapper client is valid
- * @nl_client: The index of the netlink client
- *
- * Valid clients need to call iwpm_init() before using
- * the port mapper
- */
-int iwpm_valid_client(u8 nl_client);
-
-/**
- * iwpm_set_valid - Set the port mapper client to valid or not
- * @nl_client: The index of the netlink client
- * @valid: 1 if valid or 0 if invalid
- */
-void iwpm_set_valid(u8 nl_client, int valid);
-
-/**
* iwpm_check_registration - Check if the client registration
* matches the given one
* @nl_client: The index of the netlink client
@@ -183,7 +164,7 @@ u32 iwpm_check_registration(u8 nl_client, u32 reg);
void iwpm_set_registration(u8 nl_client, u32 reg);
/**
- * iwpm_get_registration
+ * iwpm_get_registration - Get the client registration
* @nl_client: The index of the netlink client
*
* Returns the client registration type
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..c77d7d2559a1
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, flags);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source =
+ htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct
+ iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(master, ah_attr, flags);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_read_lock();
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ if (slave)
+ dev_hold(slave);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave)
+{
+ if (xmit_slave)
+ dev_put(xmit_slave);
+}
+
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave = NULL;
+ struct net_device *master;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ ah_attr->grh.flow_label))
+ return NULL;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return master;
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master))
+ goto put;
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
+put:
+ dev_put(master);
+ return slave;
+}
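The new lag.c helpers let a RoCE driver resolve a bond's transmit slave for a flow-label-carrying AH and drop the reference once done. A minimal usage sketch, illustrative only (the function name and the way the slave is recorded are assumptions, not from the patch):

/* Illustrative: resolve the LAG xmit slave for a RoCEv2 AH. */
static int example_resolve_lag_slave(struct ib_device *ibdev,
				     struct rdma_ah_attr *ah_attr)
{
	struct net_device *slave;

	slave = rdma_lag_get_ah_roce_slave(ibdev, ah_attr, GFP_KERNEL);
	if (IS_ERR(slave))
		return PTR_ERR(slave);	/* skb allocation failed */
	if (!slave)
		return 0;		/* no bond, no flow label, or not RoCEv2 UDP encap */

	/* ... record slave->ifindex (or similar) in the driver's AH state ... */

	rdma_lag_put_ah_roce_slave(slave);
	return 0;
}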
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c54db13fa9b0..1893aa613ad7 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -61,7 +61,7 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
{
u16 pkey;
struct ib_device *dev = qp_info->port_priv->device;
- u8 pnum = qp_info->port_priv->port_num;
+ u32 pnum = qp_info->port_priv->port_num;
struct ib_ud_wr *wr = &mad_send_wr->send_wr;
struct rdma_ah_attr attr = {};
@@ -85,7 +85,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
-/* Client ID 0 is used for snoop-only clients */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
@@ -119,7 +118,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
* Assumes ib_mad_port_list_lock is being held
*/
static inline struct ib_mad_port_private *
-__ib_get_mad_port(struct ib_device *device, int port_num)
+__ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
@@ -135,7 +134,7 @@ __ib_get_mad_port(struct ib_device *device, int port_num)
* for a device/port
*/
static inline struct ib_mad_port_private *
-ib_get_mad_port(struct ib_device *device, int port_num)
+ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
unsigned long flags;
@@ -156,8 +155,7 @@ static inline u8 convert_mgmt_class(u8 mgmt_class)
static int get_spl_qp_index(enum ib_qp_type qp_type)
{
- switch (qp_type)
- {
+ switch (qp_type) {
case IB_QPT_SMI:
return 0;
case IB_QPT_GSI:
@@ -223,7 +221,7 @@ EXPORT_SYMBOL(ib_response_mad);
* Context: Process context.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum ib_qp_type qp_type,
struct ib_mad_reg_req *mad_reg_req,
u8 rmpp_version,
@@ -353,7 +351,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
+ dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
__func__, port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
@@ -403,7 +401,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
- atomic_set(&mad_agent_priv->refcount, 1);
+ refcount_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
@@ -483,141 +481,12 @@ error1:
}
EXPORT_SYMBOL(ib_register_mad_agent);
-static inline int is_snooping_sends(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (/*IB_MAD_SNOOP_POSTED_SENDS |
- IB_MAD_SNOOP_RMPP_SENDS |*/
- IB_MAD_SNOOP_SEND_COMPLETIONS /*|
- IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
-}
-
-static inline int is_snooping_recvs(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (IB_MAD_SNOOP_RECVS /*|
- IB_MAD_SNOOP_RMPP_RECVS*/));
-}
-
-static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
- struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_snoop_private **new_snoop_table;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- /* Check for empty slot in array. */
- for (i = 0; i < qp_info->snoop_table_size; i++)
- if (!qp_info->snoop_table[i])
- break;
-
- if (i == qp_info->snoop_table_size) {
- /* Grow table. */
- new_snoop_table = krealloc(qp_info->snoop_table,
- sizeof mad_snoop_priv *
- (qp_info->snoop_table_size + 1),
- GFP_ATOMIC);
- if (!new_snoop_table) {
- i = -ENOMEM;
- goto out;
- }
-
- qp_info->snoop_table = new_snoop_table;
- qp_info->snoop_table_size++;
- }
- qp_info->snoop_table[i] = mad_snoop_priv;
- atomic_inc(&qp_info->snoop_count);
-out:
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- return i;
-}
-
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_mad_agent *ret;
- struct ib_mad_snoop_private *mad_snoop_priv;
- int qpn;
- int err;
-
- /* Validate parameters */
- if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
- (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- qpn = get_spl_qp_index(qp_type);
- if (qpn == -1) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- port_priv = ib_get_mad_port(device, port_num);
- if (!port_priv) {
- ret = ERR_PTR(-ENODEV);
- goto error1;
- }
- /* Allocate structures */
- mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
- if (!mad_snoop_priv) {
- ret = ERR_PTR(-ENOMEM);
- goto error1;
- }
-
- /* Now, fill in the various structures */
- mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
- mad_snoop_priv->agent.device = device;
- mad_snoop_priv->agent.recv_handler = recv_handler;
- mad_snoop_priv->agent.snoop_handler = snoop_handler;
- mad_snoop_priv->agent.context = context;
- mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
- mad_snoop_priv->agent.port_num = port_num;
- mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
- init_completion(&mad_snoop_priv->comp);
-
- err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
- if (err) {
- ret = ERR_PTR(err);
- goto error2;
- }
-
- mad_snoop_priv->snoop_index = register_snoop_agent(
- &port_priv->qp_info[qpn],
- mad_snoop_priv);
- if (mad_snoop_priv->snoop_index < 0) {
- ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error3;
- }
-
- atomic_set(&mad_snoop_priv->refcount, 1);
- return &mad_snoop_priv->agent;
-error3:
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-error2:
- kfree(mad_snoop_priv);
-error1:
- return ret;
-}
-EXPORT_SYMBOL(ib_register_mad_snoop);
-
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
- if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ if (refcount_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
-static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- if (atomic_dec_and_test(&mad_snoop_priv->refcount))
- complete(&mad_snoop_priv->comp);
-}
-
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
@@ -639,10 +508,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
flush_workqueue(port_priv->wq);
- ib_cancel_rmpp_recvs(mad_agent_priv);
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_cancel_rmpp_recvs(mad_agent_priv);
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
@@ -650,25 +519,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
kfree_rcu(mad_agent_priv, rcu);
}
-static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_qp_info *qp_info;
- unsigned long flags;
-
- qp_info = mad_snoop_priv->qp_info;
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
- atomic_dec(&qp_info->snoop_count);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-
- deref_snoop_agent(mad_snoop_priv);
- wait_for_completion(&mad_snoop_priv->comp);
-
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-
- kfree(mad_snoop_priv);
-}
-
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*
@@ -677,20 +527,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_snoop_private *mad_snoop_priv;
-
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
- struct ib_mad_snoop_private,
- agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -706,59 +547,8 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
spin_unlock_irqrestore(&mad_queue->lock, flags);
}
-static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_buf, mad_send_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
-static void snoop_recv(struct ib_mad_qp_info *qp_info,
- struct ib_mad_recv_wc *mad_recv_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
- mad_recv_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
- u16 pkey_index, u8 port_num, struct ib_wc *wc)
+ u16 pkey_index, u32 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
wc->wr_cqe = cqe;
@@ -817,7 +607,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_port_private *port_priv;
struct ib_mad_agent_private *recv_mad_agent = NULL;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num;
+ u32 port_num;
struct ib_wc mad_wc;
struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
@@ -916,8 +706,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
(const struct ib_mad *)smp,
(struct ib_mad *)mad_priv->mad, &mad_size,
&out_mad_pkey_index);
- switch (ret)
- {
+ switch (ret) {
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
mad_agent_priv->agent.recv_handler) {
@@ -927,7 +716,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* Reference MAD agent until receive
* side of local completion handled
*/
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
} else
kfree(mad_priv);
break;
@@ -967,7 +756,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
@@ -1016,7 +805,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
/* Allocate data segments. */
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
- seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+ seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
if (!seg) {
free_send_rmpp_list(send_wr);
return -ENOMEM;
@@ -1046,12 +835,11 @@ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
}
EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
-struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
- u32 remote_qpn, u16 pkey_index,
- int rmpp_active,
- int hdr_len, int data_len,
- gfp_t gfp_mask,
- u8 base_version)
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+ u32 remote_qpn, u16 pkey_index,
+ int rmpp_active, int hdr_len,
+ int data_len, gfp_t gfp_mask,
+ u8 base_version)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
@@ -1125,7 +913,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
mad_send_wr->send_buf.mad_agent = mad_agent;
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
@@ -1340,7 +1128,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->status = IB_WC_SUCCESS;
/* Reference MAD agent until send completes */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&mad_send_wr->agent_list,
&mad_agent_priv->send_list);
@@ -1357,7 +1145,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_del(&mad_send_wr->agent_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
goto error;
}
}
@@ -1484,11 +1272,9 @@ static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
int i;
/* Remove any methods for this mad agent */
- for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
- if (method->agent[i] == agent) {
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
+ if (method->agent[i] == agent)
method->agent[i] = NULL;
- }
- }
}
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
@@ -1663,9 +1449,8 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
* Was MAD registration request supplied
* with original registration ?
*/
- if (!agent_priv->reg_req) {
+ if (!agent_priv->reg_req)
goto out;
- }
port_priv = agent_priv->qp_info->port_priv;
mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
@@ -1763,7 +1548,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
rcu_read_lock();
mad_agent = xa_load(&ib_mad_clients, hi_tid);
- if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+ if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
mad_agent = NULL;
rcu_read_unlock();
} else {
@@ -1815,14 +1600,14 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
}
}
if (mad_agent)
- atomic_inc(&mad_agent->refcount);
+ refcount_inc(&mad_agent->refcount);
out:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
if (mad_agent && !mad_agent->agent.recv_handler) {
dev_notice(&port_priv->device->dev,
- "No receive handler for client %p on port %d\n",
+ "No receive handler for client %p on port %u\n",
&mad_agent->agent, port_priv->port_num);
deref_mad_agent(mad_agent);
mad_agent = NULL;
@@ -1841,7 +1626,7 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
/* Make sure MAD base version is understood */
if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
(!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
- pr_err("MAD received with unsupported base version %d %s\n",
+ pr_err("MAD received with unsupported base version %u %s\n",
mad_hdr->base_version, opa ? "(opa)" : "");
goto out;
}
@@ -1886,15 +1671,16 @@ static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
rwc->recv_buf.mad->mad_hdr.mgmt_class;
}
-static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
- const struct ib_mad_send_wr_private *wr,
- const struct ib_mad_recv_wc *rwc )
+static inline int
+rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
+ const struct ib_mad_send_wr_private *wr,
+ const struct ib_mad_recv_wc *rwc)
{
struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num = mad_agent_priv->agent.port_num;
+ u32 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
bool has_grh;
@@ -2040,10 +1826,11 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent, NULL,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
} else {
/* not user rmpp, revert to normal behavior and
- * drop the mad */
+ * drop the mad
+ */
ib_free_recv_mad(mad_recv_wc);
deref_mad_agent(mad_agent_priv);
return;
@@ -2057,7 +1844,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
&mad_agent_priv->agent,
&mad_send_wr->send_buf,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
@@ -2069,14 +1856,12 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
-
- return;
}
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
const struct ib_mad_qp_info *qp_info,
const struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
@@ -2163,7 +1948,7 @@ static enum smi_action
handle_opa_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
@@ -2219,7 +2004,7 @@ static enum smi_action
handle_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response,
bool opa)
@@ -2243,7 +2028,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
struct ib_mad_agent_private *mad_agent;
- int port_num;
+ u32 port_num;
int ret = IB_MAD_RESULT_SUCCESS;
size_t mad_size;
u16 resp_mad_pkey_index = 0;
@@ -2289,9 +2074,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
recv->header.recv_wc.recv_buf.grh = &recv->grh;
- if (atomic_read(&qp_info->snoop_count))
- snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
-
/* Validate MAD */
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
@@ -2414,9 +2196,10 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
temp_mad_send_wr->timeout))
break;
}
- }
- else
+ } else {
list_item = &mad_agent_priv->wait_list;
+ }
+
list_add(&mad_send_wr->agent_list, list_item);
/* Reschedule a work item if we have a shorter timeout */
@@ -2470,7 +2253,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
adjust_timeout(mad_agent_priv);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_send_wr->status != IB_WC_SUCCESS )
+ if (mad_send_wr->status != IB_WC_SUCCESS)
mad_send_wc->status = mad_send_wr->status;
if (ret == IB_RMPP_RESULT_INTERNAL)
ib_rmpp_send_handler(mad_send_wc);
@@ -2538,9 +2321,6 @@ retry:
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_send_wc.status = wc->status;
mad_send_wc.vendor_err = wc->vendor_err;
- if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
@@ -2653,7 +2433,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
list_del(&mad_send_wr->agent_list);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
}
}
@@ -2679,16 +2459,18 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
return NULL;
}
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
unsigned long flags;
int active;
- mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
- agent);
+ if (!send_buf)
+ return -EINVAL;
+
+ mad_agent_priv = container_of(send_buf->mad_agent,
+ struct ib_mad_agent_private, agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
@@ -2713,13 +2495,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_modify_mad);
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf)
-{
- ib_modify_mad(mad_agent, send_buf, 0);
-}
-EXPORT_SYMBOL(ib_cancel_mad);
-
static void local_completions(struct work_struct *work)
{
struct ib_mad_agent_private *mad_agent_priv;
@@ -2782,16 +2557,12 @@ static void local_completions(struct work_struct *work)
local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
local->mad_priv->header.recv_wc.recv_buf.mad =
(struct ib_mad *)local->mad_priv->mad;
- if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
- snoop_recv(recv_mad_agent->qp_info,
- &local->mad_priv->header.recv_wc,
- IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
- atomic_dec(&recv_mad_agent->refcount);
+ deref_mad_agent(recv_mad_agent);
spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
}
@@ -2800,15 +2571,11 @@ local_send_completion:
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
- if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info,
- &local->mad_send_wr->send_buf,
- &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
if (free_mad)
kfree(local->mad_priv);
kfree(local);
@@ -2894,7 +2661,7 @@ static void timeout_sends(struct work_struct *work)
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -2941,6 +2708,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
+ kfree(mad_priv);
ret = -ENOMEM;
break;
}
@@ -3099,7 +2867,7 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
/* It's worse than that! He's dead, Jim! */
dev_err(&qp_info->port_priv->device->dev,
- "Fatal error (%d) on MAD QP (%d)\n",
+ "Fatal error (%d) on MAD QP (%u)\n",
event->event, qp_info->qp->qp_num);
}
@@ -3119,10 +2887,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv,
init_mad_queue(qp_info, &qp_info->send_queue);
init_mad_queue(qp_info, &qp_info->recv_queue);
INIT_LIST_HEAD(&qp_info->overflow_list);
- spin_lock_init(&qp_info->snoop_lock);
- qp_info->snoop_table = NULL;
- qp_info->snoop_table_size = 0;
- atomic_set(&qp_info->snoop_count, 0);
}
static int create_mad_qp(struct ib_mad_qp_info *qp_info,
@@ -3166,7 +2930,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
return;
ib_destroy_qp(qp_info->qp);
- kfree(qp_info->snoop_table);
}
/*
@@ -3174,7 +2937,7 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
* Create the QP, PD, MR, and CQ if needed
*/
static int ib_mad_port_open(struct ib_device *device,
- int port_num)
+ u32 port_num)
{
int ret, cq_size;
struct ib_mad_port_private *port_priv;
@@ -3229,7 +2992,7 @@ static int ib_mad_port_open(struct ib_device *device,
if (ret)
goto error7;
- snprintf(name, sizeof name, "ib_mad%d", port_num);
+ snprintf(name, sizeof(name), "ib_mad%u", port_num);
port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!port_priv->wq) {
ret = -ENOMEM;
@@ -3275,7 +3038,7 @@ error3:
* If there are no classes using the port, free the port
* resources (CQ, MR, PD, QP) and remove the port's info structure
*/
-static int ib_mad_port_close(struct ib_device *device, int port_num)
+static int ib_mad_port_close(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *port_priv;
unsigned long flags;
@@ -3284,7 +3047,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- dev_err(&device->dev, "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %u not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -3304,9 +3067,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}
-static void ib_mad_init_device(struct ib_device *device)
+static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
+ unsigned int count = 0;
+ int ret;
start = rdma_start_port(device);
@@ -3314,17 +3079,23 @@ static void ib_mad_init_device(struct ib_device *device)
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_mad_port_open(device, i)) {
+ ret = ib_mad_port_open(device, i);
+ if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
- if (ib_agent_port_open(device, i)) {
+ ret = ib_agent_port_open(device, i);
+ if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
+ count++;
}
- return;
+ if (!count)
+ return -EOPNOTSUPP;
+
+ return 0;
error_agent:
if (ib_mad_port_close(device, i))
@@ -3341,6 +3112,7 @@ error:
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
+ return ret;
}
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
@@ -3353,9 +3125,9 @@ static void ib_mad_remove_device(struct ib_device *device, void *client_data)
if (ib_agent_port_close(device, i))
dev_err(&device->dev,
- "Couldn't close port %d for agents\n", i);
+ "Couldn't close port %u for agents\n", i);
if (ib_mad_port_close(device, i))
- dev_err(&device->dev, "Couldn't close port %d\n", i);
+ dev_err(&device->dev, "Couldn't close port %u\n", i);
}
}
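With ib_cancel_mad() removed and ib_modify_mad() keyed off the send buffer alone, callers now cancel an outstanding MAD by modifying its timeout to zero. A hedged sketch of the updated call site (the wrapper name is illustrative):

/* Illustrative only: the mad_agent argument is no longer passed. */
static void example_cancel_mad(struct ib_mad_send_buf *send_buf)
{
	/* Timeout 0 cancels the send; a NULL send_buf now returns -EINVAL. */
	ib_modify_mad(send_buf, 0);
}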
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 956b3a7dfed7..1b7445a6f671 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -79,13 +79,13 @@ struct ib_mad_private {
struct ib_mad_private_header header;
size_t mad_size;
struct ib_grh grh;
- u8 mad[0];
+ u8 mad[];
} __packed;
struct ib_rmpp_segment {
struct list_head list;
u32 num;
- u8 data[0];
+ u8 data[];
};
struct ib_mad_agent_private {
@@ -103,7 +103,7 @@ struct ib_mad_agent_private {
struct work_struct local_work;
struct list_head rmpp_list;
- atomic_t refcount;
+ refcount_t refcount;
union {
struct completion comp;
struct rcu_head rcu;
@@ -115,7 +115,6 @@ struct ib_mad_snoop_private {
struct ib_mad_qp_info *qp_info;
int snoop_index;
int mad_snoop_flags;
- atomic_t refcount;
struct completion comp;
};
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 5ec57abc0849..8af0619a39cd 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -40,8 +40,7 @@
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
- RMPP_STATE_COMPLETE,
- RMPP_STATE_CANCELING
+ RMPP_STATE_COMPLETE
};
struct mad_rmpp_recv {
@@ -52,7 +51,7 @@ struct mad_rmpp_recv {
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
@@ -73,7 +72,7 @@ struct mad_rmpp_recv {
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
- if (atomic_dec_and_test(&rmpp_recv->refcount))
+ if (refcount_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
@@ -92,22 +91,18 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
- if (rmpp_recv->state != RMPP_STATE_COMPLETE)
- ib_free_recv_mad(rmpp_recv->rmpp_wc);
- rmpp_recv->state = RMPP_STATE_CANCELING;
- }
- spin_unlock_irqrestore(&agent->lock, flags);
-
- list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
+ spin_unlock_irqrestore(&agent->lock, flags);
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
destroy_rmpp_recv(rmpp_recv);
}
}
@@ -272,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
- if (rmpp_recv->state == RMPP_STATE_CANCELING) {
- spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
- return;
- }
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
@@ -305,7 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
- atomic_set(&rmpp_recv->refcount, 1);
+ refcount_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
@@ -357,7 +348,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent,
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
@@ -391,8 +382,8 @@ static inline int get_seg_num(struct ib_mad_recv_buf *seg)
return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
}
-static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
- struct ib_mad_recv_buf *seg)
+static inline struct ib_mad_recv_buf *get_next_seg(struct list_head *rmpp_list,
+ struct ib_mad_recv_buf *seg)
{
if (seg->list.next == rmpp_list)
return NULL;
@@ -405,8 +396,8 @@ static inline int window_size(struct ib_mad_agent_private *agent)
return max(agent->qp_info->recv_queue.max_active >> 3, 1);
}
-static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
- int seg_num)
+static struct ib_mad_recv_buf *find_seg_location(struct list_head *rmpp_list,
+ int seg_num)
{
struct ib_mad_recv_buf *seg_buf;
int cur_seg_num;
@@ -458,7 +449,7 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
return hdr_size + rmpp_recv->seg_num * data_size - pad;
}
-static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
+static struct ib_mad_recv_wc *complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_mad_recv_wc *rmpp_wc;
@@ -553,7 +544,7 @@ start_rmpp(struct ib_mad_agent_private *agent,
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
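The atomic_t-to-refcount_t conversions in mad.c, mad_rmpp.c and multicast.c all follow the same lifetime pattern: the count starts at 1 for the creator, lookups take refcount_inc(), and the destroy path drops its reference and waits on a completion that the final refcount_dec_and_test() signals. A generic sketch of that pattern (illustrative, not taken from the patch):

#include <linux/refcount.h>
#include <linux/completion.h>
#include <linux/slab.h>

struct example_obj {
	refcount_t refcount;
	struct completion comp;
};

static void example_obj_init(struct example_obj *obj)
{
	refcount_set(&obj->refcount, 1);	/* creator's reference */
	init_completion(&obj->comp);
}

static void example_obj_put(struct example_obj *obj)
{
	if (refcount_dec_and_test(&obj->refcount))
		complete(&obj->comp);		/* wake the destroyer */
}

static void example_obj_destroy(struct example_obj *obj)
{
	example_obj_put(obj);			/* drop creator's reference */
	wait_for_completion(&obj->comp);	/* wait until all users are done */
	kfree(obj);
}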
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index cd338ddc4a39..a236532a9026 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"
-static void mcast_add_one(struct ib_device *device);
+static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
@@ -61,9 +61,9 @@ struct mcast_port {
struct mcast_device *dev;
spinlock_t lock;
struct rb_root table;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
- u8 port_num;
+ u32 port_num;
};
struct mcast_device {
@@ -71,7 +71,7 @@ struct mcast_device {
struct ib_event_handler event_handler;
int start_port;
int end_port;
- struct mcast_port port[0];
+ struct mcast_port port[];
};
enum mcast_state {
@@ -117,7 +117,7 @@ struct mcast_member {
struct mcast_group *group;
struct list_head list;
enum mcast_state state;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
};
@@ -178,7 +178,7 @@ static struct mcast_group *mcast_insert(struct mcast_port *port,
static void deref_port(struct mcast_port *port)
{
- if (atomic_dec_and_test(&port->refcount))
+ if (refcount_dec_and_test(&port->refcount))
complete(&port->comp);
}
@@ -199,7 +199,7 @@ static void release_group(struct mcast_group *group)
static void deref_member(struct mcast_member *member)
{
- if (atomic_dec_and_test(&member->refcount))
+ if (refcount_dec_and_test(&member->refcount))
complete(&member->comp);
}
@@ -401,7 +401,7 @@ static void process_group_error(struct mcast_group *group)
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
adjust_membership(group, member->multicast.rec.join_state, -1);
member->state = MCAST_ERROR;
@@ -445,7 +445,7 @@ retest:
struct mcast_member, list);
multicast = &member->multicast;
join_state = multicast->rec.join_state;
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
if (join_state == (group->rec.join_state & join_state)) {
status = cmp_rec(&group->rec, &multicast->rec,
@@ -497,7 +497,7 @@ static void process_join_error(struct mcast_group *group, int status)
member = list_entry(group->pending_list.next,
struct mcast_member, list);
if (group->last_join == member) {
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(status, &member->multicast);
@@ -589,7 +589,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
kfree(group);
group = cur_group;
} else
- atomic_inc(&port->refcount);
+ refcount_inc(&port->refcount);
found:
atomic_inc(&group->refcount);
spin_unlock_irqrestore(&port->lock, flags);
@@ -605,7 +605,7 @@ found:
*/
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
@@ -632,7 +632,7 @@ ib_sa_join_multicast(struct ib_sa_client *client,
member->multicast.callback = callback;
member->multicast.context = context;
init_completion(&member->comp);
- atomic_set(&member->refcount, 1);
+ refcount_set(&member->refcount, 1);
member->state = MCAST_JOINING;
member->group = acquire_group(&dev->port[port_num - dev->start_port],
@@ -690,7 +690,7 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
}
EXPORT_SYMBOL(ib_sa_free_multicast);
-int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
struct mcast_device *dev;
@@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* member record and gid of the device.
* @device: RDMA device
* @port_num: Port of the rdma device to consider
+ * @rec: Multicast member record to use
* @ndev: Optional netdevice, applicable only for RoCE
* @gid_type: GID type to consider
* @ah_attr: AH attribute to fillup on successful completion
@@ -731,7 +732,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* success or appropriate error code.
*
*/
-int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
@@ -815,7 +816,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}
-static void mcast_add_one(struct ib_device *device)
+static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
@@ -825,7 +826,7 @@ static void mcast_add_one(struct ib_device *device)
dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
- return;
+ return -ENOMEM;
dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
@@ -839,13 +840,13 @@ static void mcast_add_one(struct ib_device *device)
spin_lock_init(&port->lock);
port->table = RB_ROOT;
init_completion(&port->comp);
- atomic_set(&port->refcount, 1);
+ refcount_set(&port->refcount, 1);
++count;
}
if (!count) {
kfree(dev);
- return;
+ return -EOPNOTSUPP;
}
dev->device = device;
@@ -853,6 +854,7 @@ static void mcast_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
+ return 0;
}
static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -861,9 +863,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;
- if (!dev)
- return;
-
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
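mcast_add_one() now reports failure instead of silently skipping the device, which is why the NULL client_data guard in mcast_remove_one() can be dropped: the core only calls a client's remove callback after its add callback has succeeded. A sketch of the resulting ib_client contract (names other than the core helpers are illustrative):

static int example_add_one(struct ib_device *device)
{
	struct example_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;		/* core skips this device */
	if (!example_device_usable(device)) {
		kfree(data);
		return -EOPNOTSUPP;	/* no supported ports */
	}
	ib_set_client_data(device, &example_client, data);
	return 0;
}

static void example_remove_one(struct ib_device *device, void *client_data)
{
	/* Only reached if example_add_one() returned 0, so client_data
	 * is never NULL here.
	 */
	kfree(client_data);
}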
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 8cd31ef25eff..1b2cc9e45ade 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -98,7 +98,7 @@ get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
*/
up_read(&rdma_nl_types[type].sem);
- request_module("rdma-netlink-subsys-%d", type);
+ request_module("rdma-netlink-subsys-%u", type);
down_read(&rdma_nl_types[type].sem);
cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e0b0a91da696..12dc97067ed2 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -92,7 +92,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
.len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
@@ -114,6 +116,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
@@ -129,6 +132,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
@@ -145,6 +153,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -241,7 +252,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
char fw[IB_FW_VERSION_NAME_MAX];
int ret = 0;
- u8 port;
+ u32 port;
if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
@@ -384,6 +395,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
[RDMA_RESTRACK_CTX] = "ctx",
+ [RDMA_RESTRACK_SRQ] = "srq",
};
struct nlattr *table_attr;
@@ -446,27 +458,11 @@ static int fill_res_name_pid(struct sk_buff *msg,
return err ? -EMSGSIZE : 0;
}
-static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (!dev->ops.fill_res_entry)
- return false;
- return dev->ops.fill_res_entry(msg, res);
-}
-
-static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (!dev->ops.fill_stat_entry)
- return false;
- return dev->ops.fill_stat_entry(msg, res);
-}
-
-static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
- struct rdma_restrack_entry *res, uint32_t port)
+static int fill_res_qp_entry_query(struct sk_buff *msg,
+ struct rdma_restrack_entry *res,
+ struct ib_device *dev,
+ struct ib_qp *qp)
{
- struct ib_qp *qp = container_of(res, struct ib_qp, res);
- struct ib_device *dev = qp->device;
struct ib_qp_init_attr qp_init_attr;
struct ib_qp_attr qp_attr;
int ret;
@@ -475,16 +471,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (ret)
return ret;
- if (port && port != qp_attr.port_num)
- return -EAGAIN;
-
- /* In create_qp() port is not set yet */
- if (qp_attr.port_num &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
- goto err;
-
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
- goto err;
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
qp_attr.dest_qp_num))
@@ -508,19 +494,53 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
+ if (dev->ops.fill_res_qp_entry)
+ return dev->ops.fill_res_qp_entry(msg, qp);
+ return 0;
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
+ int ret;
+
+ if (port && port != qp->port)
+ return -EAGAIN;
+
+ /* In create_qp() port is not set yet */
+ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+ return -EINVAL;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+ if (ret)
+ return -EMSGSIZE;
+
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_name_pid(msg, res))
- goto err;
+ ret = fill_res_name_pid(msg, res);
+ if (ret)
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return fill_res_qp_entry_query(msg, res, dev, qp);
+}
- return 0;
+static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
-err: return -EMSGSIZE;
+ if (port && port != qp->port)
+ return -EAGAIN;
+ if (!dev->ops.fill_res_qp_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_qp_entry_raw(msg, qp);
}
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -568,9 +588,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (fill_res_name_pid(msg, res))
goto err;
- if (fill_res_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_res_cm_id_entry)
+ return dev->ops.fill_res_cm_id_entry(msg, cm_id);
return 0;
err: return -EMSGSIZE;
@@ -583,35 +602,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct ib_device *dev = cq->device;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
/* Poll context is only valid for kernel CQs */
if (rdma_is_kernel_res(res) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
cq->uobject->uevent.uobject.context->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_cq_entry) ?
+ dev->ops.fill_res_cq_entry(msg, cq) : 0;
+}
- return 0;
+static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct ib_device *dev = cq->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_cq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_cq_entry_raw(msg, cq);
}
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -622,38 +648,45 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
- goto err;
+ return -EMSGSIZE;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_mr_entry) ?
+ dev->ops.fill_res_mr_entry(msg, mr) :
+ 0;
+}
- return 0;
+static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_mr_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_mr_entry_raw(msg, mr);
}
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
- struct ib_device *dev = pd->device;
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
@@ -676,15 +709,138 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
pd->uobject->context->res.id))
goto err;
- if (fill_res_name_pid(msg, res))
- goto err;
+ return fill_res_name_pid(msg, res);
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
+
+ if (rdma_is_kernel_res(res))
+ return 0;
- if (fill_res_entry(dev, msg, res))
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
+ return -EMSGSIZE;
+
+ return fill_res_name_pid(msg, res);
+}
+
+static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
+ uint32_t max_range)
+{
+ struct nlattr *entry_attr;
+
+ if (!min_range)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (min_range == max_range) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
+ goto err;
+ }
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
+{
+ uint32_t min_range = 0, prev = 0;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ rt = &srq->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
+ rdma_restrack_put(res);
+ continue;
+ }
+
+ if (qp->qp_num < prev)
+ /* qp_num should be ascending */
+ goto err_loop;
+
+ if (min_range == 0) {
+ min_range = qp->qp_num;
+ } else if (qp->qp_num > (prev + 1)) {
+ if (fill_res_range_qp_entry(msg, min_range, prev))
+ goto err_loop;
+
+ min_range = qp->qp_num;
+ }
+ prev = qp->qp_num;
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+
+ if (fill_res_range_qp_entry(msg, min_range, prev))
goto err;
+ nla_nest_end(msg, table_attr);
return 0;
-err: return -EMSGSIZE;
+err_loop:
+ rdma_restrack_put(res);
+ xa_unlock(&rt->xa);
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_srq *srq = container_of(res, struct ib_srq, res);
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
+ goto err;
+
+ if (ib_srq_has_cq(srq->srq_type)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
+ srq->ext.cq->res.id))
+ goto err;
+ }
+
+ if (fill_res_srq_qps(msg, srq))
+ goto err;
+
+ return fill_res_name_pid(msg, res);
+
+err:
+ return -EMSGSIZE;
}
static int fill_stat_counter_mode(struct sk_buff *msg,
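
The SRQ dump above compresses the QP numbers attached to an SRQ into ranges: fill_res_srq_qps() walks the restrack xarray in ascending qp_num order, keeps a running [min_range, prev] window, and flushes it through fill_res_range_qp_entry() whenever a gap appears (and once more at the end). A minimal stand-alone C sketch of the same compression, where emit_range() is only an illustrative stand-in for the netlink nesting code:

#include <stdio.h>

/* Stand-in for fill_res_range_qp_entry(): one LQPN, or a MIN/MAX range. */
static void emit_range(unsigned int min_range, unsigned int max_range)
{
	if (!min_range)
		return;
	if (min_range == max_range)
		printf("LQPN %u\n", min_range);
	else
		printf("range %u-%u\n", min_range, max_range);
}

int main(void)
{
	/* QP numbers bound to one SRQ, already in ascending order. */
	const unsigned int qpns[] = { 4, 5, 6, 9, 10, 15 };
	unsigned int min_range = 0, prev = 0;
	unsigned int i;

	for (i = 0; i < sizeof(qpns) / sizeof(qpns[0]); i++) {
		if (min_range == 0) {
			min_range = qpns[i];
		} else if (qpns[i] > prev + 1) {
			/* Gap: flush the current window and start a new one. */
			emit_range(min_range, prev);
			min_range = qpns[i];
		}
		prev = qpns[i];
	}
	emit_range(min_range, prev);	/* flush the final window */
	return 0;
}

For the sample list this prints "range 4-6", "range 9-10" and "LQPN 15", mirroring the nested RDMA_NLDEV_ATTR_RES_QP_ENTRY attributes the kernel emits for an SRQ.
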
@@ -695,11 +851,16 @@ static int fill_stat_counter_mode(struct sk_buff *msg,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
return -EMSGSIZE;
- if (m->mode == RDMA_COUNTER_MODE_AUTO)
+ if (m->mode == RDMA_COUNTER_MODE_AUTO) {
if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
return -EMSGSIZE;
+ if ((m->mask & RDMA_COUNTER_MASK_PID) &&
+ fill_res_name_pid(msg, &counter->res))
+ return -EMSGSIZE;
+ }
+
return 0;
}
@@ -738,9 +899,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
xa_lock(&rt->xa);
xa_for_each(&rt->xa, id, res) {
qp = container_of(res, struct ib_qp, res);
- if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- continue;
-
if (!qp->counter || (qp->counter->id != counter->id))
continue;
@@ -793,9 +951,8 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
goto err;
- if (fill_stat_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_stat_mr_entry)
+ return dev->ops.fill_stat_mr_entry(msg, mr);
return 0;
err:
@@ -813,14 +970,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
if (!table_attr)
return -EMSGSIZE;
- for (i = 0; i < st->num_counters; i++)
- if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
+ mutex_lock(&st->lock);
+ for (i = 0; i < st->num_counters; i++) {
+ if (test_bit(i, st->is_disabled))
+ continue;
+ if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
+ st->value[i]))
goto err;
+ }
+ mutex_unlock(&st->lock);
nla_nest_end(msg, table_attr);
return 0;
err:
+ mutex_unlock(&st->lock);
nla_nest_cancel(msg, table_attr);
return -EMSGSIZE;
}
@@ -840,7 +1004,6 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
- fill_res_name_pid(msg, &counter->res) ||
fill_stat_counter_mode(msg, counter) ||
fill_stat_counter_qps(msg, counter) ||
fill_stat_counter_hwcounters(msg, counter))
@@ -916,8 +1079,12 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
char name[IB_DEVICE_NAME_MAX] = {};
- nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
err = ib_device_rename(device, name);
goto done;
}
@@ -1173,7 +1340,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
struct nldev_fill_res_entry {
enum rdma_nldev_attr nldev_attr;
- enum rdma_nldev_command nldev_cmd;
u8 flags;
u32 entry;
u32 id;
@@ -1185,44 +1351,51 @@ enum nldev_res_flags {
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
[RDMA_RESTRACK_COUNTER] = {
- .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
},
+ [RDMA_RESTRACK_CTX] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_CTXN,
+ },
+ [RDMA_RESTRACK_SRQ] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_SRQN,
+ },
+
};
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1277,7 +1450,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);
if (fill_nldev_handle(msg, device)) {
@@ -1288,11 +1462,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
ret = fill_func(msg, has_cap_net_admin, res, port);
-
- rdma_restrack_put(res);
if (ret)
goto err_free;
+ rdma_restrack_put(res);
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
@@ -1356,7 +1529,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
@@ -1438,27 +1612,32 @@ err_index:
return ret;
}
-#define RES_GET_FUNCS(name, type) \
- static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
+#define RES_GET_FUNCS(name, type) \
+ static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
- { \
- return res_get_common_dumpit(skb, cb, type, \
- fill_res_##name##_entry); \
- } \
- static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
- struct nlmsghdr *nlh, \
+ { \
+ return res_get_common_dumpit(skb, cb, type, \
+ fill_res_##name##_entry); \
+ } \
+ static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
+ struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
- { \
- return res_get_common_doit(skb, nlh, extack, type, \
- fill_res_##name##_entry); \
+ { \
+ return res_get_common_doit(skb, nlh, extack, type, \
+ fill_res_##name##_entry); \
}
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
+RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
+RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
@@ -1512,13 +1691,13 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
!tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
return -EINVAL;
- nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
- if (strchr(ibdev_name, '%'))
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
- nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
- nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+ nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+ nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
sizeof(ndev_name));
ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
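
Both user-supplied strings above now go through nla_strscpy(), which always NUL-terminates within the destination buffer and reports truncation as an error rather than returning the source length, so an empty or '%'-containing device name can be rejected with plain string checks. A hedged sketch of that validation pattern; the helper name is illustrative, not kernel code:

#include <linux/errno.h>
#include <linux/string.h>
#include <net/netlink.h>

/* Illustrative helper: copy and sanity-check a user-supplied device name. */
static int copy_checked_name(char *dst, size_t len, const struct nlattr *attr)
{
	nla_strscpy(dst, attr, len);	/* always NUL-terminated, never overruns */

	if (strlen(dst) == 0 || strchr(dst, '%'))
		return -EINVAL;
	return 0;
}
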
@@ -1560,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
+ if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}
@@ -1585,7 +1764,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
return -EINVAL;
- nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
+ nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
sizeof(client_name));
if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
@@ -1680,6 +1859,19 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_free(msg);
return err;
}
+
+ /*
+ * Copy-on-fork is supported.
+ * See commits:
+ * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
+ * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
+ * for more details. Don't backport this without them.
+ *
+ * Return value ignored on purpose, assume copy-on-fork is not
+ * supported in case of failure.
+ */
+ nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
+
nlmsg_end(msg, nlh);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
}
@@ -1705,24 +1897,113 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
+static int nldev_stat_set_mode_doit(struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[],
+ struct ib_device *device, u32 port)
+{
+ u32 mode, mask = 0, qpn, cntn = 0;
+ int ret;
+
+ /* Currently only counter for QP is supported */
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
+ if (mode == RDMA_COUNTER_MODE_AUTO) {
+ if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
+ mask = nla_get_u32(
+ tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
+ return rdma_counter_set_auto_mode(device, port, mask, extack);
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
+ if (ret)
+ return ret;
+ } else {
+ ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
+ if (ret)
+ return ret;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ return 0;
+
+err_fill:
+ rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ return ret;
+}
+
+static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
+ struct ib_device *device,
+ u32 port)
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *entry_attr;
+ unsigned long *target;
+ int rem, i, ret = 0;
+ u32 index;
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats)
+ return -EINVAL;
+
+ target = kcalloc(BITS_TO_LONGS(stats->num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!target)
+ return -ENOMEM;
+
+ nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
+ rem) {
+ index = nla_get_u32(entry_attr);
+ if ((index >= stats->num_counters) ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_bit(index, target);
+ }
+
+ for (i = 0; i < stats->num_counters; i++) {
+ if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
+ continue;
+
+ ret = rdma_counter_modify(device, port, i, test_bit(i, target));
+ if (ret)
+ goto out;
+ }
+
+out:
+ kfree(target);
+ return ret;
+}
+
static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- u32 index, port, mode, mask = 0, qpn, cntn = 0;
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
struct sk_buff *msg;
+ u32 index, port;
int ret;
- ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
- /* Currently only counter for QP is supported */
- if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
- !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
- !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
- return -EINVAL;
-
- if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
@@ -1733,61 +2014,49 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
if (!rdma_is_port_valid(device, port)) {
ret = -EINVAL;
- goto err;
+ goto err_put_device;
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
+ !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = -EINVAL;
+ goto err_put_device;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_put_device;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_SET),
0, 0);
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_free_msg;
+ }
- mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
- if (mode == RDMA_COUNTER_MODE_AUTO) {
- if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
- mask = nla_get_u32(
- tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
-
- ret = rdma_counter_set_auto_mode(device, port,
- mask ? true : false, mask);
- if (ret)
- goto err_msg;
- } else {
- if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
- goto err_msg;
- qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
- if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
- cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
- ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
- } else {
- ret = rdma_counter_bind_qpn_alloc(device, port,
- qpn, &cntn);
- }
+ if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
+ ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
if (ret)
- goto err_msg;
+ goto err_free_msg;
+ }
- if (fill_nldev_handle(msg, device) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
- ret = -EMSGSIZE;
- goto err_fill;
- }
+ if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
+ if (ret)
+ goto err_free_msg;
}
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
-err_fill:
- rdma_counter_unbind_qpn(device, port, qpn, cntn);
-err_msg:
+err_free_msg:
nlmsg_free(msg);
-err:
+err_put_device:
ib_device_put(device);
return ret;
}
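
The RDMA_NLDEV_ATTR_STAT_HWCOUNTERS nest is treated as the complete desired set: every index listed in it is enabled, every other IB_STAT_FLAG_OPTIONAL counter is disabled via rdma_counter_modify(), and an index that is out of range or not optional is rejected. A small stand-alone sketch of that "desired set" semantic, with illustrative values only:

#include <stdbool.h>
#include <stdio.h>

#define NUM_COUNTERS 6

int main(void)
{
	/* Which counters carry IB_STAT_FLAG_OPTIONAL (only these may be toggled). */
	const bool optional[NUM_COUNTERS] = { false, true, true, false, true, false };
	/* Indices carried inside the RDMA_NLDEV_ATTR_STAT_HWCOUNTERS nest. */
	const unsigned int requested[] = { 2, 4 };
	bool target[NUM_COUNTERS] = { false };
	unsigned int i;

	for (i = 0; i < sizeof(requested) / sizeof(requested[0]); i++) {
		if (requested[i] >= NUM_COUNTERS || !optional[requested[i]]) {
			fprintf(stderr, "invalid index %u\n", requested[i]);
			return 1;	/* -EINVAL in the kernel */
		}
		target[requested[i]] = true;
	}

	/* Every optional counter is driven to the requested state, listed or not. */
	for (i = 0; i < NUM_COUNTERS; i++) {
		if (!optional[i])
			continue;
		printf("counter %u -> %s\n", i, target[i] ? "enable" : "disable");
	}
	return 0;
}

Here counters 2 and 4 end up enabled while counter 1, also optional but not listed, is disabled; non-optional counters are never touched.
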
@@ -1879,13 +2148,14 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
if (!device)
return -EINVAL;
- if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
+ if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
ret = -EINVAL;
goto err;
}
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
- if (!rdma_is_port_valid(device, port)) {
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
ret = -EINVAL;
goto err;
}
@@ -1907,11 +2177,6 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
goto err_msg;
}
- stats = device->port_data ? device->port_data[port].hw_stats : NULL;
- if (stats == NULL) {
- ret = -EINVAL;
- goto err_msg;
- }
mutex_lock(&stats->lock);
num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
@@ -1926,9 +2191,13 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
goto err_stats;
}
for (i = 0; i < num_cnts; i++) {
+ if (test_bit(i, stats->is_disabled))
+ continue;
+
v = stats->value[i] +
rdma_counter_get_hwstat_value(device, port, i);
- if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
+ if (rdma_nl_stat_hwcounter_entry(msg,
+ stats->descs[i].name, v)) {
ret = -EMSGSIZE;
goto err_table;
}
@@ -2076,6 +2345,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb,
return ret;
}
+static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
+ struct rdma_hw_stats *stats;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 devid, port;
+ int ret, i;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), devid);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(
+ msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
+ 0, 0);
+
+ ret = -EMSGSIZE;
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+ goto err_msg;
+
+ table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table)
+ goto err_msg;
+
+ mutex_lock(&stats->lock);
+ for (i = 0; i < stats->num_counters; i++) {
+ entry = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry)
+ goto err_msg_table;
+
+ if (nla_put_string(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ stats->descs[i].name) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
+ goto err_msg_entry;
+
+ if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
+ (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
+ !test_bit(i, stats->is_disabled))))
+ goto err_msg_entry;
+
+ nla_nest_end(msg, entry);
+ }
+ mutex_unlock(&stats->lock);
+
+ nla_nest_end(msg, table);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg_entry:
+ nla_nest_cancel(msg, entry);
+err_msg_table:
+ mutex_unlock(&stats->lock);
+ nla_nest_cancel(msg, table);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -2124,6 +2486,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_res_get_pd_doit,
.dump = nldev_res_get_pd_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_CTX_GET] = {
+ .doit = nldev_res_get_ctx_doit,
+ .dump = nldev_res_get_ctx_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_SRQ_GET] = {
+ .doit = nldev_res_get_srq_doit,
+ .dump = nldev_res_get_srq_dumpit,
+ },
[RDMA_NLDEV_CMD_SYS_GET] = {
.doit = nldev_sys_get_doit,
},
@@ -2142,6 +2512,24 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_stat_del_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
+ [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
+ .doit = nldev_res_get_qp_raw_doit,
+ .dump = nldev_res_get_qp_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
+ .doit = nldev_res_get_cq_raw_doit,
+ .dump = nldev_res_get_cq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
+ .doit = nldev_res_get_mr_raw_doit,
+ .dump = nldev_res_get_mr_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
+ .doit = nldev_stat_get_counter_status_doit,
+ },
};
void __init nldev_init(void)
@@ -2149,7 +2537,7 @@ void __init nldev_init(void)
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
-void __exit nldev_exit(void)
+void nldev_exit(void)
{
rdma_nl_unregister(RDMA_NL_NLDEV);
}
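
The new *_GET_RAW commands delegate the payload entirely to the driver: nldev only requires CAP_NET_ADMIN (RDMA_NL_ADMIN_PERM) and a matching fill_res_*_entry_raw op, then hands the skb to the driver, which dumps its hardware view of the object as an opaque RDMA_NLDEV_ATTR_RES_RAW blob. A hypothetical driver-side callback might look like the sketch below; the wrapper struct and hw_ctx field are made up:

#include <net/netlink.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_netlink.h>

/* Hypothetical driver QP wrapper; only the opaque hardware context matters here. */
struct mydrv_qp {
	struct ib_qp ibqp;
	u32 hw_ctx[16];		/* assumed hardware QP context layout */
};

static int mydrv_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
	struct mydrv_qp *qp = container_of(ibqp, struct mydrv_qp, ibqp);

	/* RDMA_NLDEV_ATTR_RES_RAW is NLA_BINARY, so the blob length is up to the driver. */
	return nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(qp->hw_ctx),
		       qp->hw_ctx);
}

A driver wires this up as .fill_res_qp_entry_raw in its struct ib_device_ops; the RDMA_NLDEV_CMD_RES_QP_GET_RAW handler registered above then returns the blob verbatim to user space, which is why the attribute and the command are added together in this patch.
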
diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h
index af4879bdf3d6..64e2822af70f 100644
--- a/drivers/infiniband/core/opa_smi.h
+++ b/drivers/infiniband/core/opa_smi.h
@@ -40,11 +40,11 @@
#include "smi.h"
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int opa_smi_get_fwd_port(struct opa_smp *smp);
extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp);
extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 5128cb16bb48..29b1ab1d5f93 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
- * access locks, since only a single one of them is is allowed
+ * access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
@@ -112,7 +112,7 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj,
* however the type's allocat_commit function cannot have been called and the
* uobject cannot be on the uobjects_lists
*
- * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
* rdma_lookup_get_uobject) and the object is left in a state where the caller
* needs to call rdma_lookup_put_uobject.
*
@@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
} else if (uobj->object) {
ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
attrs);
- if (ret) {
- if (ib_is_destroy_retryable(ret, reason, uobj))
- return ret;
-
- /* Nothing to be done, dangle the memory and move on */
- WARN(true,
- "ib_uverbs: failed to remove uobject id %d, driver err=%d",
- uobj->id, ret);
- }
+ if (ret)
+			/* Nothing to be done, wait until ucontext cleanup removes it */
+ return ret;
uobj->object = NULL;
}
@@ -153,9 +147,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
uobj->context = NULL;
/*
- * For DESTROY the usecnt is held write locked, the caller is expected
- * to put it unlock and put the object when done with it. Only DESTROY
- * can remove the IDR handle.
+ * For DESTROY the usecnt is not changed, the caller is expected to
+ * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
+ * handle.
*/
if (reason != RDMA_REMOVE_DESTROY)
atomic_set(&uobj->usecnt, 0);
@@ -187,7 +181,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
/*
* This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
* sequence. It should only be used from command callbacks. On success the
- * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * caller must pair this with uobj_put_destroy(). This
* version requires the caller to have already obtained an
* LOOKUP_DESTROY uobject kref.
*/
@@ -198,6 +192,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
down_read(&ufile->hw_destroy_rwsem);
+ /*
+ * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
+ * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
+ * This is because any other concurrent thread can still see the object
+ * in the xarray due to RCU. Leaving it locked ensures nothing else will
+ * touch it.
+ */
ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
if (ret)
goto out_unlock;
@@ -216,7 +217,7 @@ out_unlock:
/*
* uobj_get_destroy destroys the HW object and returns a handle to the uobj
* with a NULL object pointer. The caller must pair this with
- * uverbs_put_destroy.
+ * uobj_put_destroy().
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
u32 id, struct uverbs_attr_bundle *attrs)
@@ -250,8 +251,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
-
- rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ uobj_put_destroy(uobj);
return 0;
}
@@ -360,7 +360,7 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj,
* uverbs_uobject_fd_release(), and the caller is expected to ensure
* that release is never done while a call to lookup is possible.
*/
- if (f->f_op != fd_type->fops) {
+ if (f->f_op != fd_type->fops || uobject->ufile != ufile) {
fput(f);
return ERR_PTR(-EBADF);
}
@@ -453,40 +453,46 @@ static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
struct uverbs_attr_bundle *attrs)
{
- const struct uverbs_obj_fd_type *fd_type =
- container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+ const struct uverbs_obj_fd_type *fd_type;
int new_fd;
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj, *ret;
struct file *filp;
- if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release))
- return ERR_PTR(-EINVAL);
-
- new_fd = get_unused_fd_flags(O_CLOEXEC);
- if (new_fd < 0)
- return ERR_PTR(new_fd);
-
uobj = alloc_uobj(attrs, obj);
if (IS_ERR(uobj))
+ return uobj;
+
+ fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+ if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release &&
+ fd_type->fops->release != &uverbs_async_event_release)) {
+ ret = ERR_PTR(-EINVAL);
goto err_fd;
+ }
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0) {
+ ret = ERR_PTR(new_fd);
+ goto err_fd;
+ }
/* Note that uverbs_uobject_fd_release() is called during abort */
filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
fd_type->flags);
if (IS_ERR(filp)) {
- uobj = ERR_CAST(filp);
- goto err_uobj;
+ ret = ERR_CAST(filp);
+ goto err_getfile;
}
uobj->object = filp;
uobj->id = new_fd;
return uobj;
-err_uobj:
- uverbs_uobject_put(uobj);
-err_fd:
+err_getfile:
put_unused_fd(new_fd);
- return uobj;
+err_fd:
+ uverbs_uobject_put(uobj);
+ return ret;
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
@@ -531,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
struct uverbs_obj_idr_type, type);
int ret = idr_type->destroy_object(uobj, why, attrs);
- /*
- * We can only fail gracefully if the user requested to destroy the
- * object or when a retry may be called upon an error.
- * In the rest of the cases, just remove whatever you can.
- */
- if (ib_is_destroy_retryable(ret, why, uobj))
+ if (ret)
return ret;
if (why == RDMA_REMOVE_ABORT)
@@ -569,11 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
{
const struct uverbs_obj_fd_type *fd_type = container_of(
uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
- int ret = fd_type->destroy_object(uobj, why);
-
- if (ib_is_destroy_retryable(ret, why, uobj))
- return ret;
+ fd_type->destroy_object(uobj, why);
return 0;
}
@@ -597,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
WARN_ON(old != NULL);
}
+static void swap_idr_uobjects(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new)
+{
+ struct ib_uverbs_file *ufile = obj_old->ufile;
+ void *old;
+
+ /*
+	 * New must be an object that has been allocated but not yet committed; this
+	 * moves the pre-committed state to obj_old, new still must be committed.
+ */
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (WARN_ON(old != obj_old))
+ return;
+
+ swap(obj_old->id, obj_new->id);
+
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
+ WARN_ON(old != NULL);
+}
+
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
int fd = uobj->id;
@@ -626,9 +645,6 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
{
struct ib_uverbs_file *ufile = attrs->ufile;
- /* alloc_commit consumes the uobj kref */
- uobj->uapi_object->type_class->alloc_commit(uobj);
-
/* kref is held so long as the uobj is on the uobj list. */
uverbs_uobject_get(uobj);
spin_lock_irq(&ufile->uobjects_lock);
@@ -638,18 +654,65 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
/* matches atomic_set(-1) in alloc_uobj */
atomic_set(&uobj->usecnt, 0);
+ /* alloc_commit consumes the uobj kref */
+ uobj->uapi_object->type_class->alloc_commit(uobj);
+
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
}
/*
+ * new_uobj will be assigned to the handle currently used by to_uobj, and
+ * to_uobj will be destroyed.
+ *
+ * Upon return the caller must do:
+ * rdma_alloc_commit_uobject(new_uobj)
+ * uobj_put_destroy(to_uobj)
+ *
+ * to_uobj must have a write get but the put mode switches to destroy once
+ * this is called.
+ */
+void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
+
+ if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
+ !to_uobj->uapi_object->type_class->swap_uobjects))
+ return;
+
+ to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
+
+ /*
+ * If this fails then the uobject is still completely valid (though with
+ * a new ID) and we leak it until context close.
+ */
+ uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
+}
+
+/*
* This consumes the kref for uobj. It is up to the caller to unwind the HW
* object and anything else connected to uobj before calling this.
*/
void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
- struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs,
+ bool hw_obj_valid)
{
struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
+
+ if (hw_obj_valid) {
+ ret = uobj->uapi_object->type_class->destroy_hw(
+ uobj, RDMA_REMOVE_ABORT, attrs);
+ /*
+ * If the driver couldn't destroy the object then go ahead and
+ * commit it. Leaking objects that can't be destroyed is only
+ * done during FD close after the driver has a few more tries to
+ * destroy it.
+ */
+ if (WARN_ON(ret))
+ return rdma_alloc_commit_uobject(uobj, attrs);
+ }
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
@@ -679,7 +742,6 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
{
assert_uverbs_usecnt(uobj, mode);
- uobj->uapi_object->type_class->lookup_put(uobj, mode);
/*
* In order to unlock an object, either decrease its usecnt for
* read access or zero it in case of exclusive access. See
@@ -696,6 +758,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
break;
}
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
/* Pairs with the kref obtained by type->lookup_get */
uverbs_uobject_put(uobj);
}
@@ -734,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
.lookup_put = lookup_put_idr_uobject,
.destroy_hw = destroy_hw_idr_uobject,
.remove_handle = remove_handle_idr_uobject,
+ .swap_uobjects = swap_idr_uobjects,
};
EXPORT_SYMBOL(uverbs_idr_class);
@@ -836,16 +900,23 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
* racing with a lookup_get.
*/
WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE)
+ obj->object = NULL;
if (!uverbs_destroy_uobject(obj, reason, &attrs))
ret = 0;
else
atomic_set(&obj->usecnt, 0);
}
+
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE) {
+ WARN_ON(!list_empty(&ufile->uobjects));
+ return 0;
+ }
return ret;
}
/*
- * Destroy the uncontext and every uobject associated with it.
+ * Destroy the ucontext and every uobject associated with it.
*
* This is internally locked and can be called in parallel from multiple
* contexts.
@@ -862,21 +933,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
if (!ufile->ucontext)
goto done;
- ufile->ucontext->closing = true;
- ufile->ucontext->cleanup_retryable = true;
- while (!list_empty(&ufile->uobjects))
- if (__uverbs_cleanup_ufile(ufile, reason)) {
- /*
- * No entry was cleaned-up successfully during this
- * iteration
- */
- break;
- }
-
- ufile->ucontext->cleanup_retryable = false;
- if (!list_empty(&ufile->uobjects))
- __uverbs_cleanup_ufile(ufile, reason);
+ while (!list_empty(&ufile->uobjects) &&
+ !__uverbs_cleanup_ufile(ufile, reason)) {
+ }
+ if (WARN_ON(!list_empty(&ufile->uobjects)))
+ __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE);
ufile_destroy_ucontext(ufile, reason);
done:
@@ -921,8 +983,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
}
void uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs)
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs)
{
/*
* refcounts should be handled at the object level and not at the
@@ -945,7 +1007,7 @@ void uverbs_finalize_object(struct ib_uobject *uobj,
if (commit)
rdma_alloc_commit_uobject(uobj, attrs);
else
- rdma_alloc_abort_uobject(uobj, attrs);
+ rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid);
break;
default:
WARN_ON(true);
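
rdma_assign_uobject() above spells out a three-step contract for replacing the object behind an existing user handle. A hedged sketch of a caller following that contract exactly as the comment describes; the function name and context are illustrative, and new_uobj is assumed to have been created with rdma_alloc_begin_uobject() with its HW object already set up:

#include <rdma/uverbs_std_types.h>
#include <rdma/uverbs_types.h>

/* Illustrative caller only; the surrounding allocation and HW setup are omitted. */
static void publish_replacement(struct ib_uobject *old_uobj,
				struct ib_uobject *new_uobj,
				struct uverbs_attr_bundle *attrs)
{
	/* old_uobj is held with a write (UVERBS_LOOKUP_WRITE) reference here. */
	rdma_assign_uobject(old_uobj, new_uobj, attrs);

	/* new_uobj now owns old_uobj's user-visible handle; make it live ... */
	rdma_alloc_commit_uobject(new_uobj, attrs);
	/* ... and drop old_uobj, whose put mode has switched to destroy. */
	uobj_put_destroy(old_uobj);
}
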
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 33978e0f1262..33706dad6c0f 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -64,8 +64,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
s64 id, struct uverbs_attr_bundle *attrs);
void uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs);
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs);
int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
@@ -159,6 +159,9 @@ extern const struct uapi_definition uverbs_def_obj_dm[];
extern const struct uapi_definition uverbs_def_obj_flow_action[];
extern const struct uapi_definition uverbs_def_obj_intf[];
extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_obj_qp[];
+extern const struct uapi_definition uverbs_def_obj_srq[];
+extern const struct uapi_definition uverbs_def_obj_wq[];
extern const struct uapi_definition uverbs_def_write_intf[];
static inline const struct uverbs_api_write_method *
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 62fbb0ae9cb4..1f935d9f6178 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -47,6 +47,7 @@ static const char *type2str(enum rdma_restrack_type type)
[RDMA_RESTRACK_MR] = "MR",
[RDMA_RESTRACK_CTX] = "CTX",
[RDMA_RESTRACK_COUNTER] = "COUNTER",
+ [RDMA_RESTRACK_SRQ] = "SRQ",
};
return names[type];
@@ -123,32 +124,6 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
}
EXPORT_SYMBOL(rdma_restrack_count);
-static void set_kern_name(struct rdma_restrack_entry *res)
-{
- struct ib_pd *pd;
-
- switch (res->type) {
- case RDMA_RESTRACK_QP:
- pd = container_of(res, struct ib_qp, res)->pd;
- if (!pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- }
- break;
- case RDMA_RESTRACK_MR:
- pd = container_of(res, struct ib_mr, res)->pd;
- break;
- default:
- /* Other types set kern_name directly */
- pd = NULL;
- break;
- }
-
- if (pd)
- res->kern_name = pd->res.kern_name;
-}
-
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
switch (res->type) {
@@ -167,60 +142,111 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
return container_of(res, struct ib_ucontext, res)->device;
case RDMA_RESTRACK_COUNTER:
return container_of(res, struct rdma_counter, res)->device;
+ case RDMA_RESTRACK_SRQ:
+ return container_of(res, struct ib_srq, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
}
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller)
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res: resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
{
- if (caller) {
- res->kern_name = caller;
+ if (WARN_ON_ONCE(!task))
return;
- }
if (res->task)
put_task_struct(res->task);
- get_task_struct(current);
- res->task = current;
+ get_task_struct(task);
+ res->task = task;
+ res->user = true;
}
-EXPORT_SYMBOL(rdma_restrack_set_task);
/**
- * rdma_restrack_attach_task() - attach the task onto this resource
+ * rdma_restrack_set_name() - set the task for this resource
* @res: resource entry
- * @task: the task to attach, the current task will be used if it is NULL.
+ * @caller: kernel name, the current task will be used if the caller is NULL.
*/
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
- struct task_struct *task)
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
{
- if (res->task)
- put_task_struct(res->task);
- get_task_struct(task);
- res->task = task;
+ if (caller) {
+ res->kern_name = caller;
+ return;
+ }
+
+ rdma_restrack_attach_task(res, current);
}
+EXPORT_SYMBOL(rdma_restrack_set_name);
-static void rdma_restrack_add(struct rdma_restrack_entry *res)
+/**
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
+ */
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent)
+{
+ if (rdma_is_kernel_res(parent))
+ dst->kern_name = parent->kern_name;
+ else
+ rdma_restrack_attach_task(dst, parent->task);
+}
+EXPORT_SYMBOL(rdma_restrack_parent_name);
+
+/**
+ * rdma_restrack_new() - Initialize a new restrack entry so that the _put()
+ * interface can release its memory in a fully automatic way.
+ * @res: Entry to initialize
+ * @type: Restrack type
+ */
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type)
+{
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->type = type;
+}
+EXPORT_SYMBOL(rdma_restrack_new);
+
+/**
+ * rdma_restrack_add() - add object to the resource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
struct rdma_restrack_root *rt;
- int ret;
+ int ret = 0;
if (!dev)
return;
+ if (res->no_track)
+ goto out;
+
rt = &dev->res[res->type];
- kref_init(&res->kref);
- init_completion(&res->comp);
if (res->type == RDMA_RESTRACK_QP) {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
- ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
- res->id = ret ? 0 : qp->qp_num;
+ WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8,
+ "QP number 0x%0X and port 0x%0X", qp->qp_num,
+ qp->port);
+ res->id = qp->qp_num;
+ if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI)
+ res->id |= qp->port << 24;
+ ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL);
+ if (ret)
+ res->id = 0;
} else if (res->type == RDMA_RESTRACK_COUNTER) {
/* Special case to ensure that cntn points to right counter */
struct rdma_counter *counter;
@@ -231,43 +257,14 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
} else {
ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
&rt->next_id, GFP_KERNEL);
+ ret = (ret < 0) ? ret : 0;
}
+out:
if (!ret)
res->valid = true;
}
-
-/**
- * rdma_restrack_kadd() - add kernel object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_kadd(struct rdma_restrack_entry *res)
-{
- res->task = NULL;
- set_kern_name(res);
- res->user = false;
- rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_kadd);
-
-/**
- * rdma_restrack_uadd() - add user object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_uadd(struct rdma_restrack_entry *res)
-{
- if ((res->type != RDMA_RESTRACK_CM_ID) &&
- (res->type != RDMA_RESTRACK_COUNTER))
- res->task = NULL;
-
- if (!res->task)
- rdma_restrack_set_task(res, NULL);
- res->kern_name = NULL;
-
- res->user = true;
- rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_uadd);
+EXPORT_SYMBOL(rdma_restrack_add);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
@@ -305,6 +302,10 @@ static void restrack_release(struct kref *kref)
struct rdma_restrack_entry *res;
res = container_of(kref, struct rdma_restrack_entry, kref);
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
complete(&res->comp);
}
@@ -314,13 +315,25 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_restrack_put);
+/**
+ * rdma_restrack_del() - delete object from the resource tracking database
+ * @res: resource entry
+ */
void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *old;
struct rdma_restrack_root *rt;
struct ib_device *dev;
- if (!res->valid)
+ if (!res->valid) {
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
+ return;
+ }
+
+ if (res->no_track)
goto out;
dev = res_to_dev(res);
@@ -330,16 +343,13 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
rt = &dev->res[res->type];
old = xa_erase(&rt->xa, res->id);
+ if (res->type == RDMA_RESTRACK_MR)
+ return;
WARN_ON(old != res);
- res->valid = false;
+out:
+ res->valid = false;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
-
-out:
- if (res->task) {
- put_task_struct(res->task);
- res->task = NULL;
- }
}
EXPORT_SYMBOL(rdma_restrack_del);
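
With the kadd/uadd entry points gone, restrack users follow an explicit lifecycle: embed a struct rdma_restrack_entry, initialize it with rdma_restrack_new(), attach a name (rdma_restrack_set_name() for a kernel caller or the current task, or rdma_restrack_parent_name() to inherit from a parent object), publish it with rdma_restrack_add(), and remove it with rdma_restrack_del(). A hedged sketch of that sequence for a kernel-owned CQ, with error handling omitted and the prototypes taken from the core-private restrack.h updated below:

#include <rdma/ib_verbs.h>

#include "restrack.h"	/* core-private prototypes added in this patch */

static void track_kernel_cq(struct ib_cq *cq, const char *caller)
{
	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);	/* kref + completion init */
	rdma_restrack_set_name(&cq->res, caller);	/* kernel name; current task if NULL */
	rdma_restrack_add(&cq->res);			/* insert into dev->res[type] xarray */
}

static void untrack_cq(struct ib_cq *cq)
{
	rdma_restrack_del(&cq->res);	/* erase from xarray, drop kref, wait for readers */
}
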
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index d084e5f89849..6a04fc41f738 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -25,6 +25,12 @@ struct rdma_restrack_root {
int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
- struct task_struct *task);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+ const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent);
#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 2860def84f4d..e958c43dd28f 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -70,7 +70,7 @@ struct netdev_event_work {
};
static const struct {
- bool (*is_supported)(const struct ib_device *device, u8 port_num);
+ bool (*is_supported)(const struct ib_device *device, u32 port_num);
enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
@@ -79,7 +79,7 @@ static const struct {
#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port)
{
int i;
unsigned int ret_flags = 0;
@@ -96,7 +96,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
EXPORT_SYMBOL(roce_gid_type_mask_support);
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
- u8 port, union ib_gid *gid,
+ u32 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
int i;
@@ -144,7 +144,7 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
static bool
-is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
+is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *real_dev;
@@ -168,7 +168,7 @@ is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
}
static bool
-is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
+is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *master_dev;
@@ -186,18 +186,19 @@ is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
return res;
}
-/** is_ndev_for_default_gid_filter - Check if a given netdevice
+/**
+ * is_ndev_for_default_gid_filter - Check if a given netdevice
* can be considered for default GIDs or not.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: rdma netdevice pointer
- * @cookie_ndev: Netdevice to consider to form a default GID
+ * @cookie: Netdevice to consider to form a default GID
*
* is_ndev_for_default_gid_filter() returns true if a given netdevice can be
* considered for deriving default RoCE GID, returns false otherwise.
*/
static bool
-is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
+is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *cookie_ndev = cookie;
@@ -223,13 +224,13 @@ is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
return res;
}
-static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
+static bool pass_all_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
return true;
}
-static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
+static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
bool res;
@@ -249,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
/**
* is_upper_ndev_bond_master_filter - Check if a given netdevice
- * is bond master device of netdevice of the the RDMA device of port.
+ * is bond master device of netdevice of the RDMA device of port.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: Pointer to rdma netdevice
@@ -260,7 +261,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
* not have been established as slave device yet.
*/
static bool
-is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
+is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -280,7 +281,7 @@ is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
static void update_gid_ip(enum gid_op_type gid_op,
struct ib_device *ib_dev,
- u8 port, struct net_device *ndev,
+ u32 port, struct net_device *ndev,
struct sockaddr *addr)
{
union ib_gid gid;
@@ -294,7 +295,7 @@ static void update_gid_ip(enum gid_op_type gid_op,
}
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
struct net_device *event_ndev)
{
@@ -328,7 +329,7 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
}
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
const struct in_ifaddr *ifa;
struct in_device *in_dev;
@@ -372,7 +373,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
}
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *in6_dev;
@@ -417,7 +418,7 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
}
}
-static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void _add_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
enum_netdev_ipv4_ips(ib_dev, port, ndev);
@@ -425,13 +426,13 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
enum_netdev_ipv6_ips(ib_dev, port, ndev);
}
-static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void add_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
_add_netdev_ips(ib_dev, port, cookie);
}
-static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void del_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
@@ -446,7 +447,7 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
*
* del_default_gids() deletes the default GIDs of the event/cookie netdevice.
*/
-static void del_default_gids(struct ib_device *ib_dev, u8 port,
+static void del_default_gids(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *cookie_ndev = cookie;
@@ -458,7 +459,7 @@ static void del_default_gids(struct ib_device *ib_dev, u8 port,
IB_CACHE_GID_DEFAULT_MODE_DELETE);
}
-static void add_default_gids(struct ib_device *ib_dev, u8 port,
+static void add_default_gids(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = cookie;
@@ -470,7 +471,7 @@ static void add_default_gids(struct ib_device *ib_dev, u8 port,
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -505,7 +506,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
*
- * @device: the rdma device
+ * @ib_dev: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
@@ -515,7 +516,7 @@ void rdma_roce_rescan_device(struct ib_device *ib_dev)
EXPORT_SYMBOL(rdma_roce_rescan_device);
static void callback_for_addr_gid_device_scan(struct ib_device *device,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -531,10 +532,11 @@ struct upper_list {
struct net_device *upper;
};
-static int netdev_upper_walk(struct net_device *upper, void *data)
+static int netdev_upper_walk(struct net_device *upper,
+ struct netdev_nested_priv *priv)
{
struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
- struct list_head *upper_list = data;
+ struct list_head *upper_list = (struct list_head *)priv->data;
if (!entry)
return 0;
@@ -546,19 +548,21 @@ static int netdev_upper_walk(struct net_device *upper, void *data)
return 0;
}
-static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
+static void handle_netdev_upper(struct ib_device *ib_dev, u32 port,
void *cookie,
void (*handle_netdev)(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *ndev))
{
struct net_device *ndev = cookie;
+ struct netdev_nested_priv priv;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
+ priv.data = &upper_list;
rcu_read_lock();
- netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
+ netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv);
rcu_read_unlock();
handle_netdev(ib_dev, port, ndev);
@@ -571,25 +575,25 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
}
}
-static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *event_ndev)
{
ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
}
-static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
+static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
}
-static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
+static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}
-static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
+static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
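The roce_gid_mgmt.c hunks above adapt netdev_upper_walk() to the current netdev_walk_all_upper_dev_rcu() prototype, which hands the callback a struct netdev_nested_priv instead of a raw void pointer. Below is a minimal sketch of that calling pattern; the function and type names (collect_upper, walk_uppers, upper_entry) are illustrative and not part of this patch.

#include <linux/netdevice.h>
#include <linux/list.h>
#include <linux/slab.h>

struct upper_entry {
	struct list_head list;
	struct net_device *upper;
};

/* Runs once per upper device; priv->data carries the caller's list head. */
static int collect_upper(struct net_device *upper,
			 struct netdev_nested_priv *priv)
{
	struct list_head *head = (struct list_head *)priv->data;
	struct upper_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);

	if (!entry)
		return 0;
	dev_hold(upper);
	entry->upper = upper;
	list_add_tail(&entry->list, head);
	return 0;
}

static void walk_uppers(struct net_device *ndev)
{
	struct netdev_nested_priv priv = { };
	struct upper_entry *entry, *tmp;
	LIST_HEAD(uppers);

	priv.data = (void *)&uppers;
	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(ndev, collect_upper, &priv);
	rcu_read_unlock();

	list_for_each_entry_safe(entry, tmp, &uppers, list) {
		/* ... act on entry->upper here ... */
		dev_put(entry->upper);
		list_del(&entry->list);
		kfree(entry);
	}
}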
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 06e5b6787443..8367974b7998 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -2,6 +2,7 @@
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
*/
+#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
@@ -25,7 +26,7 @@ MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
* registration is also enabled if registering memory might yield better
* performance than using multiple SGE entries, see rdma_rw_io_needs_mr()
*/
-static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
+static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u32 port_num)
{
if (rdma_protocol_iwarp(dev, port_num))
return true;
@@ -42,7 +43,7 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
* optimization otherwise. Additionally we have a debug option to force usage
* of MRs to help testing this code path.
*/
-static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
+static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u32 port_num,
enum dma_data_direction dir, int dma_nents)
{
if (dir == DMA_FROM_DEVICE) {
@@ -87,7 +88,7 @@ static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
}
/* Caller must have zero-initialized *reg. */
-static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
+static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
u32 sg_cnt, u32 offset)
{
@@ -121,7 +122,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
}
static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct rdma_rw_reg_ctx *prev = NULL;
@@ -129,7 +130,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
qp->integrity_en);
int i, j, ret = 0, count = 0;
- ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
@@ -273,23 +274,6 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
return 1;
}
-static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
- u32 sg_cnt, enum dma_data_direction dir)
-{
- if (is_pci_p2pdma_page(sg_page(sg)))
- pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
- else
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
-}
-
-static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
- u32 sg_cnt, enum dma_data_direction dir)
-{
- if (is_pci_p2pdma_page(sg_page(sg)))
- return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
- return ib_dma_map_sg(dev, sg, sg_cnt, dir);
-}
-
/**
* rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
* @ctx: context to initialize
@@ -305,17 +289,21 @@ static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
* Returns the number of WQEs that will be needed on the workqueue if
* successful, or a negative error code.
*/
-int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
+ struct sg_table sgt = {
+ .sgl = sg,
+ .orig_nents = sg_cnt,
+ };
int ret;
- ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
- if (!ret)
- return -ENOMEM;
- sg_cnt = ret;
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
+ if (ret)
+ return ret;
+ sg_cnt = sgt.nents;
/*
* Skip to the S/G entry that sg_offset falls into:
@@ -351,7 +339,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
return ret;
out_unmap_sg:
- rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
@@ -374,7 +362,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_init);
* successful, or a negative error code.
*/
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
struct ib_sig_attrs *sig_attrs,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
@@ -382,32 +370,36 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
struct ib_device *dev = qp->pd->device;
u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
qp->integrity_en);
+ struct sg_table sgt = {
+ .sgl = sg,
+ .orig_nents = sg_cnt,
+ };
+ struct sg_table prot_sgt = {
+ .sgl = prot_sg,
+ .orig_nents = prot_sg_cnt,
+ };
struct ib_rdma_wr *rdma_wr;
int count = 0, ret;
if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
- pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n",
+ pr_err("SG count too large: sg_cnt=%u, prot_sg_cnt=%u, pages_per_mr=%u\n",
sg_cnt, prot_sg_cnt, pages_per_mr);
return -EINVAL;
}
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
- if (!ret)
- return -ENOMEM;
- sg_cnt = ret;
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
+ if (ret)
+ return ret;
if (prot_sg_cnt) {
- ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
- if (!ret) {
- ret = -ENOMEM;
+ ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0);
+ if (ret)
goto out_unmap_sg;
- }
- prot_sg_cnt = ret;
}
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
- ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
+ ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;
@@ -423,10 +415,11 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));
- ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg,
- prot_sg_cnt, NULL, SZ_4K);
+ ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg,
+ prot_sgt.nents, NULL, SZ_4K);
if (unlikely(ret)) {
- pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt);
+ pr_err("failed to map PI sg (%u)\n",
+ sgt.nents + prot_sgt.nents);
goto out_destroy_sig_mr;
}
@@ -465,10 +458,10 @@ out_destroy_sig_mr:
out_free_ctx:
kfree(ctx->reg);
out_unmap_prot_sg:
- if (prot_sg_cnt)
- ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (prot_sgt.nents)
+ ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0);
out_unmap_sg:
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
@@ -502,7 +495,7 @@ static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
* completion notification.
*/
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+ u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
struct ib_send_wr *first_wr, *last_wr;
int i;
@@ -510,7 +503,6 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
switch (ctx->type) {
case RDMA_RW_SIG_MR:
case RDMA_RW_MR:
- /* fallthrough */
for (i = 0; i < ctx->nr_ops; i++) {
rdma_rw_update_lkey(&ctx->reg[i],
ctx->reg[i].wr.wr.opcode !=
@@ -560,7 +552,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs);
* is not set @cqe must be set so that the caller gets a completion
* notification.
*/
-int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
struct ib_send_wr *first_wr;
@@ -579,8 +571,9 @@ EXPORT_SYMBOL(rdma_rw_ctx_post);
* @sg_cnt: number of entries in @sg
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
*/
-void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
- struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
+ enum dma_data_direction dir)
{
int i;
@@ -601,7 +594,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
break;
}
- rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
@@ -618,7 +611,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy);
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
*/
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
enum dma_data_direction dir)
{
@@ -628,9 +621,9 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
kfree(ctx->reg);
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
if (prot_sg_cnt)
ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
@@ -645,7 +638,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
* compute max_rdma_ctxts and the size of the transport's Send and
* Send Completion Queues.
*/
-unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
unsigned int maxpages)
{
unsigned int mr_pages;
@@ -711,7 +704,7 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
IB_MR_TYPE_MEM_REG,
max_num_sg, 0);
if (ret) {
- pr_err("%s: failed to allocated %d MRs\n",
+ pr_err("%s: failed to allocated %u MRs\n",
__func__, nr_mrs);
return ret;
}
@@ -721,7 +714,7 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
if (ret) {
- pr_err("%s: failed to allocated %d SIG MRs\n",
+ pr_err("%s: failed to allocated %u SIG MRs\n",
__func__, nr_sig_mrs);
goto out_free_rdma_mrs;
}
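The rw.c hunks drop the open-coded rdma_rw_map_sg()/rdma_rw_unmap_sg() wrappers in favor of ib_dma_map_sgtable_attrs(), which handles PCI P2PDMA pages internally, returns 0 or a negative errno, and reports the coalesced entry count through sg_table::nents. A minimal sketch of that mapping pattern follows, assuming a caller-provided device and scatterlist; the function name is illustrative, not taken from this patch.

#include <linux/printk.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

static int example_map_for_rdma(struct ib_device *dev, struct scatterlist *sg,
				u32 sg_cnt, enum dma_data_direction dir)
{
	struct sg_table sgt = {
		.sgl = sg,
		.orig_nents = sg_cnt,
	};
	int ret;

	/* 0 on success; a negative errno (not a zero count) signals failure. */
	ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
	if (ret)
		return ret;

	/* sgt.nents holds the number of mapped (possibly coalesced) entries. */
	pr_debug("mapped %u of %u entries\n", sgt.nents, sgt.orig_nents);

	ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
	return 0;
}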
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index cbaaaa92fff3..143de37ae598 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -49,7 +49,7 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num, u8 method,
+ struct ib_device *device, u32 port_num, u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 30d4c126a2db..0de83d9a4985 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
@@ -51,6 +50,7 @@
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
#include <rdma/opa_addr.h>
+#include <rdma/rdma_cm.h>
#include "sa.h"
#include "core_priv.h"
@@ -95,17 +95,18 @@ struct ib_sa_port {
struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
- u8 port_num;
+ u32 port_num;
};
struct ib_sa_device {
int start_port, end_port;
struct ib_event_handler event_handler;
- struct ib_sa_port port[0];
+ struct ib_sa_port port[];
};
struct ib_sa_query {
- void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
+ void (*callback)(struct ib_sa_query *sa_query, int status,
+ int num_prs, struct ib_sa_mad *mad);
void (*release)(struct ib_sa_query *);
struct ib_sa_client *client;
struct ib_sa_port *port;
@@ -117,20 +118,21 @@ struct ib_sa_query {
u32 seq; /* Local svc request sequence number */
unsigned long timeout; /* Local svc timeout */
u8 path_use; /* How will the pathrecord be used */
+
+ /* A separate buffer to save the pathrecords of a response; in cases
+ * like IB/netlink, multiple pathrecords are supported, so mad->data
+ * is not large enough to hold them.
+ */
+ void *resp_pr_data;
};
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
#define IB_SA_QUERY_OPA 0x00000004
-struct ib_sa_service_query {
- void (*callback)(int, struct ib_sa_service_rec *, void *);
- void *context;
- struct ib_sa_query sa_query;
-};
-
struct ib_sa_path_query {
- void (*callback)(int, struct sa_path_rec *, void *);
+ void (*callback)(int status, struct sa_path_rec *rec,
+ int num_paths, void *context);
void *context;
struct ib_sa_query sa_query;
struct sa_path_rec *conv_pr;
@@ -174,7 +176,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
};
-static void ib_sa_add_one(struct ib_device *device);
+static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);
static struct ib_client sa_client = {
@@ -190,7 +192,7 @@ static u32 tid;
#define PATH_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct sa_path_rec, field), \
- .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
@@ -292,7 +294,7 @@ static const struct ib_field path_rec_table[] = {
.struct_offset_bytes = \
offsetof(struct sa_path_rec, field), \
.struct_size_bytes = \
- sizeof((struct sa_path_rec *)0)->field, \
+ sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field opa_path_rec_table[] = {
@@ -420,7 +422,7 @@ static const struct ib_field opa_path_rec_table[] = {
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \
.field_name = "sa_mcmember_rec:" #field
static const struct ib_field mcmember_rec_table[] = {
@@ -502,57 +504,9 @@ static const struct ib_field mcmember_rec_table[] = {
.size_bits = 23 },
};
-#define SERVICE_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
- .field_name = "sa_service_rec:" #field
-
-static const struct ib_field service_rec_table[] = {
- { SERVICE_REC_FIELD(id),
- .offset_words = 0,
- .offset_bits = 0,
- .size_bits = 64 },
- { SERVICE_REC_FIELD(gid),
- .offset_words = 2,
- .offset_bits = 0,
- .size_bits = 128 },
- { SERVICE_REC_FIELD(pkey),
- .offset_words = 6,
- .offset_bits = 0,
- .size_bits = 16 },
- { SERVICE_REC_FIELD(lease),
- .offset_words = 7,
- .offset_bits = 0,
- .size_bits = 32 },
- { SERVICE_REC_FIELD(key),
- .offset_words = 8,
- .offset_bits = 0,
- .size_bits = 128 },
- { SERVICE_REC_FIELD(name),
- .offset_words = 12,
- .offset_bits = 0,
- .size_bits = 64*8 },
- { SERVICE_REC_FIELD(data8),
- .offset_words = 28,
- .offset_bits = 0,
- .size_bits = 16*8 },
- { SERVICE_REC_FIELD(data16),
- .offset_words = 32,
- .offset_bits = 0,
- .size_bits = 8*16 },
- { SERVICE_REC_FIELD(data32),
- .offset_words = 36,
- .offset_bits = 0,
- .size_bits = 4*32 },
- { SERVICE_REC_FIELD(data64),
- .offset_words = 40,
- .offset_bits = 0,
- .size_bits = 2*64 },
-};
-
#define CLASSPORTINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
- .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \
.field_name = "ib_class_port_info:" #field
static const struct ib_field ib_classport_info_rec_table[] = {
@@ -630,7 +584,7 @@ static const struct ib_field ib_classport_info_rec_table[] = {
.struct_offset_bytes =\
offsetof(struct opa_class_port_info, field), \
.struct_size_bytes = \
- sizeof((struct opa_class_port_info *)0)->field, \
+ sizeof_field(struct opa_class_port_info, field), \
.field_name = "opa_class_port_info:" #field
static const struct ib_field opa_classport_info_rec_table[] = {
@@ -710,7 +664,7 @@ static const struct ib_field opa_classport_info_rec_table[] = {
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
- .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \
.field_name = "sa_guidinfo_rec:" #field
static const struct ib_field guidinfo_rec_table[] = {
@@ -760,13 +714,14 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Construct the family header first */
header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
- memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
- LS_DEVICE_NAME_MAX);
+ strscpy_pad(header->device_name,
+ dev_name(&query->port->agent->device->dev),
+ LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
sa_rec->reversible != 0)
- query->path_use = LS_RESOLVE_PATH_USE_GMP;
+ query->path_use = LS_RESOLVE_PATH_USE_ALL;
else
query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
header->path_use = query->path_use;
@@ -829,13 +784,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len;
}
-static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
struct ib_sa_mad *mad;
int len;
+ unsigned long flags;
+ unsigned long delay;
+ gfp_t gfp_flag;
+ int ret;
+
+ INIT_LIST_HEAD(&query->list);
+ query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
mad = query->mad_buf->mad;
len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
@@ -860,36 +822,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask);
-}
+ gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
+ GFP_NOWAIT;
-static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
-{
- unsigned long flags;
- unsigned long delay;
- int ret;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
- INIT_LIST_HEAD(&query->list);
- query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
+ if (ret)
+ goto out;
- /* Put the request on the list first.*/
- spin_lock_irqsave(&ib_nl_request_lock, flags);
+ /* Put the request on the list.*/
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- ret = ib_nl_send_msg(query, gfp_mask);
- if (ret) {
- ret = -EIO;
- /* Remove the request */
- spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_del(&query->list);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- }
+out:
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
return ret;
}
@@ -923,50 +874,81 @@ static void send_handler(struct ib_mad_agent *agent,
static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
const struct nlmsghdr *nlh)
{
+ struct ib_path_rec_data *srec, *drec;
+ struct ib_sa_path_query *path_query;
struct ib_mad_send_wc mad_send_wc;
- struct ib_sa_mad *mad = NULL;
const struct nlattr *head, *curr;
- struct ib_path_rec_data *rec;
- int len, rem;
+ struct ib_sa_mad *mad = NULL;
+ int len, rem, num_prs = 0;
u32 mask = 0;
int status = -EIO;
- if (query->callback) {
- head = (const struct nlattr *) nlmsg_data(nlh);
- len = nlmsg_len(nlh);
- switch (query->path_use) {
- case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
- mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
- break;
+ if (!query->callback)
+ goto out;
- case LS_RESOLVE_PATH_USE_ALL:
- case LS_RESOLVE_PATH_USE_GMP:
- default:
- mask = IB_PATH_PRIMARY | IB_PATH_GMP |
- IB_PATH_BIDIRECTIONAL;
- break;
+ path_query = container_of(query, struct ib_sa_path_query, sa_query);
+ mad = query->mad_buf->mad;
+ if (!path_query->conv_pr &&
+ (be16_to_cpu(mad->mad_hdr.attr_id) == IB_SA_ATTR_PATH_REC)) {
+ /* Need a larger buffer for possible multiple PRs */
+ query->resp_pr_data = kvcalloc(RDMA_PRIMARY_PATH_MAX_REC_NUM,
+ sizeof(*drec), GFP_KERNEL);
+ if (!query->resp_pr_data) {
+ query->callback(query, -ENOMEM, 0, NULL);
+ return;
}
- nla_for_each_attr(curr, head, len, rem) {
- if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
- rec = nla_data(curr);
- /*
- * Get the first one. In the future, we may
- * need to get up to 6 pathrecords.
- */
- if ((rec->flags & mask) == mask) {
- mad = query->mad_buf->mad;
- mad->mad_hdr.method |=
- IB_MGMT_METHOD_RESP;
- memcpy(mad->data, rec->path_rec,
- sizeof(rec->path_rec));
- status = 0;
- break;
- }
- }
+ }
+
+ head = (const struct nlattr *) nlmsg_data(nlh);
+ len = nlmsg_len(nlh);
+ switch (query->path_use) {
+ case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
+ mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
+ break;
+
+ case LS_RESOLVE_PATH_USE_ALL:
+ mask = IB_PATH_PRIMARY;
+ break;
+
+ case LS_RESOLVE_PATH_USE_GMP:
+ default:
+ mask = IB_PATH_PRIMARY | IB_PATH_GMP |
+ IB_PATH_BIDIRECTIONAL;
+ break;
+ }
+
+ drec = (struct ib_path_rec_data *)query->resp_pr_data;
+ nla_for_each_attr(curr, head, len, rem) {
+ if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD)
+ continue;
+
+ srec = nla_data(curr);
+ if ((srec->flags & mask) != mask)
+ continue;
+
+ status = 0;
+ if (!drec) {
+ memcpy(mad->data, srec->path_rec,
+ sizeof(srec->path_rec));
+ num_prs = 1;
+ break;
}
- query->callback(query, status, mad);
+
+ memcpy(drec, srec, sizeof(*drec));
+ drec++;
+ num_prs++;
+ if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM)
+ break;
}
+ if (!status)
+ mad->mad_hdr.method |= IB_MGMT_METHOD_RESP;
+
+ query->callback(query, status, num_prs, mad);
+ kvfree(query->resp_pr_data);
+ query->resp_pr_data = NULL;
+
+out:
mad_send_wc.send_buf = query->mad_buf;
mad_send_wc.status = IB_WC_SUCCESS;
send_handler(query->mad_buf->mad_agent, &mad_send_wc);
@@ -1092,10 +1074,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
unsigned long flags;
- struct ib_sa_query *query;
+ struct ib_sa_query *query = NULL, *iter;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
- int found = 0;
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
@@ -1103,20 +1084,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_for_each_entry(query, &ib_nl_request_list, list) {
+ list_for_each_entry(iter, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
- if (nlh->nlmsg_seq == query->seq) {
- found = !ib_sa_query_cancelled(query);
- if (found)
- list_del(&query->list);
+ if (nlh->nlmsg_seq == iter->seq) {
+ if (!ib_sa_query_cancelled(iter)) {
+ list_del(&iter->list);
+ query = iter;
+ }
break;
}
}
- if (!found) {
+ if (!query) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}
@@ -1176,7 +1158,6 @@ EXPORT_SYMBOL(ib_sa_unregister_client);
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
unsigned long flags;
- struct ib_mad_agent *agent;
struct ib_mad_send_buf *mad_buf;
xa_lock_irqsave(&queries, flags);
@@ -1184,7 +1165,6 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
xa_unlock_irqrestore(&queries, flags);
return;
}
- agent = query->port->agent;
mad_buf = query->mad_buf;
xa_unlock_irqrestore(&queries, flags);
@@ -1194,11 +1174,11 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
* sent to the MAD layer and has to be cancelled from there.
*/
if (!ib_nl_cancel_request(query))
- ib_cancel_mad(agent, mad_buf);
+ ib_cancel_mad(mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
-static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
{
struct ib_sa_device *sa_dev;
struct ib_sa_port *port;
@@ -1217,7 +1197,7 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
+static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr)
@@ -1255,7 +1235,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
* User must invoke rdma_destroy_ah_attr() to release reference to SGID
* attributes which are initialized using ib_init_ah_attr_from_path().
*/
-int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr)
@@ -1364,6 +1344,7 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
{
unsigned long flags;
int ret, id;
+ const int nmbr_sa_query_retries = 10;
xa_lock_irqsave(&queries, flags);
ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
@@ -1371,7 +1352,13 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
if (ret < 0)
return ret;
- query->mad_buf->timeout_ms = timeout_ms;
+ query->mad_buf->timeout_ms = timeout_ms / nmbr_sa_query_retries;
+ query->mad_buf->retries = nmbr_sa_query_retries;
+ if (!query->mad_buf->timeout_ms) {
+ /* Special case, very small timeout_ms */
+ query->mad_buf->timeout_ms = 1;
+ query->mad_buf->retries = timeout_ms;
+ }
query->mad_buf->context[0] = query;
query->id = id;
@@ -1412,17 +1399,13 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
EXPORT_SYMBOL(ib_sa_pack_path);
static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num)
+ struct ib_sa_device *sa_dev,
+ u32 port_num)
{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
unsigned long flags;
bool ret = false;
- if (!sa_dev)
- return ret;
-
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->classport_lock, flags);
if (!port->classport_info.valid)
@@ -1442,24 +1425,24 @@ enum opa_pr_supported {
PR_IB_SUPPORTED
};
-/**
- * Check if current PR query can be an OPA query.
+/*
+ * opa_pr_query_possible - Check if current PR query can be an OPA query.
+ *
* Returns PR_NOT_SUPPORTED if a path record query is not
* possible, PR_OPA_SUPPORTED if an OPA path record query
* is possible and PR_IB_SUPPORTED if an IB path record
* query is possible.
*/
static int opa_pr_query_possible(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num,
- struct sa_path_rec *rec)
+ struct ib_sa_device *sa_dev,
+ struct ib_device *device, u32 port_num)
{
struct ib_port_attr port_attr;
if (ib_query_port(device, port_num, &port_attr))
return PR_NOT_SUPPORTED;
- if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
return PR_OPA_SUPPORTED;
if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
@@ -1468,41 +1451,90 @@ static int opa_pr_query_possible(struct ib_sa_client *client,
return PR_IB_SUPPORTED;
}
+static void ib_sa_pr_callback_single(struct ib_sa_path_query *query,
+ int status, struct ib_sa_mad *mad)
+{
+ struct sa_path_rec rec = {};
+
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, 1, query->context);
+ } else {
+ query->callback(status, &rec, 1, query->context);
+ }
+}
+
+/**
+ * ib_sa_pr_callback_multiple() - Parse path records then do callback.
+ *
+ * In a multiple-PR case the PRs are saved in "query->resp_pr_data"
+ * (instead of "mad->data") and in "ib_path_rec_data" structure format,
+ * so that rec->flags can be set to indicate the type of each PR.
+ * This is valid only in an IB fabric.
+ */
+static void ib_sa_pr_callback_multiple(struct ib_sa_path_query *query,
+ int status, int num_prs,
+ struct ib_path_rec_data *rec_data)
+{
+ struct sa_path_rec *rec;
+ int i;
+
+ rec = kvcalloc(num_prs, sizeof(*rec), GFP_KERNEL);
+ if (!rec) {
+ query->callback(-ENOMEM, NULL, 0, query->context);
+ return;
+ }
+
+ for (i = 0; i < num_prs; i++) {
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec_data[i].path_rec, rec + i);
+ rec[i].rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(rec + i);
+ rec[i].flags = rec_data[i].flags;
+ }
+
+ query->callback(status, rec, num_prs, query->context);
+ kvfree(rec);
+}
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_prs,
struct ib_sa_mad *mad)
{
struct ib_sa_path_query *query =
container_of(sa_query, struct ib_sa_path_query, sa_query);
+ struct sa_path_rec rec;
- if (mad) {
- struct sa_path_rec rec;
-
- if (sa_query->flags & IB_SA_QUERY_OPA) {
- ib_unpack(opa_path_rec_table,
- ARRAY_SIZE(opa_path_rec_table),
- mad->data, &rec);
- rec.rec_type = SA_PATH_REC_TYPE_OPA;
- query->callback(status, &rec, query->context);
- } else {
- ib_unpack(path_rec_table,
- ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.rec_type = SA_PATH_REC_TYPE_IB;
- sa_path_set_dmac_zero(&rec);
-
- if (query->conv_pr) {
- struct sa_path_rec opa;
+ if (!mad || !num_prs) {
+ query->callback(status, NULL, 0, query->context);
+ return;
+ }
- memset(&opa, 0, sizeof(struct sa_path_rec));
- sa_convert_path_ib_to_opa(&opa, &rec);
- query->callback(status, &opa, query->context);
- } else {
- query->callback(status, &rec, query->context);
- }
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ if (num_prs != 1) {
+ query->callback(-EINVAL, NULL, 0, query->context);
+ return;
}
- } else
- query->callback(status, NULL, query->context);
+
+ ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, num_prs, query->context);
+ } else {
+ if (!sa_query->resp_pr_data)
+ ib_sa_pr_callback_single(query, status, mad);
+ else
+ ib_sa_pr_callback_multiple(query, status, num_prs,
+ sa_query->resp_pr_data);
+ }
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
@@ -1540,13 +1572,13 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
* the query.
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_path_rec *resp,
- void *context),
+ int num_paths, void *context),
void *context,
struct ib_sa_query **sa_query)
{
@@ -1574,7 +1606,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->sa_query.port = port;
if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
- status = opa_pr_query_possible(client, device, port_num, rec);
+ status = opa_pr_query_possible(client, sa_dev, device, port_num);
if (status == PR_NOT_SUPPORTED) {
ret = -EINVAL;
goto err1;
@@ -1644,131 +1676,8 @@ err1:
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
-static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
-{
- struct ib_sa_service_query *query =
- container_of(sa_query, struct ib_sa_service_query, sa_query);
-
- if (mad) {
- struct ib_sa_service_rec rec;
-
- ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
- mad->data, &rec);
- query->callback(status, &rec, query->context);
- } else
- query->callback(status, NULL, query->context);
-}
-
-static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
-{
- kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
-}
-
-/**
- * ib_sa_service_rec_query - Start Service Record operation
- * @client:SA client
- * @device:device to send request on
- * @port_num: port number to send request on
- * @method:SA method - should be get, set, or delete
- * @rec:Service Record to send in request
- * @comp_mask:component mask to send in request
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when request completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:request context, used to cancel request
- *
- * Send a Service Record set/get/delete to the SA to register,
- * unregister or query a service record.
- * The callback function will be called when the request completes (or
- * fails); status is 0 for a successful response, -EINTR if the query
- * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
- * occurred sending the query. The resp parameter of the callback is
- * only valid if status is 0.
- *
- * If the return value of ib_sa_service_rec_query() is negative, it is an
- * error code. Otherwise it is a request ID that can be used to cancel
- * the query.
- */
-int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num, u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask,
- unsigned long timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
-{
- struct ib_sa_service_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
- struct ib_mad_agent *agent;
- struct ib_sa_mad *mad;
- int ret;
-
- if (!sa_dev)
- return -ENODEV;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
- agent = port->agent;
-
- if (method != IB_MGMT_METHOD_GET &&
- method != IB_MGMT_METHOD_SET &&
- method != IB_SA_METHOD_DELETE)
- return -EINVAL;
-
- query = kzalloc(sizeof(*query), gfp_mask);
- if (!query)
- return -ENOMEM;
-
- query->sa_query.port = port;
- ret = alloc_mad(&query->sa_query, gfp_mask);
- if (ret)
- goto err1;
-
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
-
- mad = query->sa_query.mad_buf->mad;
- init_mad(&query->sa_query, agent);
-
- query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
- query->sa_query.release = ib_sa_service_rec_release;
- mad->mad_hdr.method = method;
- mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
- mad->sa_hdr.comp_mask = comp_mask;
-
- ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
- rec, mad->data);
-
- *sa_query = &query->sa_query;
-
- ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
- if (ret < 0)
- goto err2;
-
- return ret;
-
-err2:
- *sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
- free_mad(&query->sa_query);
-
-err1:
- kfree(query);
- return ret;
-}
-EXPORT_SYMBOL(ib_sa_service_rec_query);
-
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_prs,
struct ib_sa_mad *mad)
{
struct ib_sa_mcmember_query *query =
@@ -1790,7 +1699,7 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
@@ -1860,7 +1769,7 @@ err1:
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_paths,
struct ib_sa_mad *mad)
{
struct ib_sa_guidinfo_query *query =
@@ -1882,7 +1791,7 @@ static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
unsigned long timeout_ms, gfp_t gfp_mask,
@@ -1957,30 +1866,6 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num)
-{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
- bool ret = false;
- unsigned long flags;
-
- if (!sa_dev)
- return ret;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
-
- spin_lock_irqsave(&port->classport_lock, flags);
- if ((port->classport_info.valid) &&
- (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
- ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
- & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
-
struct ib_classport_info_context {
struct completion done;
struct ib_sa_query *sa_query;
@@ -1994,7 +1879,7 @@ static void ib_classportinfo_cb(void *context)
}
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
- int status,
+ int status, int num_prs,
struct ib_sa_mad *mad)
{
unsigned long flags;
@@ -2170,13 +2055,13 @@ static void send_handler(struct ib_mad_agent *agent,
/* No callback -- already got recv */
break;
case IB_WC_RESP_TIMEOUT_ERR:
- query->callback(query, -ETIMEDOUT, NULL);
+ query->callback(query, -ETIMEDOUT, 0, NULL);
break;
case IB_WC_WR_FLUSH_ERR:
- query->callback(query, -EINTR, NULL);
+ query->callback(query, -EINTR, 0, NULL);
break;
default:
- query->callback(query, -EIO, NULL);
+ query->callback(query, -EIO, 0, NULL);
break;
}
@@ -2204,10 +2089,10 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
mad_recv_wc->recv_buf.mad->mad_hdr.status ?
- -EINVAL : 0,
+ -EINVAL : 0, 1,
(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
else
- query->callback(query, -EIO, NULL);
+ query->callback(query, -EIO, 0, NULL);
}
ib_free_recv_mad(mad_recv_wc);
@@ -2295,7 +2180,7 @@ static void ib_sa_event(struct ib_event_handler *handler,
unsigned long flags;
struct ib_sa_device *sa_dev =
container_of(handler, typeof(*sa_dev), event_handler);
- u8 port_num = event->element.port_num - sa_dev->start_port;
+ u32 port_num = event->element.port_num - sa_dev->start_port;
struct ib_sa_port *port = &sa_dev->port[port_num];
if (!rdma_cap_ib_sa(handler->device, port->port_num))
@@ -2325,18 +2210,19 @@ static void ib_sa_event(struct ib_event_handler *handler,
}
}
-static void ib_sa_add_one(struct ib_device *device)
+static int ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
if (!sa_dev)
- return;
+ return -ENOMEM;
sa_dev->start_port = s;
sa_dev->end_port = e;
@@ -2356,8 +2242,10 @@ static void ib_sa_add_one(struct ib_device *device)
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
recv_handler, sa_dev, 0);
- if (IS_ERR(sa_dev->port[i].agent))
+ if (IS_ERR(sa_dev->port[i].agent)) {
+ ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
+ }
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
@@ -2366,8 +2254,10 @@ static void ib_sa_add_one(struct ib_device *device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &sa_client, sa_dev);
@@ -2386,7 +2276,7 @@ static void ib_sa_add_one(struct ib_device *device)
update_sm_ah(&sa_dev->port[i].update_task);
}
- return;
+ return 0;
err:
while (--i >= 0) {
@@ -2395,7 +2285,7 @@ err:
}
free:
kfree(sa_dev);
- return;
+ return ret;
}
static void ib_sa_remove_one(struct ib_device *device, void *client_data)
@@ -2403,9 +2293,6 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
struct ib_sa_device *sa_dev = client_data;
int i;
- if (!sa_dev)
- return;
-
ib_unregister_event_handler(&sa_dev->event_handler);
flush_workqueue(ib_wq);
@@ -2463,7 +2350,6 @@ err1:
void ib_sa_cleanup(void)
{
cancel_delayed_work(&ib_nl_timed_work);
- flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq);
mcast_cleanup();
ib_unregister_client(&sa_client);
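The sa_query.c changes extend the path-record completion callback with a num_paths argument, so netlink-resolved queries can hand back up to RDMA_PRIMARY_PATH_MAX_REC_NUM records in one response. A hypothetical consumer callback under the new ib_sa_path_rec_get() prototype could look like the sketch below; the handler name and logging are illustrative and not taken from this patch.

#include <linux/printk.h>
#include <rdma/ib_sa.h>

/* Hypothetical completion handler: @resp points at @num_paths records. */
static void example_path_rec_done(int status, struct sa_path_rec *resp,
				  int num_paths, void *context)
{
	int i;

	if (status || !num_paths) {
		pr_warn("path record query failed: %d\n", status);
		return;
	}

	for (i = 0; i < num_paths; i++)
		pr_info("path %d: dlid 0x%x flags 0x%x\n", i,
			be32_to_cpu(sa_path_get_dlid(&resp[i])),
			resp[i].flags);
}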
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 2d5608315dc8..3512c2e54efc 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -72,7 +72,7 @@ static int get_pkey_and_subnet_prefix(struct ib_port_pkey *pp,
if (ret)
return ret;
- ret = ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
+ ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
return ret;
}
@@ -193,7 +193,7 @@ static void qp_to_error(struct ib_qp_security *sec)
static inline void check_pkey_qps(struct pkey_index_qp_list *pkey,
struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
struct ib_port_pkey *pp, *tmp_pp;
@@ -245,7 +245,7 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp)
struct pkey_index_qp_list *tmp_pkey;
struct pkey_index_qp_list *pkey;
struct ib_device *dev;
- u8 port_num = pp->port_num;
+ u32 port_num = pp->port_num;
int ret = 0;
if (pp->state != IB_PORT_PKEY_VALID)
@@ -349,16 +349,11 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
else if (qp_pps)
new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT))
+ if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
+ (qp_attr_mask & IB_QP_PORT)) ||
+ (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
new_pps->main.state = IB_PORT_PKEY_VALID;
- if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) {
- new_pps->main.port_num = qp_pps->main.port_num;
- new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
- new_pps->main.state = IB_PORT_PKEY_VALID;
- }
-
if (qp_attr_mask & IB_QP_ALT_PATH) {
new_pps->alt.port_num = qp_attr->alt_port_num;
new_pps->alt.pkey_index = qp_attr->alt_pkey_index;
@@ -543,7 +538,7 @@ void ib_destroy_qp_security_end(struct ib_qp_security *sec)
}
void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
struct pkey_index_qp_list *pkey;
@@ -591,7 +586,7 @@ int ib_security_modify_qp(struct ib_qp *qp,
WARN_ONCE((qp_attr_mask & IB_QP_PORT &&
rdma_protocol_ib(real_qp->device, qp_attr->port_num) &&
!real_qp->qp_sec),
- "%s: QP security is not initialized for IB QP: %d\n",
+ "%s: QP security is not initialized for IB QP: %u\n",
__func__, real_qp->qp_num);
/* The port/pkey settings are maintained only for the real QP. Open
@@ -654,7 +649,7 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
static int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
+ u32 port_num,
u16 pkey_index,
void *sec)
{
@@ -669,10 +664,7 @@ static int ib_security_pkey_access(struct ib_device *dev,
if (ret)
return ret;
- ret = ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
-
- if (ret)
- return ret;
+ ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index f19b23817c2b..45f09b75c893 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -41,7 +41,7 @@
#include "smi.h"
#include "opa_smi.h"
-static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num,
+static enum smi_action __smi_handle_dr_smp_send(bool is_switch, u32 port_num,
u8 *hop_ptr, u8 hop_cnt,
const u8 *initial_path,
const u8 *return_path,
@@ -127,7 +127,7 @@ static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num,
* Return IB_SMI_DISCARD if the SMP should be discarded
*/
enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
- bool is_switch, int port_num)
+ bool is_switch, u32 port_num)
{
return __smi_handle_dr_smp_send(is_switch, port_num,
&smp->hop_ptr, smp->hop_cnt,
@@ -139,7 +139,7 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
}
enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num)
+ bool is_switch, u32 port_num)
{
return __smi_handle_dr_smp_send(is_switch, port_num,
&smp->hop_ptr, smp->hop_cnt,
@@ -152,7 +152,7 @@ enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
OPA_LID_PERMISSIVE);
}
-static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num,
+static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, u32 port_num,
int phys_port_cnt,
u8 *hop_ptr, u8 hop_cnt,
const u8 *initial_path,
@@ -238,7 +238,7 @@ static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num,
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt)
+ u32 port_num, int phys_port_cnt)
{
return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt,
&smp->hop_ptr, smp->hop_cnt,
@@ -254,7 +254,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt)
+ u32 port_num, int phys_port_cnt)
{
return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt,
&smp->hop_ptr, smp->hop_cnt,
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 91d9b353ab85..e350ed623c45 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -52,11 +52,11 @@ enum smi_forward_action {
};
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int smi_get_fwd_port(struct ib_smp *smp);
extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 087682e6969e..84c53bd2a52d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -44,110 +44,174 @@
#include <rdma/ib_pma.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_counter.h>
+#include <rdma/ib_sysfs.h>
-struct ib_port;
+struct port_table_attribute {
+ struct ib_port_attribute attr;
+ char name[8];
+ int index;
+ __be16 attr_id;
+};
struct gid_attr_group {
- struct ib_port *port;
- struct kobject kobj;
- struct attribute_group ndev;
- struct attribute_group type;
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group groups[2];
+ const struct attribute_group *groups_list[3];
+ struct port_table_attribute attrs_list[];
};
+
struct ib_port {
- struct kobject kobj;
- struct ib_device *ibdev;
+ struct kobject kobj;
+ struct ib_device *ibdev;
struct gid_attr_group *gid_attr_group;
- struct attribute_group gid_group;
- struct attribute_group pkey_group;
- struct attribute_group *pma_table;
- struct attribute_group *hw_stats_ag;
- struct rdma_hw_stats *hw_stats;
- u8 port_num;
+ struct hw_stats_port_data *hw_stats_data;
+
+ struct attribute_group groups[3];
+ const struct attribute_group *groups_list[5];
+ u32 port_num;
+ struct port_table_attribute attrs_list[];
};
-struct port_attribute {
- struct attribute attr;
- ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
- ssize_t (*store)(struct ib_port *, struct port_attribute *,
+struct hw_stats_device_attribute {
+ struct device_attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
const char *buf, size_t count);
};
-#define PORT_ATTR(_name, _mode, _show, _store) \
-struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
-
-#define PORT_ATTR_RO(_name) \
-struct port_attribute port_attr_##_name = __ATTR_RO(_name)
+struct hw_stats_port_attribute {
+ struct ib_port_attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
+ const char *buf, size_t count);
+};
-struct port_table_attribute {
- struct port_attribute attr;
- char name[8];
- int index;
- __be16 attr_id;
+struct hw_stats_device_data {
+ struct attribute_group group;
+ struct rdma_hw_stats *stats;
+ struct hw_stats_device_attribute attrs[];
};
-struct hw_stats_attribute {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj,
- struct attribute *attr, char *buf);
- ssize_t (*store)(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t count);
- int index;
- u8 port_num;
+struct hw_stats_port_data {
+ struct rdma_hw_stats *stats;
+ struct hw_stats_port_attribute attrs[];
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->show)
return -EIO;
- return port_attr->show(p, port_attr, buf);
+ return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
}
static ssize_t port_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t count)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->store)
return -EIO;
- return port_attr->store(p, port_attr, buf, count);
+ return port_attr->store(p->ibdev, p->port_num, port_attr, buf, count);
}
+struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj,
+ u32 *port_num)
+{
+ struct ib_port *port = container_of(kobj, struct ib_port, kobj);
+
+ *port_num = port->port_num;
+ return port->ibdev;
+}
+EXPORT_SYMBOL(ib_port_sysfs_get_ibdev_kobj);
+
static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show,
.store = port_attr_store
};
+static ssize_t hw_stat_device_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hw_stats_device_attribute *stat_attr =
+ container_of(attr, struct hw_stats_device_attribute, attr);
+ struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
+
+ return stat_attr->show(ibdev, ibdev->hw_stats_data->stats,
+ stat_attr - ibdev->hw_stats_data->attrs, 0, buf);
+}
+
+static ssize_t hw_stat_device_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_device_attribute *stat_attr =
+ container_of(attr, struct hw_stats_device_attribute, attr);
+ struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
+
+ return stat_attr->store(ibdev, ibdev->hw_stats_data->stats,
+ stat_attr - ibdev->hw_stats_data->attrs, 0, buf,
+ count);
+}
+
+static ssize_t hw_stat_port_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hw_stats_port_attribute *stat_attr =
+ container_of(attr, struct hw_stats_port_attribute, attr);
+ struct ib_port *port = ibdev->port_data[port_num].sysfs;
+
+ return stat_attr->show(ibdev, port->hw_stats_data->stats,
+ stat_attr - port->hw_stats_data->attrs,
+ port->port_num, buf);
+}
+
+static ssize_t hw_stat_port_store(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_port_attribute *stat_attr =
+ container_of(attr, struct hw_stats_port_attribute, attr);
+ struct ib_port *port = ibdev->port_data[port_num].sysfs;
+
+ return stat_attr->store(ibdev, port->hw_stats_data->stats,
+ stat_attr - port->hw_stats_data->attrs,
+ port->port_num, buf, count);
+}
+
static ssize_t gid_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct gid_attr_group,
kobj)->port;
if (!port_attr->show)
return -EIO;
- return port_attr->show(p, port_attr, buf);
+ return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
}
static const struct sysfs_ops gid_attr_sysfs_ops = {
.show = gid_attr_show
};
-static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t state_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
@@ -161,90 +225,91 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
[IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
};
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
- state_name[attr.state] : "UNKNOWN");
+ return sysfs_emit(buf, "%d: %s\n", attr.state,
+ attr.state >= 0 &&
+ attr.state < ARRAY_SIZE(state_name) ?
+ state_name[attr.state] :
+ "UNKNOWN");
}
-static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t lid_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.lid);
+ return sysfs_emit(buf, "0x%x\n", attr.lid);
}
-static ssize_t lid_mask_count_show(struct ib_port *p,
- struct port_attribute *unused,
- char *buf)
+static ssize_t lid_mask_count_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.lmc);
+ return sysfs_emit(buf, "%u\n", attr.lmc);
}
-static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t sm_lid_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.sm_lid);
+ return sysfs_emit(buf, "0x%x\n", attr.sm_lid);
}
-static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t sm_sl_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.sm_sl);
+ return sysfs_emit(buf, "%u\n", attr.sm_sl);
}
-static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t cap_mask_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+ return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags);
}
-static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t rate_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
char *speed = "";
int rate; /* in deci-Gb/sec */
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
@@ -273,6 +338,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " HDR";
rate = 500;
break;
+ case IB_SPEED_NDR:
+ speed = " NDR";
+ rate = 1000;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
speed = " SDR";
@@ -284,14 +353,14 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
if (rate < 0)
return -EINVAL;
- return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
- rate / 10, rate % 10 ? ".5" : "",
- ib_width_enum_to_int(attr.active_width), speed);
+ return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
+ rate % 10 ? ".5" : "",
+ ib_width_enum_to_int(attr.active_width), speed);
}
static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
{
- static const char * phys_state_str[] = {
+ static const char *phys_state_str[] = {
"<unknown>",
"Sleep",
"Polling",
@@ -307,102 +376,113 @@ static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
return "<unknown>";
}
-static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t phys_state_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d: %s\n", attr.phys_state,
- phys_state_to_str(attr.phys_state));
+ return sysfs_emit(buf, "%u: %s\n", attr.phys_state,
+ phys_state_to_str(attr.phys_state));
}
-static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t link_layer_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
- switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
+ const char *output;
+
+ switch (rdma_port_get_link_layer(ibdev, port_num)) {
case IB_LINK_LAYER_INFINIBAND:
- return sprintf(buf, "%s\n", "InfiniBand");
+ output = "InfiniBand";
+ break;
case IB_LINK_LAYER_ETHERNET:
- return sprintf(buf, "%s\n", "Ethernet");
+ output = "Ethernet";
+ break;
default:
- return sprintf(buf, "%s\n", "Unknown");
+ output = "Unknown";
+ break;
}
+
+ return sysfs_emit(buf, "%s\n", output);
}
-static PORT_ATTR_RO(state);
-static PORT_ATTR_RO(lid);
-static PORT_ATTR_RO(lid_mask_count);
-static PORT_ATTR_RO(sm_lid);
-static PORT_ATTR_RO(sm_sl);
-static PORT_ATTR_RO(cap_mask);
-static PORT_ATTR_RO(rate);
-static PORT_ATTR_RO(phys_state);
-static PORT_ATTR_RO(link_layer);
+static IB_PORT_ATTR_RO(state);
+static IB_PORT_ATTR_RO(lid);
+static IB_PORT_ATTR_RO(lid_mask_count);
+static IB_PORT_ATTR_RO(sm_lid);
+static IB_PORT_ATTR_RO(sm_sl);
+static IB_PORT_ATTR_RO(cap_mask);
+static IB_PORT_ATTR_RO(rate);
+static IB_PORT_ATTR_RO(phys_state);
+static IB_PORT_ATTR_RO(link_layer);
static struct attribute *port_default_attrs[] = {
- &port_attr_state.attr,
- &port_attr_lid.attr,
- &port_attr_lid_mask_count.attr,
- &port_attr_sm_lid.attr,
- &port_attr_sm_sl.attr,
- &port_attr_cap_mask.attr,
- &port_attr_rate.attr,
- &port_attr_phys_state.attr,
- &port_attr_link_layer.attr,
+ &ib_port_attr_state.attr,
+ &ib_port_attr_lid.attr,
+ &ib_port_attr_lid_mask_count.attr,
+ &ib_port_attr_sm_lid.attr,
+ &ib_port_attr_sm_sl.attr,
+ &ib_port_attr_cap_mask.attr,
+ &ib_port_attr_rate.attr,
+ &ib_port_attr_phys_state.attr,
+ &ib_port_attr_link_layer.attr,
NULL
};
+ATTRIBUTE_GROUPS(port_default);
-static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
struct net_device *ndev;
- size_t ret = -EINVAL;
+ int ret = -EINVAL;
rcu_read_lock();
ndev = rcu_dereference(gid_attr->ndev);
if (ndev)
- ret = sprintf(buf, "%s\n", ndev->name);
+ ret = sysfs_emit(buf, "%s\n", ndev->name);
rcu_read_unlock();
return ret;
}
-static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
{
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+ return sysfs_emit(buf, "%s\n",
+ ib_cache_gid_type_str(gid_attr->gid_type));
}
static ssize_t _show_port_gid_attr(
- struct ib_port *p, struct port_attribute *attr, char *buf,
- size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
+ struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr,
+ char *buf,
+ ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
const struct ib_gid_attr *gid_attr;
ssize_t ret;
- gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
if (IS_ERR(gid_attr))
- return PTR_ERR(gid_attr);
+ /* -EINVAL is returned for user space compatibility reasons. */
+ return -EINVAL;
ret = print(gid_attr, buf);
rdma_put_gid_attr(gid_attr);
return ret;
}
-static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_port_gid(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
const struct ib_gid_attr *gid_attr;
- ssize_t ret;
+ int len;
- gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
if (IS_ERR(gid_attr)) {
const union ib_gid zgid = {};
@@ -415,54 +495,56 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
		 * space throwing such error on failure to read the gid, return zero
* GID as before. This maintains backward compatibility.
*/
- return sprintf(buf, "%pI6\n", zgid.raw);
+ return sysfs_emit(buf, "%pI6\n", zgid.raw);
}
- ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
+ len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw);
rdma_put_gid_attr(gid_attr);
- return ret;
+ return len;
}
-static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
- struct port_attribute *attr, char *buf)
+static ssize_t show_port_gid_attr_ndev(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr,
+ char *buf)
{
- return _show_port_gid_attr(p, attr, buf, print_ndev);
+ return _show_port_gid_attr(ibdev, port_num, attr, buf, print_ndev);
}
-static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
- struct port_attribute *attr,
+static ssize_t show_port_gid_attr_gid_type(struct ib_device *ibdev,
+ u32 port_num,
+ struct ib_port_attribute *attr,
char *buf)
{
- return _show_port_gid_attr(p, attr, buf, print_gid_type);
+ return _show_port_gid_attr(ibdev, port_num, attr, buf, print_gid_type);
}
-static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_port_pkey(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
u16 pkey;
- ssize_t ret;
+ int ret;
- ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
+ ret = ib_query_pkey(ibdev, port_num, tab_attr->index, &pkey);
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
- .attr_id = IB_PMA_PORT_COUNTERS , \
+ .attr_id = IB_PMA_PORT_COUNTERS, \
}
#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
struct port_table_attribute port_pma_attr_ext_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16), \
- .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
+ .attr_id = IB_PMA_PORT_COUNTERS_EXT, \
}
/*
@@ -513,47 +595,45 @@ out:
return ret;
}
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_pma_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
- ssize_t ret;
+ int ret;
u8 data[8];
+ int len;
- ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+ ret = get_perf_mad(ibdev, port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data));
if (ret < 0)
return ret;
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (*data >>
- (4 - (offset % 8))) & 0xf);
+ len = sysfs_emit(buf, "%d\n",
+ (*data >> (4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", *data);
+ len = sysfs_emit(buf, "%u\n", *data);
break;
case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)data));
+ len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data));
break;
case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)data));
+ len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data));
break;
case 64:
- ret = sprintf(buf, "%llu\n",
- be64_to_cpup((__be64 *)data));
+ len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data));
break;
-
default:
- ret = 0;
+ len = 0;
+ break;
}
- return ret;
+ return len;
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
@@ -653,72 +733,49 @@ static struct attribute *pma_attrs_noietf[] = {
NULL
};
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
.name = "counters",
.attrs = pma_attrs_ext
};
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
.name = "counters",
.attrs = pma_attrs_noietf
};
static void ib_port_release(struct kobject *kobj)
{
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- struct attribute *a;
+ struct ib_port *port = container_of(kobj, struct ib_port, kobj);
int i;
- if (p->gid_group.attrs) {
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
-
- kfree(p->gid_group.attrs);
- }
-
- if (p->pkey_group.attrs) {
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
-
- kfree(p->pkey_group.attrs);
- }
-
- kfree(p);
+ for (i = 0; i != ARRAY_SIZE(port->groups); i++)
+ kfree(port->groups[i].attrs);
+ if (port->hw_stats_data)
+ rdma_free_hw_stats_struct(port->hw_stats_data->stats);
+ kfree(port->hw_stats_data);
+ kvfree(port);
}
static void ib_port_gid_attr_release(struct kobject *kobj)
{
- struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
- kobj);
- struct attribute *a;
+ struct gid_attr_group *gid_attr_group =
+ container_of(kobj, struct gid_attr_group, kobj);
int i;
- if (g->ndev.attrs) {
- for (i = 0; (a = g->ndev.attrs[i]); ++i)
- kfree(a);
-
- kfree(g->ndev.attrs);
- }
-
- if (g->type.attrs) {
- for (i = 0; (a = g->type.attrs[i]); ++i)
- kfree(a);
-
- kfree(g->type.attrs);
- }
-
- kfree(g);
+ for (i = 0; i != ARRAY_SIZE(gid_attr_group->groups); i++)
+ kfree(gid_attr_group->groups[i].attrs);
+ kfree(gid_attr_group);
}
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
- .default_attrs = port_default_attrs
+ .default_groups = port_default_groups,
};
static struct kobj_type gid_attr_type = {
@@ -726,55 +783,12 @@ static struct kobj_type gid_attr_type = {
.release = ib_port_gid_attr_release
};
-static struct attribute **
-alloc_group_attrs(ssize_t (*show)(struct ib_port *,
- struct port_attribute *, char *buf),
- int len)
-{
- struct attribute **tab_attr;
- struct port_table_attribute *element;
- int i;
-
- tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
- if (!tab_attr)
- return NULL;
-
- for (i = 0; i < len; i++) {
- element = kzalloc(sizeof(struct port_table_attribute),
- GFP_KERNEL);
- if (!element)
- goto err;
-
- if (snprintf(element->name, sizeof(element->name),
- "%d", i) >= sizeof(element->name)) {
- kfree(element);
- goto err;
- }
-
- element->attr.attr.name = element->name;
- element->attr.attr.mode = S_IRUGO;
- element->attr.show = show;
- element->index = i;
- sysfs_attr_init(&element->attr.attr);
-
- tab_attr[i] = &element->attr.attr;
- }
-
- return tab_attr;
-
-err:
- while (--i >= 0)
- kfree(tab_attr[i]);
- kfree(tab_attr);
- return NULL;
-}
-
/*
* Figure out which counter table to use depending on
* the device capabilities.
*/
-static struct attribute_group *get_counter_table(struct ib_device *dev,
- int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
{
struct ib_class_port_info cpi;
@@ -794,7 +808,7 @@ static struct attribute_group *get_counter_table(struct ib_device *dev,
}
static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
- u8 port_num, int index)
+ u32 port_num, int index)
{
int ret;
@@ -809,77 +823,50 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
return 0;
}
-static ssize_t print_hw_stat(struct ib_device *dev, int port_num,
- struct rdma_hw_stats *stats, int index, char *buf)
+static int print_hw_stat(struct ib_device *dev, int port_num,
+ struct rdma_hw_stats *stats, int index, char *buf)
{
u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);
- return sprintf(buf, "%llu\n", stats->value[index] + v);
+ return sysfs_emit(buf, "%llu\n", stats->value[index] + v);
}
-static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t show_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats, unsigned int index,
+ unsigned int port_num, char *buf)
{
- struct ib_device *dev;
- struct ib_port *port;
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int ret;
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- dev = container_of((struct device *)kobj,
- struct ib_device, dev);
- stats = dev->hw_stats;
- } else {
- port = container_of(kobj, struct ib_port, kobj);
- dev = port->ibdev;
- stats = port->hw_stats;
- }
mutex_lock(&stats->lock);
- ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
+ ret = update_hw_stats(ibdev, stats, port_num, index);
if (ret)
goto unlock;
- ret = print_hw_stat(dev, hsa->port_num, stats, hsa->index, buf);
+ ret = print_hw_stat(ibdev, port_num, stats, index, buf);
unlock:
mutex_unlock(&stats->lock);
return ret;
}
-static ssize_t show_stats_lifespan(struct kobject *kobj,
- struct attribute *attr,
+static ssize_t show_stats_lifespan(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
char *buf)
{
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int msecs;
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- struct ib_device *dev = container_of((struct device *)kobj,
- struct ib_device, dev);
-
- stats = dev->hw_stats;
- } else {
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
-
- stats = p->hw_stats;
- }
-
mutex_lock(&stats->lock);
msecs = jiffies_to_msecs(stats->lifespan);
mutex_unlock(&stats->lock);
- return sprintf(buf, "%d\n", msecs);
+ return sysfs_emit(buf, "%d\n", msecs);
}
-static ssize_t set_stats_lifespan(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t count)
+static ssize_t set_stats_lifespan(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
+ const char *buf, size_t count)
{
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int msecs;
int jiffies;
int ret;
@@ -890,17 +877,6 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (msecs < 0 || msecs > 10000)
return -EINVAL;
jiffies = msecs_to_jiffies(msecs);
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- struct ib_device *dev = container_of((struct device *)kobj,
- struct ib_device, dev);
-
- stats = dev->hw_stats;
- } else {
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
-
- stats = p->hw_stats;
- }
mutex_lock(&stats->lock);
stats->lifespan = jiffies;
@@ -909,300 +885,427 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
return count;
}
-static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group)
+static struct hw_stats_device_data *
+alloc_hw_stats_device(struct ib_device *ibdev)
{
- struct attribute **attr;
-
- sysfs_remove_group(kobj, attr_group);
+ struct hw_stats_device_data *data;
+ struct rdma_hw_stats *stats;
- for (attr = attr_group->attrs; *attr; attr++)
- kfree(*attr);
- kfree(attr_group);
-}
+ if (!ibdev->ops.alloc_hw_device_stats)
+ return ERR_PTR(-EOPNOTSUPP);
+ stats = ibdev->ops.alloc_hw_device_stats(ibdev);
+ if (!stats)
+ return ERR_PTR(-ENOMEM);
+ if (!stats->descs || stats->num_counters <= 0)
+ goto err_free_stats;
-static struct attribute *alloc_hsa(int index, u8 port_num, const char *name)
-{
- struct hw_stats_attribute *hsa;
+ /*
+	 * Two extra attribute elements here, one for the lifespan entry and
+ * one to NULL terminate the list for the sysfs core code
+ */
+ data = kzalloc(struct_size(data, attrs, stats->num_counters + 1),
+ GFP_KERNEL);
+ if (!data)
+ goto err_free_stats;
+ data->group.attrs = kcalloc(stats->num_counters + 2,
+ sizeof(*data->group.attrs), GFP_KERNEL);
+ if (!data->group.attrs)
+ goto err_free_data;
- hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
- if (!hsa)
- return NULL;
+ data->group.name = "hw_counters";
+ data->stats = stats;
+ return data;
- hsa->attr.name = (char *)name;
- hsa->attr.mode = S_IRUGO;
- hsa->show = show_hw_stats;
- hsa->store = NULL;
- hsa->index = index;
- hsa->port_num = port_num;
+err_free_data:
+ kfree(data);
+err_free_stats:
+ rdma_free_hw_stats_struct(stats);
+ return ERR_PTR(-ENOMEM);
+}
- return &hsa->attr;
+void ib_device_release_hw_stats(struct hw_stats_device_data *data)
+{
+ kfree(data->group.attrs);
+ rdma_free_hw_stats_struct(data->stats);
+ kfree(data);
}
-static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
+int ib_setup_device_attrs(struct ib_device *ibdev)
{
- struct hw_stats_attribute *hsa;
+ struct hw_stats_device_attribute *attr;
+ struct hw_stats_device_data *data;
+ bool opstat_skipped = false;
+ int i, ret, pos = 0;
+
+ data = alloc_hw_stats_device(ibdev);
+ if (IS_ERR(data)) {
+ if (PTR_ERR(data) == -EOPNOTSUPP)
+ return 0;
+ return PTR_ERR(data);
+ }
+ ibdev->hw_stats_data = data;
- hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
- if (!hsa)
- return NULL;
+ ret = ibdev->ops.get_hw_stats(ibdev, data->stats, 0,
+ data->stats->num_counters);
+ if (ret != data->stats->num_counters) {
+ if (WARN_ON(ret >= 0))
+ return -EINVAL;
+ return ret;
+ }
- hsa->attr.name = name;
- hsa->attr.mode = S_IWUSR | S_IRUGO;
- hsa->show = show_stats_lifespan;
- hsa->store = set_stats_lifespan;
- hsa->index = 0;
- hsa->port_num = port_num;
+ data->stats->timestamp = jiffies;
- return &hsa->attr;
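+	/*
+	 * Counters flagged IB_STAT_FLAG_OPTIONAL must be grouped at the end
+	 * of descs[]; they do not get a sysfs attribute here.
+	 */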
+ for (i = 0; i < data->stats->num_counters; i++) {
+ if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+ opstat_skipped = true;
+ continue;
+ }
+
+ WARN_ON(opstat_skipped);
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = data->stats->descs[i].name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = hw_stat_device_show;
+ attr->show = show_hw_stats;
+ data->group.attrs[pos] = &attr->attr.attr;
+ pos++;
+ }
+
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = "lifespan";
+ attr->attr.attr.mode = 0644;
+ attr->attr.show = hw_stat_device_show;
+ attr->show = show_stats_lifespan;
+ attr->attr.store = hw_stat_device_store;
+ attr->store = set_stats_lifespan;
+ data->group.attrs[pos] = &attr->attr.attr;
+ for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
+ if (!ibdev->groups[i]) {
+ ibdev->groups[i] = &data->group;
+ return 0;
+ }
+ WARN(true, "struct ib_device->groups is too small");
+ return -EINVAL;
}
-static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
- u8 port_num)
+static struct hw_stats_port_data *
+alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group)
{
- struct attribute_group *hsag;
+ struct ib_device *ibdev = port->ibdev;
+ struct hw_stats_port_data *data;
struct rdma_hw_stats *stats;
- int i, ret;
-
- stats = device->ops.alloc_hw_stats(device, port_num);
+ if (!ibdev->ops.alloc_hw_port_stats)
+ return ERR_PTR(-EOPNOTSUPP);
+ stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num);
if (!stats)
- return;
-
- if (!stats->names || stats->num_counters <= 0)
+ return ERR_PTR(-ENOMEM);
+ if (!stats->descs || stats->num_counters <= 0)
goto err_free_stats;
/*
	 * Two extra attribute elements here, one for the lifespan entry and
* one to NULL terminate the list for the sysfs core code
*/
- hsag = kzalloc(sizeof(*hsag) +
- sizeof(void *) * (stats->num_counters + 2),
+ data = kzalloc(struct_size(data, attrs, stats->num_counters + 1),
GFP_KERNEL);
- if (!hsag)
+ if (!data)
goto err_free_stats;
+ group->attrs = kcalloc(stats->num_counters + 2,
+ sizeof(*group->attrs), GFP_KERNEL);
+ if (!group->attrs)
+ goto err_free_data;
- ret = device->ops.get_hw_stats(device, stats, port_num,
- stats->num_counters);
- if (ret != stats->num_counters)
- goto err_free_hsag;
+ group->name = "hw_counters";
+ data->stats = stats;
+ return data;
- stats->timestamp = jiffies;
-
- hsag->name = "hw_counters";
- hsag->attrs = (void *)hsag + sizeof(*hsag);
+err_free_data:
+ kfree(data);
+err_free_stats:
+ rdma_free_hw_stats_struct(stats);
+ return ERR_PTR(-ENOMEM);
+}
- for (i = 0; i < stats->num_counters; i++) {
- hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
- if (!hsag->attrs[i])
- goto err;
- sysfs_attr_init(hsag->attrs[i]);
+static int setup_hw_port_stats(struct ib_port *port,
+ struct attribute_group *group)
+{
+ struct hw_stats_port_attribute *attr;
+ struct hw_stats_port_data *data;
+ bool opstat_skipped = false;
+ int i, ret, pos = 0;
+
+ data = alloc_hw_stats_port(port, group);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ ret = port->ibdev->ops.get_hw_stats(port->ibdev, data->stats,
+ port->port_num,
+ data->stats->num_counters);
+ if (ret != data->stats->num_counters) {
+ if (WARN_ON(ret >= 0))
+ return -EINVAL;
+ return ret;
}
- mutex_init(&stats->lock);
- /* treat an error here as non-fatal */
- hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
- if (hsag->attrs[i])
- sysfs_attr_init(hsag->attrs[i]);
+ data->stats->timestamp = jiffies;
- if (port) {
- struct kobject *kobj = &port->kobj;
- ret = sysfs_create_group(kobj, hsag);
- if (ret)
- goto err;
- port->hw_stats_ag = hsag;
- port->hw_stats = stats;
- if (device->port_data)
- device->port_data[port_num].hw_stats = stats;
- } else {
- struct kobject *kobj = &device->dev.kobj;
- ret = sysfs_create_group(kobj, hsag);
- if (ret)
- goto err;
- device->hw_stats_ag = hsag;
- device->hw_stats = stats;
+ for (i = 0; i < data->stats->num_counters; i++) {
+ if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+ opstat_skipped = true;
+ continue;
+ }
+
+ WARN_ON(opstat_skipped);
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = data->stats->descs[i].name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = hw_stat_port_show;
+ attr->show = show_hw_stats;
+ group->attrs[pos] = &attr->attr.attr;
+ pos++;
}
- return;
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = "lifespan";
+ attr->attr.attr.mode = 0644;
+ attr->attr.show = hw_stat_port_show;
+ attr->show = show_stats_lifespan;
+ attr->attr.store = hw_stat_port_store;
+ attr->store = set_stats_lifespan;
+ group->attrs[pos] = &attr->attr.attr;
+
+ port->hw_stats_data = data;
+ return 0;
+}
-err:
- for (; i >= 0; i--)
- kfree(hsag->attrs[i]);
-err_free_hsag:
- kfree(hsag);
-err_free_stats:
- kfree(stats);
- return;
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev,
+ u32 port_num)
+{
+ if (!ibdev->port_data || !rdma_is_port_valid(ibdev, port_num) ||
+ !ibdev->port_data[port_num].sysfs->hw_stats_data)
+ return NULL;
+ return ibdev->port_data[port_num].sysfs->hw_stats_data->stats;
}
-static int add_port(struct ib_core_device *coredev, int port_num)
+static int
+alloc_port_table_group(const char *name, struct attribute_group *group,
+ struct port_table_attribute *attrs, size_t num,
+ ssize_t (*show)(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *, char *buf))
{
- struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
- bool is_full_dev = &device->coredev == coredev;
- struct ib_port *p;
- struct ib_port_attr attr;
+ struct attribute **attr_list;
int i;
- int ret;
- ret = ib_query_port(device, port_num, &attr);
- if (ret)
- return ret;
-
- p = kzalloc(sizeof *p, GFP_KERNEL);
- if (!p)
+ attr_list = kcalloc(num + 1, sizeof(*attr_list), GFP_KERNEL);
+ if (!attr_list)
return -ENOMEM;
- p->ibdev = device;
- p->port_num = port_num;
+ for (i = 0; i < num; i++) {
+ struct port_table_attribute *element = &attrs[i];
- ret = kobject_init_and_add(&p->kobj, &port_type,
- coredev->ports_kobj,
- "%d", port_num);
- if (ret) {
- kfree(p);
- return ret;
- }
+ if (snprintf(element->name, sizeof(element->name), "%d", i) >=
+ sizeof(element->name))
+ goto err;
- p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
- if (!p->gid_attr_group) {
- ret = -ENOMEM;
- goto err_put;
- }
+ sysfs_attr_init(&element->attr.attr);
+ element->attr.attr.name = element->name;
+ element->attr.attr.mode = 0444;
+ element->attr.show = show;
+ element->index = i;
- p->gid_attr_group->port = p;
- ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
- &p->kobj, "gid_attrs");
- if (ret) {
- kfree(p->gid_attr_group);
- goto err_put;
+ attr_list[i] = &element->attr.attr;
}
+ group->name = name;
+ group->attrs = attr_list;
+ return 0;
+err:
+ kfree(attr_list);
+ return -EINVAL;
+}
- if (device->ops.process_mad && is_full_dev) {
- p->pma_table = get_counter_table(device, port_num);
- ret = sysfs_create_group(&p->kobj, p->pma_table);
- if (ret)
- goto err_put_gid_attrs;
- }
+/*
+ * Create the sysfs:
+ * ibp0s9/ports/XX/gid_attrs/{ndevs,types}/YYY
+ * YYY is the gid table index in decimal
+ */
+static int setup_gid_attrs(struct ib_port *port,
+ const struct ib_port_attr *attr)
+{
+ struct gid_attr_group *gid_attr_group;
+ int ret;
- p->gid_group.name = "gids";
- p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
- if (!p->gid_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_pma;
- }
+ gid_attr_group = kzalloc(struct_size(gid_attr_group, attrs_list,
+ attr->gid_tbl_len * 2),
+ GFP_KERNEL);
+ if (!gid_attr_group)
+ return -ENOMEM;
+ gid_attr_group->port = port;
+ kobject_init(&gid_attr_group->kobj, &gid_attr_type);
- ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ ret = alloc_port_table_group("ndevs", &gid_attr_group->groups[0],
+ gid_attr_group->attrs_list,
+ attr->gid_tbl_len,
+ show_port_gid_attr_ndev);
if (ret)
- goto err_free_gid;
+ goto err_put;
+ gid_attr_group->groups_list[0] = &gid_attr_group->groups[0];
- p->gid_attr_group->ndev.name = "ndevs";
- p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
- attr.gid_tbl_len);
- if (!p->gid_attr_group->ndev.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid;
- }
+ ret = alloc_port_table_group(
+ "types", &gid_attr_group->groups[1],
+ gid_attr_group->attrs_list + attr->gid_tbl_len,
+ attr->gid_tbl_len, show_port_gid_attr_gid_type);
+ if (ret)
+ goto err_put;
+ gid_attr_group->groups_list[1] = &gid_attr_group->groups[1];
- ret = sysfs_create_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->ndev);
+ ret = kobject_add(&gid_attr_group->kobj, &port->kobj, "gid_attrs");
if (ret)
- goto err_free_gid_ndev;
+ goto err_put;
+ ret = sysfs_create_groups(&gid_attr_group->kobj,
+ gid_attr_group->groups_list);
+ if (ret)
+ goto err_del;
+ port->gid_attr_group = gid_attr_group;
+ return 0;
- p->gid_attr_group->type.name = "types";
- p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
- attr.gid_tbl_len);
- if (!p->gid_attr_group->type.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_ndev;
- }
+err_del:
+ kobject_del(&gid_attr_group->kobj);
+err_put:
+ kobject_put(&gid_attr_group->kobj);
+ return ret;
+}
- ret = sysfs_create_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->type);
- if (ret)
- goto err_free_gid_type;
+static void destroy_gid_attrs(struct ib_port *port)
+{
+ struct gid_attr_group *gid_attr_group = port->gid_attr_group;
- p->pkey_group.name = "pkeys";
- p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
- attr.pkey_tbl_len);
- if (!p->pkey_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_type;
- }
+ if (!gid_attr_group)
+ return;
+ sysfs_remove_groups(&gid_attr_group->kobj, gid_attr_group->groups_list);
+ kobject_del(&gid_attr_group->kobj);
+ kobject_put(&gid_attr_group->kobj);
+}
- ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+/*
+ * Create the sysfs:
+ * ibp0s9/ports/XX/{gids,pkeys,counters}/YYY
+ */
+static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
+ const struct ib_port_attr *attr)
+{
+ struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
+ bool is_full_dev = &device->coredev == coredev;
+ const struct attribute_group **cur_group;
+ struct ib_port *p;
+ int ret;
+
+ p = kvzalloc(struct_size(p, attrs_list,
+ attr->gid_tbl_len + attr->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ p->ibdev = device;
+ p->port_num = port_num;
+ kobject_init(&p->kobj, &port_type);
+
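+	/*
+	 * groups[0] = "gids", groups[1] = "pkeys" (if any),
+	 * groups[2] = "hw_counters"
+	 */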
+ cur_group = p->groups_list;
+ ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
+ attr->gid_tbl_len, show_port_gid);
if (ret)
- goto err_free_pkey;
+ goto err_put;
+ *cur_group++ = &p->groups[0];
- if (device->ops.init_port && is_full_dev) {
- ret = device->ops.init_port(device, port_num, &p->kobj);
+ if (attr->pkey_tbl_len) {
+ ret = alloc_port_table_group("pkeys", &p->groups[1],
+ p->attrs_list + attr->gid_tbl_len,
+ attr->pkey_tbl_len, show_port_pkey);
if (ret)
- goto err_remove_pkey;
+ goto err_put;
+ *cur_group++ = &p->groups[1];
}
/*
* If port == 0, it means hw_counters are per device and not per
- * port, so holder should be device. Therefore skip per port conunter
- * initialization.
+ * port, so holder should be device. Therefore skip per port
+ * counter initialization.
*/
- if (device->ops.alloc_hw_stats && port_num && is_full_dev)
- setup_hw_stats(device, p, port_num);
-
- list_add_tail(&p->kobj.entry, &coredev->port_list);
-
- kobject_uevent(&p->kobj, KOBJ_ADD);
- return 0;
-
-err_remove_pkey:
- sysfs_remove_group(&p->kobj, &p->pkey_group);
-
-err_free_pkey:
- for (i = 0; i < attr.pkey_tbl_len; ++i)
- kfree(p->pkey_group.attrs[i]);
-
- kfree(p->pkey_group.attrs);
- p->pkey_group.attrs = NULL;
-
-err_remove_gid_type:
- sysfs_remove_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->type);
-
-err_free_gid_type:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_attr_group->type.attrs[i]);
-
- kfree(p->gid_attr_group->type.attrs);
- p->gid_attr_group->type.attrs = NULL;
-
-err_remove_gid_ndev:
- sysfs_remove_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->ndev);
-
-err_free_gid_ndev:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_attr_group->ndev.attrs[i]);
-
- kfree(p->gid_attr_group->ndev.attrs);
- p->gid_attr_group->ndev.attrs = NULL;
-
-err_remove_gid:
- sysfs_remove_group(&p->kobj, &p->gid_group);
+ if (port_num && is_full_dev) {
+ ret = setup_hw_port_stats(p, &p->groups[2]);
+ if (ret && ret != -EOPNOTSUPP)
+ goto err_put;
+ if (!ret)
+ *cur_group++ = &p->groups[2];
+ }
-err_free_gid:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_group.attrs[i]);
+ if (device->ops.process_mad && is_full_dev)
+ *cur_group++ = get_counter_table(device, port_num);
- kfree(p->gid_group.attrs);
- p->gid_group.attrs = NULL;
+ ret = kobject_add(&p->kobj, coredev->ports_kobj, "%d", port_num);
+ if (ret)
+ goto err_put;
+ ret = sysfs_create_groups(&p->kobj, p->groups_list);
+ if (ret)
+ goto err_del;
+ if (is_full_dev) {
+ ret = sysfs_create_groups(&p->kobj, device->ops.port_groups);
+ if (ret)
+ goto err_groups;
+ }
-err_remove_pma:
- if (p->pma_table)
- sysfs_remove_group(&p->kobj, p->pma_table);
+ list_add_tail(&p->kobj.entry, &coredev->port_list);
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = p;
-err_put_gid_attrs:
- kobject_put(&p->gid_attr_group->kobj);
+ return p;
+err_groups:
+ sysfs_remove_groups(&p->kobj, p->groups_list);
+err_del:
+ kobject_del(&p->kobj);
err_put:
kobject_put(&p->kobj);
- return ret;
+ return ERR_PTR(ret);
+}
+
+static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
+{
+ bool is_full_dev = &port->ibdev->coredev == coredev;
+
+ if (port->ibdev->port_data &&
+ port->ibdev->port_data[port->port_num].sysfs == port)
+ port->ibdev->port_data[port->port_num].sysfs = NULL;
+ list_del(&port->kobj.entry);
+ if (is_full_dev)
+ sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
+ sysfs_remove_groups(&port->kobj, port->groups_list);
+ kobject_del(&port->kobj);
+ kobject_put(&port->kobj);
+}
+
+static const char *node_type_string(int node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ return "CA";
+ case RDMA_NODE_IB_SWITCH:
+ return "switch";
+ case RDMA_NODE_IB_ROUTER:
+ return "router";
+ case RDMA_NODE_RNIC:
+ return "RNIC";
+ case RDMA_NODE_USNIC:
+ return "usNIC";
+ case RDMA_NODE_USNIC_UDP:
+ return "usNIC UDP";
+ case RDMA_NODE_UNSPECIFIED:
+ return "unspecified";
+ }
+ return "<unknown>";
}
static ssize_t node_type_show(struct device *device,
@@ -1210,16 +1313,8 @@ static ssize_t node_type_show(struct device *device,
{
struct ib_device *dev = rdma_device_to_ibdev(device);
- switch (dev->node_type) {
- case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
- case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
- case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
- case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
- case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type);
- case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
- case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
- default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
- }
+ return sysfs_emit(buf, "%u: %s\n", dev->node_type,
+ node_type_string(dev->node_type));
}
static DEVICE_ATTR_RO(node_type);
@@ -1227,12 +1322,13 @@ static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid;
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(guid[0]),
+ be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]),
+ be16_to_cpu(guid[3]));
}
static DEVICE_ATTR_RO(sys_image_guid);
@@ -1240,12 +1336,13 @@ static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *node_guid = (__be16 *)&dev->node_guid;
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(node_guid[0]),
+ be16_to_cpu(node_guid[1]),
+ be16_to_cpu(node_guid[2]),
+ be16_to_cpu(node_guid[3]));
}
static DEVICE_ATTR_RO(node_guid);
@@ -1254,7 +1351,7 @@ static ssize_t node_desc_show(struct device *device,
{
struct ib_device *dev = rdma_device_to_ibdev(device);
- return sprintf(buf, "%.64s\n", dev->node_desc);
+ return sysfs_emit(buf, "%.64s\n", dev->node_desc);
}
static ssize_t node_desc_store(struct device *device,
@@ -1281,10 +1378,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ char version[IB_FW_VERSION_NAME_MAX] = {};
- ib_get_device_fw_str(dev, buf);
- strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
- return strlen(buf);
+ ib_get_device_fw_str(dev, version);
+
+ return sysfs_emit(buf, "%s\n", version);
}
static DEVICE_ATTR_RO(fw_ver);
@@ -1303,30 +1401,13 @@ const struct attribute_group ib_dev_attr_group = {
void ib_free_port_attrs(struct ib_core_device *coredev)
{
- struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
- bool is_full_dev = &device->coredev == coredev;
struct kobject *p, *t;
list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
- list_del(&p->entry);
- if (port->hw_stats_ag)
- free_hsag(&port->kobj, port->hw_stats_ag);
- kfree(port->hw_stats);
- if (device->port_data && is_full_dev)
- device->port_data[port->port_num].hw_stats = NULL;
-
- if (port->pma_table)
- sysfs_remove_group(p, port->pma_table);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- sysfs_remove_group(&port->gid_attr_group->kobj,
- &port->gid_attr_group->ndev);
- sysfs_remove_group(&port->gid_attr_group->kobj,
- &port->gid_attr_group->type);
- kobject_put(&port->gid_attr_group->kobj);
- kobject_put(p);
+ destroy_gid_attrs(port);
+ destroy_port(coredev, port);
}
kobject_put(coredev->ports_kobj);
@@ -1335,7 +1416,7 @@ void ib_free_port_attrs(struct ib_core_device *coredev)
int ib_setup_port_attrs(struct ib_core_device *coredev)
{
struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
- unsigned int port;
+ u32 port_num;
int ret;
coredev->ports_kobj = kobject_create_and_add("ports",
@@ -1343,12 +1424,24 @@ int ib_setup_port_attrs(struct ib_core_device *coredev)
if (!coredev->ports_kobj)
return -ENOMEM;
- rdma_for_each_port (device, port) {
- ret = add_port(coredev, port);
+ rdma_for_each_port (device, port_num) {
+ struct ib_port_attr attr;
+ struct ib_port *port;
+
+ ret = ib_query_port(device, port_num, &attr);
if (ret)
goto err_put;
- }
+ port = setup_port(coredev, port_num, &attr);
+ if (IS_ERR(port)) {
+ ret = PTR_ERR(port);
+ goto err_put;
+ }
+
+ ret = setup_gid_attrs(port, &attr);
+ if (ret)
+ goto err_put;
+ }
return 0;
err_put:
@@ -1356,68 +1449,27 @@ err_put:
return ret;
}
-int ib_device_register_sysfs(struct ib_device *device)
-{
- int ret;
-
- ret = ib_setup_port_attrs(&device->coredev);
- if (ret)
- return ret;
-
- if (device->ops.alloc_hw_stats)
- setup_hw_stats(device, NULL, 0);
-
- return 0;
-}
-
-void ib_device_unregister_sysfs(struct ib_device *device)
-{
- if (device->hw_stats_ag)
- free_hsag(&device->dev.kobj, device->hw_stats_ag);
- kfree(device->hw_stats);
-
- ib_free_port_attrs(&device->coredev);
-}
-
/**
- * ib_port_register_module_stat - add module counters under relevant port
- * of IB device.
+ * ib_port_register_client_groups - Add an ib_client's attributes to the port
*
- * @device: IB device to add counters
+ * @ibdev: IB device to add counters
* @port_num: valid port number
- * @kobj: pointer to the kobject to initialize
- * @ktype: pointer to the ktype for this kobject.
- * @name: the name of the kobject
+ * @groups: Group list of attributes
+ *
+ * Do not use. Only for legacy sysfs compatibility.
*/
-int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
- struct kobject *kobj, struct kobj_type *ktype,
- const char *name)
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups)
{
- struct kobject *p, *t;
- int ret;
-
- list_for_each_entry_safe(p, t, &device->coredev.port_list, entry) {
- struct ib_port *port = container_of(p, struct ib_port, kobj);
-
- if (port->port_num != port_num)
- continue;
-
- ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s",
- name);
- if (ret)
- return ret;
- }
-
- return 0;
+ return sysfs_create_groups(&ibdev->port_data[port_num].sysfs->kobj,
+ groups);
}
-EXPORT_SYMBOL(ib_port_register_module_stat);
+EXPORT_SYMBOL(ib_port_register_client_groups);
-/**
- * ib_port_unregister_module_stat - release module counters
- * @kobj: pointer to the kobject to release
- */
-void ib_port_unregister_module_stat(struct kobject *kobj)
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups)
{
- kobject_put(kobj);
+ return sysfs_remove_groups(&ibdev->port_data[port_num].sysfs->kobj,
+ groups);
}
-EXPORT_SYMBOL(ib_port_unregister_module_stat);
+EXPORT_SYMBOL(ib_port_unregister_client_groups);
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
index 6c3514beac4d..31e7860d35bf 100644
--- a/drivers/infiniband/core/trace.c
+++ b/drivers/infiniband/core/trace.c
@@ -9,6 +9,4 @@
#define CREATE_TRACE_POINTS
-#include <rdma/ib_verbs.h>
-
#include <trace/events/rdma_core.h>
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 0274e9b704be..bf42650f125b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -52,6 +52,7 @@
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
+#include <rdma/ib_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
@@ -79,28 +80,22 @@ struct ucma_file {
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
- struct workqueue_struct *close_wq;
};
struct ucma_context {
u32 id;
struct completion comp;
- atomic_t ref;
+ refcount_t ref;
int events_reported;
- int backlog;
+ atomic_t backlog;
struct ucma_file *file;
struct rdma_cm_id *cm_id;
+ struct mutex mutex;
u64 uid;
struct list_head list;
struct list_head mc_list;
- /* mark that device is in process of destroying the internal HW
- * resources, protected by the ctx_table lock
- */
- int closing;
- /* sync between removal event and id destroy, protected by file mut */
- int destroying;
struct work_struct close_work;
};
@@ -117,17 +112,17 @@ struct ucma_multicast {
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_context *conn_req_ctx;
struct ucma_multicast *mc;
struct list_head list;
- struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
- struct work_struct close_work;
};
static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);
static const struct file_operations ucma_fops;
+static int ucma_destroy_private_ctx(struct ucma_context *ctx);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
@@ -137,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
ctx = xa_load(&ctx_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file || !ctx->cm_id)
+ else if (ctx->file != file)
ctx = ERR_PTR(-EINVAL);
return ctx;
}
@@ -148,19 +143,16 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
xa_lock(&ctx_table);
ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx)) {
- if (ctx->closing)
- ctx = ERR_PTR(-EIO);
- else
- atomic_inc(&ctx->ref);
- }
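+	/*
+	 * A refcount that has already dropped to zero means the ctx is in
+	 * the middle of being destroyed.
+	 */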
+ if (!IS_ERR(ctx))
+ if (!refcount_inc_not_zero(&ctx->ref))
+ ctx = ERR_PTR(-ENXIO);
xa_unlock(&ctx_table);
return ctx;
}
static void ucma_put_ctx(struct ucma_context *ctx)
{
- if (atomic_dec_and_test(&ctx->ref))
+ if (refcount_dec_and_test(&ctx->ref))
complete(&ctx->comp);
}
@@ -181,26 +173,21 @@ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
return ctx;
}
-static void ucma_close_event_id(struct work_struct *work)
-{
- struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
-
- rdma_destroy_id(uevent_close->cm_id);
- kfree(uevent_close);
-}
-
static void ucma_close_id(struct work_struct *work)
{
struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive until it is explicitly destroyed
- * by its creator.
+ * by its creator. This puts back the xarray's reference.
*/
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+
+ /* Reading the cm_id without holding a positive ref is not allowed */
+ ctx->cm_id = NULL;
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -212,40 +199,32 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
return NULL;
INIT_WORK(&ctx->close_work, ucma_close_id);
- atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
+ mutex_init(&ctx->mutex);
- if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&ctx->list, &file->ctx_list);
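+	/*
+	 * Reserve the ID with a NULL entry; the ctx is only made visible in
+	 * the xarray by ucma_finish_ctx() once it is fully initialized.
+	 */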
+ if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+ kfree(ctx);
+ return NULL;
+ }
return ctx;
-
-error:
- kfree(ctx);
- return NULL;
}
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
+ struct rdma_cm_id *cm_id)
{
- struct ucma_multicast *mc;
-
- mc = kzalloc(sizeof(*mc), GFP_KERNEL);
- if (!mc)
- return NULL;
-
- mc->ctx = ctx;
- if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&mc->list, &ctx->mc_list);
- return mc;
+ refcount_set(&ctx->ref, 1);
+ ctx->cm_id = cm_id;
+}
-error:
- kfree(mc);
- return NULL;
+static void ucma_finish_ctx(struct ucma_context *ctx)
+{
+ lockdep_assert_held(&ctx->file->mut);
+ list_add_tail(&ctx->list, &ctx->file->ctx_list);
+ xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
@@ -255,7 +234,7 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
memcpy(dst->private_data, src->private_data,
src->private_data_len);
dst->private_data_len = src->private_data_len;
- dst->responder_resources =src->responder_resources;
+ dst->responder_resources = src->responder_resources;
dst->initiator_depth = src->initiator_depth;
dst->flow_control = src->flow_control;
dst->retry_count = src->retry_count;
@@ -277,10 +256,15 @@ static void ucma_copy_ud_event(struct ib_device *device,
dst->qkey = src->qkey;
}
-static void ucma_set_event_context(struct ucma_context *ctx,
- struct rdma_cm_event *event,
- struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+ struct rdma_cm_event *event)
{
+ struct ucma_event *uevent;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return NULL;
+
uevent->ctx = ctx;
switch (event->event) {
case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -295,44 +279,55 @@ static void ucma_set_event_context(struct ucma_context *ctx,
uevent->resp.id = ctx->id;
break;
}
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+ if (ctx->cm_id->qp_type == IB_QPT_UD)
+ ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+ return uevent;
}
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
{
- struct ucma_context *ctx = cm_id->context;
- struct ucma_event *con_req_eve;
- int event_found = 0;
+ struct ucma_context *listen_ctx = cm_id->context;
+ struct ucma_context *ctx;
+ struct ucma_event *uevent;
- if (ctx->destroying)
- return;
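+	/*
+	 * Consume one backlog slot; it is returned by ucma_get_event() or on
+	 * the error path below.
+	 */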
+ if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+ return -ENOMEM;
+ ctx = ucma_alloc_ctx(listen_ctx->file);
+ if (!ctx)
+ goto err_backlog;
+ ucma_set_ctx_cm_id(ctx, cm_id);
- /* only if context is pointing to cm_id that it owns it and can be
- * queued to be closed, otherwise that cm_id is an inflight one that
- * is part of that context event list pending to be detached and
- * reattached to its new context as part of ucma_get_event,
- * handled separately below.
- */
- if (ctx->cm_id == cm_id) {
- xa_lock(&ctx_table);
- ctx->closing = 1;
- xa_unlock(&ctx_table);
- queue_work(ctx->file->close_wq, &ctx->close_work);
- return;
- }
+ uevent = ucma_create_uevent(listen_ctx, event);
+ if (!uevent)
+ goto err_alloc;
+ uevent->conn_req_ctx = ctx;
+ uevent->resp.id = ctx->id;
- list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
- if (con_req_eve->cm_id == cm_id &&
- con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- list_del(&con_req_eve->list);
- INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
- queue_work(ctx->file->close_wq, &con_req_eve->close_work);
- event_found = 1;
- break;
- }
- }
- if (!event_found)
- pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ ctx->cm_id->context = ctx;
+
+ mutex_lock(&ctx->file->mut);
+ ucma_finish_ctx(ctx);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ return 0;
+
+err_alloc:
+ ucma_destroy_private_ctx(ctx);
+err_backlog:
+ atomic_inc(&listen_ctx->backlog);
+ /* Returning error causes the new ID to be destroyed */
+ return -ENOMEM;
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -340,69 +335,49 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
{
struct ucma_event *uevent;
struct ucma_context *ctx = cm_id->context;
- int ret = 0;
- uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
- if (!uevent)
- return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ return ucma_connect_event_handler(cm_id, event);
- mutex_lock(&ctx->file->mut);
- uevent->cm_id = cm_id;
- ucma_set_event_context(ctx, event, uevent);
- uevent->resp.event = event->event;
- uevent->resp.status = event->status;
- if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
- &event->param.ud);
- else
- ucma_copy_conn_event(&uevent->resp.param.conn,
- &event->param.conn);
-
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- if (!ctx->backlog) {
- ret = -ENOMEM;
- kfree(uevent);
- goto out;
- }
- ctx->backlog--;
- } else if (!ctx->uid || ctx->cm_id != cm_id) {
- /*
- * We ignore events for new connections until userspace has set
- * their context. This can only happen if an error occurs on a
- * new connection before the user accepts it. This is okay,
- * since the accept will just fail later. However, we do need
- * to release the underlying HW resources in case of a device
- * removal event.
- */
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-
- kfree(uevent);
- goto out;
+ /*
+ * We ignore events for new connections until userspace has set their
+ * context. This can only happen if an error occurs on a new connection
+ * before the user accepts it. This is okay, since the accept will just
+ * fail later. However, we do need to release the underlying HW
+ * resources in case of a device removal event.
+ */
+ if (ctx->uid) {
+ uevent = ucma_create_uevent(ctx, event);
+ if (!uevent)
+ return 0;
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
}
- list_add_tail(&uevent->list, &ctx->file->event_list);
- wake_up_interruptible(&ctx->file->poll_wait);
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-out:
- mutex_unlock(&ctx->file->mut);
- return ret;
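+	/*
+	 * Queue the deferred close only while the ctx is still published in
+	 * the xarray, otherwise destroy is already in progress.
+	 */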
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
+ xa_lock(&ctx_table);
+ if (xa_load(&ctx_table, ctx->id) == ctx)
+ queue_work(system_unbound_wq, &ctx->close_work);
+ xa_unlock(&ctx_table);
+ }
+ return 0;
}
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct ucma_context *ctx;
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
- int ret = 0;
/*
* Old 32 bit user space does not send the 4 byte padding in the
* reserved field. We don't care, allow it to keep working.
*/
- if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
+ sizeof(uevent->resp.ece))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -422,35 +397,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
mutex_lock(&file->mut);
}
- uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- ctx = ucma_alloc_ctx(file);
- if (!ctx) {
- ret = -ENOMEM;
- goto done;
- }
- uevent->ctx->backlog++;
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
+ uevent = list_first_entry(&file->event_list, struct ucma_event, list);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp,
min_t(size_t, out_len, sizeof(uevent->resp)))) {
- ret = -EFAULT;
- goto done;
+ mutex_unlock(&file->mut);
+ return -EFAULT;
}
list_del(&uevent->list);
uevent->ctx->events_reported++;
if (uevent->mc)
uevent->mc->events_reported++;
- kfree(uevent);
-done:
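+	/* Delivering a connect request frees a backlog slot on the listener */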
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ atomic_inc(&uevent->ctx->backlog);
mutex_unlock(&file->mut);
- return ret;
+
+ kfree(uevent);
+ return 0;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -491,38 +456,32 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
if (ret)
return ret;
- mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
- mutex_unlock(&file->mut);
if (!ctx)
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = __rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+ cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
+ ucma_set_ctx_cm_id(ctx, cm_id);
resp.id = ctx->id;
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err2;
+ goto err1;
}
- ctx->cm_id = cm_id;
+ mutex_lock(&file->mut);
+ ucma_finish_ctx(ctx);
+ mutex_unlock(&file->mut);
return 0;
-err2:
- rdma_destroy_id(cm_id);
err1:
- xa_erase(&ctx_table, ctx->id);
- mutex_lock(&file->mut);
- list_del(&ctx->list);
- mutex_unlock(&file->mut);
- kfree(ctx);
+ ucma_destroy_private_ctx(ctx);
return ret;
}
@@ -530,19 +489,25 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
struct ucma_multicast *mc, *tmp;
- mutex_lock(&ctx->file->mut);
+ xa_lock(&multicast_table);
list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
list_del(&mc->list);
- xa_erase(&multicast_table, mc->id);
+ /*
+		 * At this point mc->ctx->ref is 0, so ucma_leave_multicast()
+		 * cannot obtain the mc; readers take the xarray lock, which we
+		 * hold, and this is enough serialization.
+ */
+ __xa_erase(&multicast_table, mc->id);
kfree(mc);
}
- mutex_unlock(&ctx->file->mut);
+ xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
+ rdma_lock_handler(mc->ctx->cm_id);
+ mutex_lock(&mc->ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
if (uevent->mc != mc)
continue;
@@ -550,45 +515,75 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
list_del(&uevent->list);
kfree(uevent);
}
+ mutex_unlock(&mc->ctx->file->mut);
+ rdma_unlock_handler(mc->ctx->cm_id);
}
-/*
- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
- * this point, no new events will be reported from the hardware. However, we
- * still need to cleanup the UCMA context for this ID. Specifically, there
- * might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing. We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
- * mutex. After that we release them as needed.
- */
-static int ucma_free_ctx(struct ucma_context *ctx)
+static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
int events_reported;
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
-
- ucma_cleanup_multicast(ctx);
-
- /* Cleanup events not yet reported to the user. */
+ /* Cleanup events not yet reported to the user.*/
mutex_lock(&ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx == ctx)
+ if (uevent->ctx != ctx)
+ continue;
+
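+		/*
+		 * An undelivered connect request references a ctx that
+		 * userspace never learned about; claim it by swapping its
+		 * xarray entry to XA_ZERO_ENTRY so only one thread destroys
+		 * it below, outside the locks.
+		 */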
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+ xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
+ uevent->conn_req_ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) == uevent->conn_req_ctx) {
list_move_tail(&uevent->list, &list);
+ continue;
+ }
+ list_del(&uevent->list);
+ kfree(uevent);
}
list_del(&ctx->list);
+ events_reported = ctx->events_reported;
mutex_unlock(&ctx->file->mut);
+ /*
+ * If this was a listening ID then any connections spawned from it that
+ * have not been delivered to userspace are cleaned up too. Must be done
+ * outside any locks.
+ */
list_for_each_entry_safe(uevent, tmp, &list, list) {
- list_del(&uevent->list);
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
+ ucma_destroy_private_ctx(uevent->conn_req_ctx);
kfree(uevent);
}
+ return events_reported;
+}
- events_reported = ctx->events_reported;
+/*
+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
+ * the ctx is not public to the user). This is either because:
+ * - ucma_finish_ctx() hasn't been called
+ * - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
+ */
+static int ucma_destroy_private_ctx(struct ucma_context *ctx)
+{
+ int events_reported;
+
+ /*
+ * Destroy the underlying cm_id. New work queuing is prevented now by
+	 * the removal from the xarray. Once the work is canceled, ref will
+	 * either be 0 because the work ran to completion and consumed the ref
+	 * from the xarray, or it will be positive because we still have the
+	 * ref from the xarray. This can also be 0 in cases where cm_id was
+	 * never set.
+ */
+ cancel_work_sync(&ctx->close_work);
+ if (refcount_read(&ctx->ref))
+ ucma_close_id(&ctx->close_work);
+
+ events_reported = ucma_cleanup_ctx_events(ctx);
+ ucma_cleanup_multicast(ctx);
+
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
+ GFP_KERNEL) != NULL);
+ mutex_destroy(&ctx->mutex);
kfree(ctx);
return events_reported;
}
@@ -609,31 +604,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
xa_lock(&ctx_table);
ctx = _ucma_find_context(cmd.id, file);
- if (!IS_ERR(ctx))
- __xa_erase(&ctx_table, ctx->id);
+ if (!IS_ERR(ctx)) {
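+		/*
+		 * Hide the ctx from further lookups while keeping its ID
+		 * reserved; ucma_destroy_private_ctx() requires this
+		 * XA_ZERO_ENTRY state.
+		 */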
+ if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx)
+ ctx = ERR_PTR(-ENOENT);
+ }
xa_unlock(&ctx_table);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&ctx->file->mut);
- ctx->destroying = 1;
- mutex_unlock(&ctx->file->mut);
-
- flush_workqueue(ctx->file->close_wq);
- /* At this point it's guaranteed that there is no inflight
- * closing task */
- xa_lock(&ctx_table);
- if (!ctx->closing) {
- xa_unlock(&ctx_table);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- rdma_destroy_id(ctx->cm_id);
- } else {
- xa_unlock(&ctx_table);
- }
-
- resp.events_reported = ucma_free_ctx(ctx);
+ resp.events_reported = ucma_destroy_private_ctx(ctx);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -658,7 +639,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
+
ucma_put_ctx(ctx);
return ret;
}
@@ -681,7 +665,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -705,8 +691,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -731,8 +719,10 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -752,7 +742,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -762,8 +754,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
{
struct rdma_dev_addr *dev_addr;
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
rdma_addr_get_dgid(dev_addr,
@@ -775,7 +767,7 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
case 2:
ib_copy_path_rec_to_user(&resp->ib_route[1],
&route->path_rec[1]);
- /* fall through */
+ fallthrough;
case 1:
ib_copy_path_rec_to_user(&resp->ib_route[0],
&route->path_rec[0]);
@@ -789,8 +781,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
(union ib_gid *)&resp->ib_route[0].dgid);
@@ -801,7 +793,7 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
case 2:
ib_copy_path_rec_to_user(&resp->ib_route[1],
&route->path_rec[1]);
- /* fall through */
+ fallthrough;
case 1:
ib_copy_path_rec_to_user(&resp->ib_route[0],
&route->path_rec[0]);
@@ -831,7 +823,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -841,6 +833,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
memset(&resp, 0, sizeof resp);
addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
@@ -854,6 +847,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
goto out;
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.ibdev_index = ctx->cm_id->device->index;
resp.port_num = ctx->cm_id->port_num;
if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
@@ -864,8 +858,9 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
+ mutex_unlock(&ctx->mutex);
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
+ min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
ucma_put_ctx(ctx);
@@ -879,6 +874,7 @@ static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
return;
resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->ibdev_index = cm_id->device->index;
resp->port_num = cm_id->port_num;
resp->pkey = (__force __u16) cpu_to_be16(
ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
@@ -891,7 +887,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -906,7 +902,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
ucma_query_device_addr(ctx->cm_id, &resp);
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -925,7 +921,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
if (!resp)
return -ENOMEM;
- resp->num_paths = ctx->cm_id->route.num_paths;
+ resp->num_paths = ctx->cm_id->route.num_pri_alt_paths;
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
@@ -958,7 +954,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
struct sockaddr_ib *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -991,7 +987,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
&ctx->cm_id->route.addr.dst_addr);
}
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -1014,6 +1010,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
switch (cmd.option) {
case RDMA_USER_CM_QUERY_ADDR:
ret = ucma_query_addr(ctx, response, out_len);
@@ -1028,6 +1025,7 @@ static ssize_t ucma_query(struct ucma_file *file,
ret = -ENOSYS;
break;
}
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1039,25 +1037,30 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id,
{
dst->private_data = src->private_data;
dst->private_data_len = src->private_data_len;
- dst->responder_resources =src->responder_resources;
+ dst->responder_resources = src->responder_resources;
dst->initiator_depth = src->initiator_depth;
dst->flow_control = src->flow_control;
dst->retry_count = src->retry_count;
dst->rnr_retry_count = src->rnr_retry_count;
dst->srq = src->srq;
- dst->qp_num = src->qp_num;
+ dst->qp_num = src->qp_num & 0xFFFFFF;
dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_connect cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
+ struct rdma_ucm_connect cmd;
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
if (!cmd.conn_param.valid)
@@ -1068,7 +1071,14 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- ret = rdma_connect(ctx->cm_id, &conn_param);
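+	/* Only copy the ECE fields if userspace's write actually included them */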
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1087,9 +1097,13 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog;
- ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+ cmd.backlog = max_backlog;
+ atomic_set(&ctx->backlog, cmd.backlog);
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_listen(ctx->cm_id, cmd.backlog);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1099,26 +1113,44 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
{
struct rdma_ucm_accept cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- mutex_lock(&file->mut);
- ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
- if (!ret)
+ mutex_lock(&ctx->mutex);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+ if (!ret) {
+ /* The uid must be set atomically with the handler */
ctx->uid = cmd.uid;
- mutex_unlock(&file->mut);
- } else
- ret = __rdma_accept(ctx->cm_id, NULL, NULL);
-
+ }
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
+ } else {
+ mutex_lock(&ctx->mutex);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
+ }
ucma_put_ctx(ctx);
return ret;
}
@@ -1133,11 +1165,25 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (!cmd.reason)
+ cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;
+
+ switch (cmd.reason) {
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ mutex_lock(&ctx->mutex);
+ ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
+ cmd.reason);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1156,7 +1202,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_disconnect(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1187,7 +1235,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
resp.qp_attr_mask = 0;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = cmd.qp_state;
+ mutex_lock(&ctx->mutex);
ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+ mutex_unlock(&ctx->mutex);
if (ret)
goto out;
@@ -1273,9 +1323,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
struct sa_path_rec opa;
sa_convert_path_ib_to_opa(&opa, &sa_path);
+ mutex_lock(&ctx->mutex);
ret = rdma_set_ib_path(ctx->cm_id, &opa);
+ mutex_unlock(&ctx->mutex);
} else {
+ mutex_lock(&ctx->mutex);
ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
+ mutex_unlock(&ctx->mutex);
}
if (ret)
return ret;
@@ -1308,7 +1362,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
switch (level) {
case RDMA_OPTION_ID:
+ mutex_lock(&ctx->mutex);
ret = ucma_set_option_id(ctx, optname, optval, optlen);
+ mutex_unlock(&ctx->mutex);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
@@ -1368,8 +1424,10 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
if (ctx->cm_id->device)
ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1403,42 +1461,59 @@ static ssize_t ucma_process_join(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&file->mut);
- mc = ucma_alloc_multicast(ctx);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc) {
ret = -ENOMEM;
- goto err1;
+ goto err_put_ctx;
}
+
+ mc->ctx = ctx;
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
+
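+	/*
+	 * Reserve the id with a NULL entry; the mc only becomes visible to
+	 * ucma_leave_multicast() once the join succeeds and xa_store()
+	 * publishes it below.
+	 */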
+ xa_lock(&multicast_table);
+ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
+ list_add_tail(&mc->list, &ctx->mc_list);
+ xa_unlock(&multicast_table);
+
+ mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
+ mutex_unlock(&ctx->mutex);
if (ret)
- goto err2;
+ goto err_xa_erase;
resp.id = mc->id;
if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err3;
+ goto err_leave_multicast;
}
xa_store(&multicast_table, mc->id, mc, 0);
- mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
return 0;
-err3:
+err_leave_multicast:
+ mutex_lock(&ctx->mutex);
rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
-err2:
- xa_erase(&multicast_table, mc->id);
+err_xa_erase:
+ xa_lock(&multicast_table);
list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+err_free_mc:
+ xa_unlock(&multicast_table);
kfree(mc);
-err1:
- mutex_unlock(&file->mut);
+err_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
@@ -1500,24 +1575,26 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
mc = xa_load(&multicast_table, cmd.id);
if (!mc)
mc = ERR_PTR(-ENOENT);
- else if (mc->ctx->file != file)
+ else if (READ_ONCE(mc->ctx->file) != file)
mc = ERR_PTR(-EINVAL);
- else if (!atomic_inc_not_zero(&mc->ctx->ref))
+ else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
- else
- __xa_erase(&multicast_table, mc->id);
- xa_unlock(&multicast_table);
if (IS_ERR(mc)) {
+ xa_unlock(&multicast_table);
ret = PTR_ERR(mc);
goto out;
}
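+	/*
+	 * Remove the mc from the table and the ctx list under the same lock
+	 * so that ucma_cleanup_multicast() cannot also free it.
+	 */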
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);
+
+ mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
- mutex_lock(&mc->ctx->file->mut);
+ mutex_unlock(&mc->ctx->mutex);
+
ucma_cleanup_mc_events(mc);
- list_del(&mc->list);
- mutex_unlock(&mc->ctx->file->mut);
ucma_put_ctx(mc->ctx);
resp.events_reported = mc->events_reported;
@@ -1530,45 +1607,15 @@ out:
return ret;
}
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- /* Acquire mutex's based on pointer comparison to prevent deadlock. */
- if (file1 < file2) {
- mutex_lock(&file1->mut);
- mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
- } else {
- mutex_lock(&file2->mut);
- mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
- }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- if (file1 < file2) {
- mutex_unlock(&file2->mut);
- mutex_unlock(&file1->mut);
- } else {
- mutex_unlock(&file1->mut);
- mutex_unlock(&file2->mut);
- }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
- if (uevent->ctx == ctx)
- list_move_tail(&uevent->list, &file->event_list);
-}
-
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
+ struct ucma_event *uevent, *tmp;
struct ucma_context *ctx;
+ LIST_HEAD(event_list);
struct fd f;
struct ucma_file *cur_file;
int ret = 0;
@@ -1584,40 +1631,53 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
ret = -EINVAL;
goto file_put;
}
+ cur_file = f.file->private_data;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(f.file->private_data, cmd.id);
+ ctx = ucma_get_ctx(cur_file, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto file_put;
}
- cur_file = ctx->file;
- if (cur_file == new_file) {
- resp.events_reported = ctx->events_reported;
- goto response;
- }
-
+ rdma_lock_handler(ctx->cm_id);
/*
- * Migrate events between fd's, maintaining order, and avoiding new
- * events being added before existing events.
+ * ctx->file can only be changed under the handler & xa_lock. xa_load()
+ * must be checked again to ensure the ctx hasn't begun destruction
+ * since the ucma_get_ctx().
*/
- ucma_lock_files(cur_file, new_file);
xa_lock(&ctx_table);
-
- list_move_tail(&ctx->list, &new_file->ctx_list);
- ucma_move_events(ctx, new_file);
+ if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+ xa_unlock(&ctx_table);
+ ret = -ENOENT;
+ goto err_unlock;
+ }
ctx->file = new_file;
+ xa_unlock(&ctx_table);
+
+ mutex_lock(&cur_file->mut);
+ list_del(&ctx->list);
+ /*
+ * At this point lock_handler() prevents addition of new uevents for
+ * this ctx.
+ */
+ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &event_list);
resp.events_reported = ctx->events_reported;
+ mutex_unlock(&cur_file->mut);
- xa_unlock(&ctx_table);
- ucma_unlock_files(cur_file, new_file);
+ mutex_lock(&new_file->mut);
+ list_add_tail(&ctx->list, &new_file->ctx_list);
+ list_splice_tail(&event_list, &new_file->event_list);
+ mutex_unlock(&new_file->mut);
-response:
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
+err_unlock:
+ rdma_unlock_handler(ctx->cm_id);
ucma_put_ctx(ctx);
file_put:
fdput(f);
@@ -1660,8 +1720,8 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
ssize_t ret;
if (!ib_safe_file_access(filp)) {
- pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
- task_tgid_vnr(current), current->comm);
+ pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ __func__, task_tgid_vnr(current), current->comm);
return -EACCES;
}
@@ -1717,13 +1777,6 @@ static int ucma_open(struct inode *inode, struct file *filp)
if (!file)
return -ENOMEM;
- file->close_wq = alloc_ordered_workqueue("ucma_close_id",
- WQ_MEM_RECLAIM);
- if (!file->close_wq) {
- kfree(file);
- return -ENOMEM;
- }
-
INIT_LIST_HEAD(&file->event_list);
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
@@ -1738,37 +1791,23 @@ static int ucma_open(struct inode *inode, struct file *filp)
static int ucma_close(struct inode *inode, struct file *filp)
{
struct ucma_file *file = filp->private_data;
- struct ucma_context *ctx, *tmp;
-
- mutex_lock(&file->mut);
- list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
- ctx->destroying = 1;
- mutex_unlock(&file->mut);
- xa_erase(&ctx_table, ctx->id);
- flush_workqueue(file->close_wq);
- /* At that step once ctx was marked as destroying and workqueue
- * was flushed we are safe from any inflights handlers that
- * might put other closing task.
- */
- xa_lock(&ctx_table);
- if (!ctx->closing) {
- xa_unlock(&ctx_table);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- /* rdma_destroy_id ensures that no event handlers are
- * inflight for that id before releasing it.
- */
- rdma_destroy_id(ctx->cm_id);
- } else {
- xa_unlock(&ctx_table);
- }
+ /*
+	 * All paths that touch ctx_list starting from write() are prevented
+	 * by this being an FD release function. The list_add_tail() in
+ * ucma_connect_event_handler() can run concurrently, however it only
+ * adds to the list *after* a listening ID. By only reading the first of
+ * the list, and relying on ucma_destroy_private_ctx() to block
+ * ucma_connect_event_handler(), no additional locking is needed.
+ */
+ while (!list_empty(&file->ctx_list)) {
+ struct ucma_context *ctx = list_first_entry(
+ &file->ctx_list, struct ucma_context, list);
- ucma_free_ctx(ctx);
- mutex_lock(&file->mut);
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx);
+ ucma_destroy_private_ctx(ctx);
}
- mutex_unlock(&file->mut);
- destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
@@ -1803,13 +1842,12 @@ static struct ib_client rdma_cma_client = {
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
-static ssize_t show_abi_version(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t abi_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+ return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
-static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+static DEVICE_ATTR_RO(abi_version);
static int __init ucma_init(void)
{
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 29a45d2f8898..64d9c492de64 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -41,7 +41,7 @@
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
- .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_unpacked_ ## header, field), \
.field_name = #header ":" #field
static const struct ib_field lrh_table[] = {
@@ -479,7 +479,7 @@ int ib_ud_header_unpack(void *buf,
buf += IB_LRH_BYTES;
if (header->lrh.link_version != 0) {
- pr_warn("Invalid LRH.link_version %d\n",
+ pr_warn("Invalid LRH.link_version %u\n",
header->lrh.link_version);
return -EINVAL;
}
@@ -496,7 +496,7 @@ int ib_ud_header_unpack(void *buf,
buf += IB_GRH_BYTES;
if (header->grh.ip_version != 6) {
- pr_warn("Invalid GRH.ip_version %d\n",
+ pr_warn("Invalid GRH.ip_version %u\n",
header->grh.ip_version);
return -EINVAL;
}
@@ -508,7 +508,7 @@ int ib_ud_header_unpack(void *buf,
break;
default:
- pr_warn("Invalid LRH.link_next_header %d\n",
+ pr_warn("Invalid LRH.link_next_header %u\n",
header->lrh.link_next_header);
return -EINVAL;
}
@@ -530,7 +530,7 @@ int ib_ud_header_unpack(void *buf,
}
if (header->bth.transport_header_version != 0) {
- pr_warn("Invalid BTH.transport_header_version %d\n",
+ pr_warn("Invalid BTH.transport_header_version %u\n",
header->bth.transport_header_version);
return -EINVAL;
}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 06b6125b5ae1..86d479772fbc 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -2,6 +2,7 @@
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -39,92 +40,26 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
- struct sg_page_iter sg_iter;
- struct page *page;
-
- if (umem->nmap > 0)
- ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
- DMA_BIDIRECTIONAL);
-
- for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
- page = sg_page_iter_page(&sg_iter);
- unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
- }
-
- sg_free_table(&umem->sg_head);
-}
-
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
- struct page **page_list,
- unsigned long npages,
- unsigned int max_seg_sz,
- int *nents)
-{
- unsigned long first_pfn;
- unsigned long i = 0;
- bool update_cur_sg = false;
- bool first = !sg_page(sg);
-
- /* Check if new page_list is contiguous with end of previous page_list.
- * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
- */
- if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
- page_to_pfn(page_list[0])))
- update_cur_sg = true;
-
- while (i != npages) {
- unsigned long len;
- struct page *first_page = page_list[i];
-
- first_pfn = page_to_pfn(first_page);
-
- /* Compute the number of contiguous pages we have starting
- * at i
- */
- for (len = 0; i != npages &&
- first_pfn + len == page_to_pfn(page_list[i]) &&
- len < (max_seg_sz >> PAGE_SHIFT);
- len++)
- i++;
-
- /* Squash N contiguous pages from page_list into current sge */
- if (update_cur_sg) {
- if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
- sg_set_page(sg, sg_page(sg),
- sg->length + (len << PAGE_SHIFT),
- 0);
- update_cur_sg = false;
- continue;
- }
- update_cur_sg = false;
- }
+ bool make_dirty = umem->writable && dirty;
+ struct scatterlist *sg;
+ unsigned int i;
- /* Squash N contiguous pages into next sge or first sge */
- if (!first)
- sg = sg_next(sg);
+ if (dirty)
+ ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
+ DMA_BIDIRECTIONAL, 0);
- (*nents)++;
- sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
- first = false;
- }
+ for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i)
+ unpin_user_page_range_dirty_lock(sg_page(sg),
+ DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
- return sg;
+ sg_free_append_table(&umem->sgt_append);
}
/**
@@ -146,22 +81,37 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long virt)
{
struct scatterlist *sg;
- unsigned int best_pg_bit;
unsigned long va, pgoff;
dma_addr_t mask;
int i;
- /* At minimum, drivers must support PAGE_SIZE or smaller */
- if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
- return 0;
+ if (umem->is_odp) {
+ unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+ /* ODP must always be self consistent. */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+ return page_size;
+ }
+
+ /* rdma_for_each_block() has a bug if the page size is smaller than the
+ * page size used to build the umem. For now prevent smaller page sizes
+ * from being returned.
+ */
+ pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
- va = virt;
- /* max page size not to exceed MR length */
- mask = roundup_pow_of_two(umem->length);
+ umem->iova = va = virt;
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+ */
+ mask = pgsz_bitmap &
+ GENMASK(BITS_PER_LONG - 1,
+ bits_per((umem->length - 1 + virt) ^ virt));
/* offset into first SGL */
pgoff = umem->address & ~PAGE_MASK;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+ for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
/* Walk SGL and reduce max page size if VA/PA bits differ
* for any address.
*/
@@ -171,13 +121,18 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
* the maximum possible page size as the low bits of the iova
* must be zero when starting the next chunk.
*/
- if (i != (umem->nmap - 1))
+ if (i != (umem->sgt_append.sgt.nents - 1))
mask |= va;
pgoff = 0;
}
- best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
- return BIT_ULL(best_pg_bit);
+ /* The mask accumulates 1's in each position where the VA and physical
+	 * address differ, thus the number of trailing zeros gives the largest page
+ * size that can pass the VA through to the physical.
+ */
+ if (mask)
+ pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+ return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
@@ -197,10 +152,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
+ unsigned long dma_attr = 0;
struct mm_struct *mm;
unsigned long npages;
- int ret;
- struct scatterlist *sg;
+ int pinned, ret;
unsigned int gup_flags = FOLL_WRITE;
/*
@@ -223,6 +178,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
umem->ibdev = device;
umem->length = size;
umem->address = addr;
+ /*
+ * Drivers should call ib_umem_find_best_pgsz() to set the iova
+ * correctly.
+ */
+ umem->iova = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
@@ -250,50 +210,44 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
cur_base = addr & PAGE_MASK;
- ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
- if (ret)
- goto vma;
-
if (!umem->writable)
gup_flags |= FOLL_FORCE;
- sg = umem->sg_head.sgl;
-
while (npages) {
- ret = pin_user_pages_fast(cur_base,
+ cond_resched();
+ pinned = pin_user_pages_fast(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE /
sizeof(struct page *)),
gup_flags | FOLL_LONGTERM, page_list);
- if (ret < 0)
+ if (pinned < 0) {
+ ret = pinned;
goto umem_release;
+ }
- cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(device->dma_device),
- &umem->sg_nents);
+ cur_base += pinned * PAGE_SIZE;
+ npages -= pinned;
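+		/*
+		 * Append the freshly pinned pages, coalescing physically
+		 * contiguous pages into existing entries up to the device's
+		 * max segment size.
+		 */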
+ ret = sg_alloc_append_table_from_pages(
+ &umem->sgt_append, page_list, pinned, 0,
+ pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
+ npages, GFP_KERNEL);
+ if (ret) {
+ unpin_user_pages_dirty_lock(page_list, pinned, 0);
+ goto umem_release;
+ }
}
- sg_mark_end(sg);
-
- umem->nmap = ib_dma_map_sg(device,
- umem->sg_head.sgl,
- umem->sg_nents,
- DMA_BIDIRECTIONAL);
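+	/* Relaxed-ordering MRs may be mapped with weakly ordered DMA */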
+ if (access & IB_ACCESS_RELAXED_ORDERING)
+ dma_attr |= DMA_ATTR_WEAK_ORDERING;
- if (!umem->nmap) {
- ret = -ENOMEM;
+ ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
+ DMA_BIDIRECTIONAL, dma_attr);
+ if (ret)
goto umem_release;
- }
-
- ret = 0;
goto out;
umem_release:
__ib_umem_release(device, umem, 0);
-vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
free_page((unsigned long) page_list);
@@ -314,6 +268,8 @@ void ib_umem_release(struct ib_umem *umem)
{
if (!umem)
return;
+ if (umem->is_dmabuf)
+ return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
if (umem->is_odp)
return ib_umem_odp_release(to_ib_umem_odp(umem));
@@ -325,18 +281,6 @@ void ib_umem_release(struct ib_umem *umem)
}
EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
-{
- int i, n = 0;
- struct scatterlist *sg;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> PAGE_SHIFT;
-
- return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
/*
* Copy from the given ib_umem's pages to the given buffer.
*
@@ -354,12 +298,13 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
int ret;
if (offset > umem->length || length > umem->length - offset) {
- pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
- offset, umem->length, end);
+ pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n",
+ __func__, offset, umem->length, end);
return -EINVAL;
}
- ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
+ ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
+ umem->sgt_append.sgt.orig_nents, dst, length,
offset + ib_umem_offset(umem));
if (ret < 0)
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
new file mode 100644
index 000000000000..04c04e6d24c3
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+
+#include "uverbs.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct sg_table *sgt;
+ struct scatterlist *sg;
+ unsigned long start, end, cur = 0;
+ unsigned int nmap = 0;
+ long ret;
+ int i;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (umem_dmabuf->sgt)
+ goto wait_fence;
+
+ sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ /* modify the sg list in-place to match umem address and length */
+
+ start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
+ end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
+ PAGE_SIZE);
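+	/*
+	 * Record how much the first and last entries are clipped so that
+	 * ib_umem_dmabuf_unmap_pages() can restore the exporter's sg list
+	 * before unmapping it.
+	 */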
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ if (start < cur + sg_dma_len(sg) && cur < end)
+ nmap++;
+ if (cur <= start && start < cur + sg_dma_len(sg)) {
+ unsigned long offset = start - cur;
+
+ umem_dmabuf->first_sg = sg;
+ umem_dmabuf->first_sg_offset = offset;
+ sg_dma_address(sg) += offset;
+ sg_dma_len(sg) -= offset;
+ cur += offset;
+ }
+ if (cur < end && end <= cur + sg_dma_len(sg)) {
+ unsigned long trim = cur + sg_dma_len(sg) - end;
+
+ umem_dmabuf->last_sg = sg;
+ umem_dmabuf->last_sg_trim = trim;
+ sg_dma_len(sg) -= trim;
+ break;
+ }
+ cur += sg_dma_len(sg);
+ }
+
+ umem_dmabuf->umem.sgt_append.sgt.sgl = umem_dmabuf->first_sg;
+ umem_dmabuf->umem.sgt_append.sgt.nents = nmap;
+ umem_dmabuf->sgt = sgt;
+
+wait_fence:
+ /*
+ * Although the sg list is valid now, the content of the pages
+	 * may not be up-to-date. Wait for the exporter to finish
+ * the migration.
+ */
+ ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
+ DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -ETIMEDOUT;
+ return 0;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
+
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt)
+ return;
+
+	/* restore the original sg list */
+ if (umem_dmabuf->first_sg) {
+ sg_dma_address(umem_dmabuf->first_sg) -=
+ umem_dmabuf->first_sg_offset;
+ sg_dma_len(umem_dmabuf->first_sg) +=
+ umem_dmabuf->first_sg_offset;
+ umem_dmabuf->first_sg = NULL;
+ umem_dmabuf->first_sg_offset = 0;
+ }
+ if (umem_dmabuf->last_sg) {
+ sg_dma_len(umem_dmabuf->last_sg) +=
+ umem_dmabuf->last_sg_trim;
+ umem_dmabuf->last_sg = NULL;
+ umem_dmabuf->last_sg_trim = 0;
+ }
+
+ dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+ DMA_BIDIRECTIONAL);
+
+ umem_dmabuf->sgt = NULL;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages);
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops)
+{
+ struct dma_buf *dmabuf;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ unsigned long end;
+ struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL);
+
+ if (check_add_overflow(offset, (unsigned long)size, &end))
+ return ret;
+
+ if (unlikely(!ops || !ops->move_notify))
+ return ret;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ if (dmabuf->size < end)
+ goto out_release_dmabuf;
+
+ umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL);
+ if (!umem_dmabuf) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out_release_dmabuf;
+ }
+
+ umem = &umem_dmabuf->umem;
+ umem->ibdev = device;
+ umem->length = size;
+ umem->address = offset;
+ umem->writable = ib_access_writable(access);
+ umem->is_dmabuf = 1;
+
+ if (!ib_umem_num_pages(umem))
+ goto out_free_umem;
+
+ umem_dmabuf->attach = dma_buf_dynamic_attach(
+ dmabuf,
+ device->dma_device,
+ ops,
+ umem_dmabuf);
+ if (IS_ERR(umem_dmabuf->attach)) {
+ ret = ERR_CAST(umem_dmabuf->attach);
+ goto out_free_umem;
+ }
+ return umem_dmabuf;
+
+out_free_umem:
+ kfree(umem_dmabuf);
+
+out_release_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get);
+
+static void
+ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+
+ ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev,
+ "Invalidate callback should not be called when memory is pinned\n");
+}
+
+static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = {
+ .allow_peer2peer = true,
+ .move_notify = ib_umem_dmabuf_unsupported_move_notify,
+};
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access)
+{
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int err;
+
+ umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access,
+ &ib_umem_dmabuf_attach_pinned_ops);
+ if (IS_ERR(umem_dmabuf))
+ return umem_dmabuf;
+
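+	/*
+	 * Pin the buffer and map it once up front; move_notify is never
+	 * expected to fire for a pinned attachment.
+	 */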
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = dma_buf_pin(umem_dmabuf->attach);
+ if (err)
+ goto err_release;
+ umem_dmabuf->pinned = 1;
+
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err)
+ goto err_unpin;
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ return umem_dmabuf;
+
+err_unpin:
+ dma_buf_unpin(umem_dmabuf->attach);
+err_release:
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned);
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+ dma_resv_lock(dmabuf->resv, NULL);
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ if (umem_dmabuf->pinned)
+ dma_buf_unpin(umem_dmabuf->attach);
+ dma_resv_unlock(dmabuf->resv);
+
+ dma_buf_detach(dmabuf, umem_dmabuf->attach);
+ dma_buf_put(dmabuf);
+ kfree(umem_dmabuf);
+}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cd656ad4953b..e9fa22d31c23 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -40,10 +40,9 @@
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <linux/interval_tree.h>
+#include <linux/hmm.h>
#include <linux/pagemap.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
@@ -60,7 +59,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
size_t page_size = 1UL << umem_odp->page_shift;
unsigned long start;
unsigned long end;
- size_t pages;
+ size_t ndmas, npfns;
start = ALIGN_DOWN(umem_odp->umem.address, page_size);
if (check_add_overflow(umem_odp->umem.address,
@@ -71,20 +70,21 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
if (unlikely(end < page_size))
return -EOVERFLOW;
- pages = (end - start) >> umem_odp->page_shift;
- if (!pages)
+ ndmas = (end - start) >> umem_odp->page_shift;
+ if (!ndmas)
return -EINVAL;
- umem_odp->page_list = kvcalloc(
- pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
- if (!umem_odp->page_list)
+ npfns = (end - start) >> PAGE_SHIFT;
+ umem_odp->pfn_list = kvcalloc(
+ npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+ if (!umem_odp->pfn_list)
return -ENOMEM;
umem_odp->dma_list = kvcalloc(
- pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+ ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
if (!umem_odp->dma_list) {
ret = -ENOMEM;
- goto out_page_list;
+ goto out_pfn_list;
}
ret = mmu_interval_notifier_insert(&umem_odp->notifier,
@@ -98,8 +98,8 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
out_dma_list:
kvfree(umem_odp->dma_list);
-out_page_list:
- kvfree(umem_odp->page_list);
+out_pfn_list:
+ kvfree(umem_odp->pfn_list);
return ret;
}
@@ -152,6 +152,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
* ib_alloc_implicit_odp_umem()
* @addr: The starting userspace VA
* @size: The length of the userspace VA
+ * @ops: MMU interval ops, currently only @invalidate
*/
struct ib_umem_odp *
ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
@@ -213,6 +214,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child);
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
+ * @ops: MMU interval ops, currently only @invalidate
*
* The driver should use when the access flags indicate ODP memory. It avoids
* pinning, instead, stores the mm for future page fault handling in
@@ -223,7 +225,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
const struct mmu_interval_notifier_ops *ops)
{
struct ib_umem_odp *umem_odp;
- struct mm_struct *mm;
int ret;
if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
@@ -237,7 +238,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
umem_odp->umem.length = size;
umem_odp->umem.address = addr;
umem_odp->umem.writable = ib_access_writable(access);
- umem_odp->umem.owning_mm = mm = current->mm;
+ umem_odp->umem.owning_mm = current->mm;
umem_odp->notifier.ops = ops;
umem_odp->page_shift = PAGE_SHIFT;
@@ -274,9 +275,9 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
mutex_unlock(&umem_odp->umem_mutex);
mmu_interval_notifier_remove(&umem_odp->notifier);
kvfree(umem_odp->dma_list);
- kvfree(umem_odp->page_list);
- put_pid(umem_odp->tgid);
+ kvfree(umem_odp->pfn_list);
}
+ put_pid(umem_odp->tgid);
kfree(umem_odp);
}
EXPORT_SYMBOL(ib_umem_odp_release);
@@ -285,87 +286,53 @@ EXPORT_SYMBOL(ib_umem_odp_release);
* Map for DMA and insert a single page into the on-demand paging page tables.
*
* @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
+ * @dma_index: index in the umem to add the dma to.
* @page: the page struct to map and add.
* @access_mask: access permissions needed for this page.
- * @current_seq: sequence number for synchronization with invalidations.
- * the sequence number is taken from
- * umem_odp->notifiers_seq.
*
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ * The function returns -EFAULT if the DMA mapping operation fails.
*
- * The page is released via put_page even if the operation failed. For on-demand
- * pinning, the page is released whenever it isn't stored in the umem.
*/
static int ib_umem_odp_map_dma_single_page(
struct ib_umem_odp *umem_odp,
- unsigned int page_index,
+ unsigned int dma_index,
struct page *page,
- u64 access_mask,
- unsigned long current_seq)
+ u64 access_mask)
{
struct ib_device *dev = umem_odp->umem.ibdev;
- dma_addr_t dma_addr;
- int ret = 0;
+ dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
- if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
- ret = -EAGAIN;
- goto out;
- }
- if (!(umem_odp->dma_list[page_index])) {
- dma_addr =
- ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(dev, dma_addr)) {
- ret = -EFAULT;
- goto out;
- }
- umem_odp->dma_list[page_index] = dma_addr | access_mask;
- umem_odp->page_list[page_index] = page;
- umem_odp->npages++;
- } else if (umem_odp->page_list[page_index] == page) {
- umem_odp->dma_list[page_index] |= access_mask;
- } else {
+ if (*dma_addr) {
/*
- * This is a race here where we could have done:
- *
- * CPU0 CPU1
- * get_user_pages()
- * invalidate()
- * page_fault()
- * mutex_lock(umem_mutex)
- * page from GUP != page in ODP
- *
- * It should be prevented by the retry test above as reading
- * the seq number should be reliable under the
- * umem_mutex. Thus something is really not working right if
- * things get here.
+		 * If the page is already DMA mapped it means it went through
+		 * a non-invalidating transition, like read-only to writable.
+ * Resync the flags.
*/
- WARN(true,
- "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem_odp->page_list[page_index], page);
- ret = -EAGAIN;
+ *dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
+ return 0;
}
-out:
- put_page(page);
- return ret;
+ *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
+ DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(dev, *dma_addr)) {
+ *dma_addr = 0;
+ return -EFAULT;
+ }
+ umem_odp->npages++;
+ *dma_addr |= access_mask;
+ return 0;
}
/**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
*
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem_odp->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
+ * Upon success the ODP MR will be locked to let the caller complete its
+ * device page table update.
*
* Returns the number of pages mapped in success, negative error code
* for failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- * An -ENOENT error code indicates that userspace process is being terminated
- * and mm was already destroyed.
* @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -374,21 +341,19 @@ out:
* the return value.
* @access_mask: bit mask of the requested access permissions for the given
* range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- * invalidations. the sequance number is read from
- * umem_odp->notifiers_seq before calling this function
+ * @fault: is faulting required for the given range
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask, bool fault)
+ __acquires(&umem_odp->umem_mutex)
{
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
- struct page **local_page_list = NULL;
- u64 page_mask, off;
- int j, k, ret = 0, start_idx, npages = 0;
- unsigned int flags = 0, page_shift;
- phys_addr_t p = 0;
+ int pfn_index, dma_index, ret = 0, start_idx;
+ unsigned int page_shift, hmm_order, pfn_start_idx;
+ unsigned long num_pfns, current_seq;
+ struct hmm_range range = {};
+ unsigned long timeout;
if (access_mask == 0)
return -EINVAL;
@@ -397,15 +362,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
- local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
- if (!local_page_list)
- return -ENOMEM;
-
page_shift = umem_odp->page_shift;
- page_mask = ~(BIT(page_shift) - 1);
- off = user_virt & (~page_mask);
- user_virt = user_virt & page_mask;
- bcnt += off; /* Charge for the first page offset as well. */
/*
* owning_process is allowed to be NULL, this means somehow the mm is
@@ -418,99 +375,104 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
goto out_put_task;
}
- if (access_mask & ODP_WRITE_ALLOWED_BIT)
- flags |= FOLL_WRITE;
+ range.notifier = &umem_odp->notifier;
+ range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+ range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+ pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+ if (fault) {
+ range.default_flags = HMM_PFN_REQ_FAULT;
- start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
- k = start_idx;
+ if (access_mask & ODP_WRITE_ALLOWED_BIT)
+ range.default_flags |= HMM_PFN_REQ_WRITE;
+ }
- while (bcnt > 0) {
- const size_t gup_num_pages = min_t(size_t,
- ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- down_read(&owning_mm->mmap_sem);
- /*
- * Note: this might result in redundent page getting. We can
- * avoid this by checking dma_list to be 0 before calling
- * get_user_pages. However, this make the code much more
- * complex (and doesn't gain us much performance in most use
- * cases).
- */
- npages = get_user_pages_remote(owning_process, owning_mm,
- user_virt, gup_num_pages,
- flags, local_page_list, NULL, NULL);
- up_read(&owning_mm->mmap_sem);
-
- if (npages < 0) {
- if (npages != -EAGAIN)
- pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- else
- pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- break;
- }
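+	/*
+	 * Standard mmu_interval_notifier collision/retry pattern: sample the
+	 * sequence, fault the range, then re-check under umem_mutex that no
+	 * invalidation ran in between, otherwise start over.
+	 */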
+retry:
+ current_seq = range.notifier_seq =
+ mmu_interval_read_begin(&umem_odp->notifier);
- bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- mutex_lock(&umem_odp->umem_mutex);
- for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
- if (user_virt & ~page_mask) {
- p += PAGE_SIZE;
- if (page_to_phys(local_page_list[j]) != p) {
- ret = -EFAULT;
- break;
- }
- put_page(local_page_list[j]);
- continue;
- }
+ mmap_read_lock(owning_mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(owning_mm);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_put_mm;
+ }
- ret = ib_umem_odp_map_dma_single_page(
- umem_odp, k, local_page_list[j],
- access_mask, current_seq);
- if (ret < 0) {
- if (ret != -EAGAIN)
- pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- else
- pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- break;
- }
+ start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+ dma_index = start_idx;
- p = page_to_phys(local_page_list[j]);
- k++;
- }
+ mutex_lock(&umem_odp->umem_mutex);
+ if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
mutex_unlock(&umem_odp->umem_mutex);
+ goto retry;
+ }
- if (ret < 0) {
+ for (pfn_index = 0; pfn_index < num_pfns;
+ pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
+
+ if (fault) {
/*
- * Release pages, remembering that the first page
- * to hit an error was already released by
- * ib_umem_odp_map_dma_single_page().
 * Since we asked hmm_range_fault() to populate
 * pages, it shouldn't return an error entry on success.
*/
- if (npages - (j + 1) > 0)
- release_pages(&local_page_list[j+1],
- npages - (j + 1));
+ WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+ WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+ } else {
+ if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
+ WARN_ON(umem_odp->dma_list[dma_index]);
+ continue;
+ }
+ access_mask = ODP_READ_ALLOWED_BIT;
+ if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+ }
+
+ hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+ /* If a hugepage was detected and ODP wasn't set for it, the umem
+ * page_shift will be used; the opposite case is an error.
+ */
+ if (hmm_order + PAGE_SHIFT < page_shift) {
+ ret = -EINVAL;
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "%s: un-expected hmm_order %u, page_shift %u\n",
+ __func__, hmm_order, page_shift);
break;
}
- }
- if (ret >= 0) {
- if (npages < 0 && k == start_idx)
- ret = npages;
- else
- ret = k - start_idx;
+ ret = ib_umem_odp_map_dma_single_page(
+ umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
+ access_mask);
+ if (ret < 0) {
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+ break;
+ }
}
+ /* on success the lock stays held for the caller to release */
+ if (!ret)
+ ret = dma_index - start_idx;
+ else
+ mutex_unlock(&umem_odp->umem_mutex);
- mmput(owning_mm);
+out_put_mm:
+ mmput_async(owning_mm);
out_put_task:
if (owning_process)
put_task_struct(owning_process);
- free_page((unsigned long)local_page_list);
return ret;
}
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
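For reference, the page-shift arithmetic in the hunk above (ALIGN_DOWN/ALIGN on range.start and range.end, pfn indices counted in PAGE_SHIFT units, dma indices counted in umem page_shift units) can be followed in a small userspace sketch. The macros are re-defined locally and the addresses, length and page_shift are made-up values, so this is an illustration of the arithmetic only, not part of the patch:

/*
 * Standalone illustration (not kernel code) of how a byte range is turned
 * into an HMM pfn window and a dma_list index.  ALIGN_DOWN/ALIGN are
 * re-defined here so the example builds in userspace.
 */
#include <stdio.h>
#include <stdint.h>

#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))
#define ALIGN(x, a)      (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define PAGE_SHIFT 12

int main(void)
{
	uint64_t umem_start = 0x7f0000000000ULL;  /* stand-in for ib_umem_start() */
	uint64_t user_virt  = 0x7f0000003210ULL;  /* faulting address */
	uint64_t bcnt       = 0x2345;             /* bytes requested */
	unsigned int page_shift = 16;             /* e.g. a 64K ODP page size */
	uint64_t psize = 1ULL << page_shift;

	/* Round the window out to the umem page size, as the kernel does. */
	uint64_t start = ALIGN_DOWN(user_virt, psize);
	uint64_t end   = ALIGN(user_virt + bcnt, psize);

	/* pfn_list is indexed in system pages, dma_list in umem pages. */
	uint64_t pfn_start_idx = (start - umem_start) >> PAGE_SHIFT;
	uint64_t num_pfns      = (end - start) >> PAGE_SHIFT;
	uint64_t dma_start_idx = (start - umem_start) >> page_shift;

	printf("range [%#llx, %#llx): %llu pfns, pfn idx %llu, dma idx %llu\n",
	       (unsigned long long)start, (unsigned long long)end,
	       (unsigned long long)num_pfns, (unsigned long long)pfn_start_idx,
	       (unsigned long long)dma_start_idx);
	return 0;
}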
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
+ dma_addr_t dma_addr;
+ dma_addr_t dma;
int idx;
u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;
@@ -519,20 +481,16 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
- /* Note that during the run of this function, the
- * notifiers_count of the MR is > 0, preventing any racing
- * faults from completion. We might be racing with other
- * invalidations, so we must make sure we free each page only
- * once. */
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
- if (umem_odp->page_list[idx]) {
- struct page *page = umem_odp->page_list[idx];
- dma_addr_t dma = umem_odp->dma_list[idx];
- dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+ dma = umem_odp->dma_list[idx];
- WARN_ON(!dma_addr);
+ /* Only pages that were DMA mapped have a non-zero entry; skip the rest */
+ if (dma) {
+ unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
+ dma_addr = dma & ODP_DMA_ADDR_MASK;
ib_dma_unmap_page(dev, dma_addr,
BIT(umem_odp->page_shift),
DMA_BIDIRECTIONAL);
@@ -549,7 +507,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
*/
set_page_dirty(head_page);
}
- umem_odp->page_list[idx] = NULL;
umem_odp->dma_list[idx] = 0;
umem_odp->npages--;
}
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1235ffb2389b..98cb594cd9a6 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -101,7 +101,7 @@ struct ib_umad_port {
struct ib_device *ib_dev;
struct ib_umad_device *umad_dev;
int dev_num;
- u8 port_num;
+ u32 port_num;
};
struct ib_umad_device {
@@ -142,7 +142,7 @@ static dev_t dynamic_issm_dev;
static DEFINE_IDA(umad_ida);
-static void ib_umad_add_one(struct ib_device *device);
+static int ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
static void ib_umad_dev_free(struct kref *kref)
@@ -165,8 +165,8 @@ static void ib_umad_dev_put(struct ib_umad_device *dev)
static int hdr_size(struct ib_umad_file *file)
{
- return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
- sizeof (struct ib_user_mad_hdr_old);
+ return file->use_pkey_index ? sizeof(struct ib_user_mad_hdr) :
+ sizeof(struct ib_user_mad_hdr_old);
}
/* caller must hold file->mutex */
@@ -379,6 +379,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
while (list_empty(&file->recv_list)) {
mutex_unlock(&file->mutex);
@@ -392,6 +397,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
}
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
list_del(&packet->list);
@@ -524,7 +534,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
- ret = -EINVAL;
+ ret = -EIO;
goto err_up;
}
@@ -653,10 +663,14 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
/* we will always be able to post a MAD send */
__poll_t mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_lock(&file->mutex);
poll_wait(filp, &file->recv_wait, wait);
if (!list_empty(&file->recv_list))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (file->agents_dead)
+ mask = EPOLLERR;
+ mutex_unlock(&file->mutex);
return mask;
}
@@ -674,8 +688,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent: invalid device\n");
+ dev_notice(&file->port->dev, "%s: invalid device\n", __func__);
ret = -EPIPE;
goto out;
}
@@ -687,7 +700,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (ureq.qpn != 0 && ureq.qpn != 1) {
dev_notice(&file->port->dev,
- "ib_umad_reg_agent: invalid QPN %d specified\n",
+ "%s: invalid QPN %u specified\n", __func__,
ureq.qpn);
ret = -EINVAL;
goto out;
@@ -697,9 +710,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ dev_notice(&file->port->dev, "%s: Max Agents (%u) reached\n", __func__,
IB_UMAD_MAX_AGENTS);
+
ret = -ENOMEM;
goto out;
@@ -776,8 +789,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent2: invalid device\n");
+ dev_notice(&file->port->dev, "%s: invalid device\n", __func__);
ret = -EPIPE;
goto out;
}
@@ -788,17 +800,16 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent2: invalid QPN %d specified\n",
- ureq.qpn);
+ dev_notice(&file->port->dev, "%s: invalid QPN %u specified\n",
+ __func__, ureq.qpn);
ret = -EINVAL;
goto out;
}
if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
dev_notice(&file->port->dev,
- "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
- ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+ "%s failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+ __func__, ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
ret = -EINVAL;
if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
@@ -813,8 +824,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+ dev_notice(&file->port->dev, "%s: Max Agents (%u) reached\n", __func__,
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
@@ -826,7 +836,7 @@ found:
req.mgmt_class_version = ureq.mgmt_class_version;
if (ureq.oui & 0xff000000) {
dev_notice(&file->port->dev,
- "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+ "%s failed: oui invalid 0x%08x\n", __func__,
ureq.oui);
ret = -EINVAL;
goto out;
@@ -1129,17 +1139,30 @@ static const struct file_operations umad_sm_fops = {
.llseek = no_llseek,
};
+static struct ib_umad_port *get_port(struct ib_device *ibdev,
+ struct ib_umad_device *umad_dev,
+ u32 port)
+{
+ if (!umad_dev)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (!rdma_is_port_valid(ibdev, port))
+ return ERR_PTR(-EINVAL);
+ if (!rdma_cap_ib_mad(ibdev, port))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return &umad_dev->ports[port - rdma_start_port(ibdev)];
+}
+
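The new get_port() helper relies on the kernel's ERR_PTR convention: a single pointer return carries either a valid object or a negative errno. A minimal userspace sketch of that convention, with the helpers and MAX_ERRNO re-implemented locally and a hypothetical lookup_port() standing in for get_port(), looks like this:

/*
 * Illustration only: ERR_PTR encodes an errno in the pointer value so the
 * caller can distinguish success from failure with IS_ERR()/PTR_ERR().
 */
#include <stdio.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err)      { return (void *)err; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline bool  IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct port { int num; };

static struct port ports[2] = { { 1 }, { 2 } };

/* Same shape as get_port(): validate, then hand back a pointer or an error. */
static struct port *lookup_port(int n)
{
	if (n < 1 || n > 2)
		return ERR_PTR(-EINVAL);
	return &ports[n - 1];
}

int main(void)
{
	struct port *p = lookup_port(3);

	if (IS_ERR(p))
		printf("lookup failed: %ld\n", PTR_ERR(p));

	p = lookup_port(1);
	if (!IS_ERR(p))
		printf("port %d\n", p->num);
	return 0;
}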
static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
- struct ib_umad_device *umad_dev = client_data;
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
- if (!rdma_is_port_valid(ibdev, res->port))
- return -EINVAL;
+ if (IS_ERR(port))
+ return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
- res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev;
-
+ res->cdev = &port->dev;
return 0;
}
@@ -1154,15 +1177,13 @@ MODULE_ALIAS_RDMA_CLIENT("umad");
static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
- struct ib_umad_device *umad_dev =
- ib_get_client_data(ibdev, &umad_client);
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
- if (!rdma_is_port_valid(ibdev, res->port))
- return -EINVAL;
+ if (IS_ERR(port))
+ return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
- res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev;
-
+ res->cdev = &port->sm_dev;
return 0;
}
@@ -1180,7 +1201,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
+ return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
static DEVICE_ATTR_RO(ibdev);
@@ -1192,7 +1213,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%d\n", port->port_num);
+ return sysfs_emit(buf, "%d\n", port->port_num);
}
static DEVICE_ATTR_RO(port);
@@ -1211,7 +1232,7 @@ static char *umad_devnode(struct device *dev, umode_t *mode)
static ssize_t abi_version_show(struct class *class,
struct class_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+ return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
}
static CLASS_ATTR_RO(abi_version);
@@ -1325,6 +1346,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
list_for_each_entry(file, &port->file_list, port_list) {
mutex_lock(&file->mutex);
file->agents_dead = 1;
+ wake_up_interruptible(&file->recv_wait);
mutex_unlock(&file->mutex);
for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
@@ -1341,37 +1363,41 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
put_device(&port->dev);
}
-static void ib_umad_add_one(struct ib_device *device)
+static int ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL);
if (!umad_dev)
- return;
+ return -ENOMEM;
kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->ports[i - s]))
+ ret = ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->ports[i - s]);
+ if (ret)
goto err;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &umad_client, umad_dev);
- return;
+ return 0;
err:
while (--i >= s) {
@@ -1383,6 +1409,7 @@ err:
free:
/* balances kref_init */
ib_umad_dev_put(umad_dev);
+ return ret;
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
@@ -1390,9 +1417,6 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data)
struct ib_umad_device *umad_dev = client_data;
unsigned int i;
- if (!umad_dev)
- return;
-
rdma_for_each_port (device, i) {
if (rdma_cap_ib_mad(device, i))
ib_umad_kill_port(
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 7df71983212d..821d93c8f712 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -97,7 +97,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
*/
struct ib_uverbs_device {
- atomic_t refcount;
+ refcount_t refcount;
u32 num_comp_vectors;
struct completion comp;
struct device dev;
@@ -142,7 +142,7 @@ struct ib_uverbs_file {
* ucontext_lock held
*/
struct ib_ucontext *ucontext;
- struct ib_uverbs_async_event_file *async_file;
+ struct ib_uverbs_async_event_file *default_async_file;
struct list_head list;
/*
@@ -180,6 +180,7 @@ struct ib_uverbs_mcast_entry {
struct ib_uevent_object {
struct ib_uobject uobject;
+ struct ib_uverbs_async_event_file *event_file;
/* List member for ib_uverbs_async_event_file list */
struct list_head event_list;
u32 events_reported;
@@ -219,6 +220,7 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+int uverbs_async_event_release(struct inode *inode, struct file *filp);
int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
@@ -227,6 +229,9 @@ void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
void ib_uverbs_release_file(struct kref *ref);
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list, u32 *counter);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@@ -292,6 +297,24 @@ static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
return res;
}
+static inline struct ib_uverbs_async_event_file *
+ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs,
+ u16 id)
+{
+ struct ib_uobject *async_ev_file_uobj;
+ struct ib_uverbs_async_event_file *async_ev_file;
+
+ async_ev_file_uobj = uverbs_attr_get_uobject(attrs, id);
+ if (IS_ERR(async_ev_file_uobj))
+ async_ev_file = READ_ONCE(attrs->ufile->default_async_file);
+ else
+ async_ev_file = container_of(async_ev_file_uobj,
+ struct ib_uverbs_async_event_file,
+ uobj);
+ if (async_ev_file)
+ uverbs_uobject_get(&async_ev_file->uobj);
+ return async_ev_file;
+}
void copy_port_attr_to_resp(struct ib_port_attr *attr,
struct ib_uverbs_query_port_resp *resp,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 060b4ebbd2ba..4796f6a8828c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -218,10 +218,12 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
if (!ucontext)
return -ENOMEM;
- ucontext->res.type = RDMA_RESTRACK_CTX;
ucontext->device = ib_dev;
ucontext->ufile = ufile;
xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
+ rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+ rdma_restrack_set_name(&ucontext->res, NULL);
attrs->context = ucontext;
return 0;
}
@@ -250,7 +252,7 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
if (ret)
goto err_uncharge;
- rdma_restrack_uadd(&ucontext->res);
+ rdma_restrack_add(&ucontext->res);
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
@@ -311,8 +313,9 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
return 0;
err_uobj:
- rdma_alloc_abort_uobject(uobj, attrs);
+ rdma_alloc_abort_uobject(uobj, attrs, false);
err_ucontext:
+ rdma_restrack_put(&attrs->context->res);
kfree(attrs->context);
attrs->context = NULL;
return ret;
@@ -334,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
+ resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
@@ -356,14 +359,12 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
resp->max_ah = attr->max_ah;
- resp->max_fmr = attr->max_fmr;
- resp->max_map_per_fmr = attr->max_map_per_fmr;
resp->max_srq = attr->max_srq;
resp->max_srq_wr = attr->max_srq_wr;
resp->max_srq_sge = attr->max_srq_sge;
resp->max_pkeys = attr->max_pkeys;
resp->local_ca_ack_delay = attr->local_ca_ack_delay;
- resp->phys_port_cnt = ib_dev->phys_port_cnt;
+ resp->phys_port_cnt = min_t(u32, ib_dev->phys_port_cnt, U8_MAX);
}
static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
@@ -417,8 +418,8 @@ static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_alloc_pd_resp resp = {};
struct ib_uverbs_alloc_pd cmd;
- struct ib_uverbs_alloc_pd_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
@@ -440,30 +441,24 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
pd->device = ib_dev;
pd->uobject = uobj;
- pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
- pd->res.type = RDMA_RESTRACK_PD;
+
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, NULL);
ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
if (ret)
goto err_alloc;
+ rdma_restrack_add(&pd->res);
uobj->object = pd;
- memset(&resp, 0, sizeof resp);
- resp.pd_handle = uobj->id;
- rdma_restrack_uadd(&pd->res);
+ uobj_finalize_uobj_create(uobj, attrs);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
+ resp.pd_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs));
- pd = NULL;
err_alloc:
+ rdma_restrack_put(&pd->res);
kfree(pd);
err:
uobj_alloc_abort(uobj, attrs);
@@ -570,15 +565,15 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_device *ibudev = attrs->ufile->device;
+ struct ib_uverbs_open_xrcd_resp resp = {};
struct ib_uverbs_open_xrcd cmd;
- struct ib_uverbs_open_xrcd_resp resp;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
- struct fd f = {NULL, 0};
struct inode *inode = NULL;
- int ret = 0;
int new_xrcd = 0;
struct ib_device *ib_dev;
+ struct fd f = {};
+ int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -616,24 +611,16 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
}
if (!xrcd) {
- xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata);
+ xrcd = ib_alloc_xrcd_user(ib_dev, inode, &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
}
-
- xrcd->inode = inode;
- xrcd->device = ib_dev;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
new_xrcd = 1;
}
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- memset(&resp, 0, sizeof resp);
- resp.xrcd_handle = obj->uobject.id;
if (inode) {
if (new_xrcd) {
@@ -645,27 +632,17 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
atomic_inc(&xrcd->usecnt);
}
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
if (f.file)
fdput(f);
mutex_unlock(&ibudev->xrcd_tree_mutex);
+ uobj_finalize_uobj_create(&obj->uobject, attrs);
- rdma_alloc_commit_uobject(&obj->uobject, attrs);
- return 0;
-
-err_copy:
- if (inode) {
- if (new_xrcd)
- xrcd_table_delete(ibudev, inode);
- atomic_dec(&xrcd->usecnt);
- }
+ resp.xrcd_handle = obj->uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_dealloc_xrcd:
- ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs));
+ ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs));
err:
uobj_alloc_abort(&obj->uobject, attrs);
@@ -703,9 +680,8 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
return 0;
- ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata);
-
- if (ib_is_destroy_retryable(ret, why, uobject)) {
+ ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata);
+ if (ret) {
atomic_inc(&xrcd->usecnt);
return ret;
}
@@ -713,13 +689,13 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
if (inode)
xrcd_table_delete(dev, inode);
- return ret;
+ return 0;
}
static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_reg_mr_resp resp = {};
struct ib_uverbs_reg_mr cmd;
- struct ib_uverbs_reg_mr_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
@@ -733,29 +709,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- ret = ib_check_mr_access(cmd.access_flags);
- if (ret)
- return ret;
-
uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
+ ret = ib_check_mr_access(ib_dev, cmd.access_flags);
+ if (ret)
+ goto err_free;
+
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- if (!(pd->device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
- pr_debug("ODP support not available\n");
- ret = -EINVAL;
- goto err_put;
- }
- }
-
mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags,
&attrs->driver_udata);
@@ -771,31 +738,24 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_uadd(&mr->res);
-
- uobj->object = mr;
-
- memset(&resp, 0, sizeof resp);
- resp.lkey = mr->lkey;
- resp.rkey = mr->rkey;
- resp.mr_handle = uobj->id;
+ mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-
-err_copy:
- ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+ resp.mr_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
uobj_put_obj_read(pd);
-
err_free:
uobj_alloc_abort(uobj, attrs);
return ret;
@@ -805,23 +765,28 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_pd *pd = NULL;
struct ib_mr *mr;
- struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
+ struct ib_uobject *new_uobj;
+ struct ib_device *ib_dev;
+ struct ib_pd *orig_pd;
+ struct ib_pd *new_pd;
+ struct ib_mr *new_mr;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ if (!cmd.flags)
return -EINVAL;
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED)
+ return -EOPNOTSUPP;
+
if ((cmd.flags & IB_MR_REREG_TRANS) &&
- (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
- (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
- return -EINVAL;
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
if (IS_ERR(uobj))
@@ -835,32 +800,72 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
}
if (cmd.flags & IB_MR_REREG_ACCESS) {
- ret = ib_check_mr_access(cmd.access_flags);
+ ret = ib_check_mr_access(mr->device, cmd.access_flags);
if (ret)
goto put_uobjs;
}
+ orig_pd = mr->pd;
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- attrs);
- if (!pd) {
+ new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ attrs);
+ if (!new_pd) {
ret = -EINVAL;
goto put_uobjs;
}
+ } else {
+ new_pd = mr->pd;
}
- old_pd = mr->pd;
- ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd,
- &attrs->driver_udata);
- if (ret)
+ /*
+ * The driver might create a new HW object as part of the rereg; we need
+ * to have a uobject ready to hold it.
+ */
+ new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(new_uobj)) {
+ ret = PTR_ERR(new_uobj);
goto put_uobj_pd;
+ }
- if (cmd.flags & IB_MR_REREG_PD) {
- atomic_inc(&pd->usecnt);
- mr->pd = pd;
- atomic_dec(&old_pd->usecnt);
+ new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+ cmd.hca_va, cmd.access_flags, new_pd,
+ &attrs->driver_udata);
+ if (IS_ERR(new_mr)) {
+ ret = PTR_ERR(new_mr);
+ goto put_new_uobj;
+ }
+ if (new_mr) {
+ new_mr->device = new_pd->device;
+ new_mr->pd = new_pd;
+ new_mr->type = IB_MR_TYPE_USER;
+ new_mr->uobject = uobj;
+ atomic_inc(&new_pd->usecnt);
+ new_uobj->object = new_mr;
+
+ rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&new_mr->res, NULL);
+ rdma_restrack_add(&new_mr->res);
+
+ /*
+ * The new uobj for the new HW object is put into the same spot
+ * in the IDR and the old uobj & HW object is deleted.
+ */
+ rdma_assign_uobject(uobj, new_uobj, attrs);
+ rdma_alloc_commit_uobject(new_uobj, attrs);
+ uobj_put_destroy(uobj);
+ new_uobj = NULL;
+ uobj = NULL;
+ mr = new_mr;
+ } else {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_dec(&orig_pd->usecnt);
+ mr->pd = new_pd;
+ atomic_inc(&new_pd->usecnt);
+ }
+ if (cmd.flags & IB_MR_REREG_TRANS) {
+ mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
+ }
}
memset(&resp, 0, sizeof(resp));
@@ -869,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
ret = uverbs_response(attrs, &resp, sizeof(resp));
+put_new_uobj:
+ if (new_uobj)
+ uobj_alloc_abort(new_uobj, attrs);
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- uobj_put_obj_read(pd);
+ uobj_put_obj_read(new_pd);
put_uobjs:
- uobj_put_write(uobj);
+ if (uobj)
+ uobj_put_write(uobj);
return ret;
}
@@ -894,7 +903,7 @@ static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
- struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uverbs_alloc_mw_resp resp = {};
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
@@ -920,33 +929,33 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
goto err_put;
}
- mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
- if (IS_ERR(mw)) {
- ret = PTR_ERR(mw);
+ mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+ if (!mw) {
+ ret = -ENOMEM;
goto err_put;
}
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = ib_dev;
+ mw->pd = pd;
mw->uobject = uobj;
+ mw->type = cmd.mw_type;
+
+ ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
+ if (ret)
+ goto err_alloc;
+
atomic_inc(&pd->usecnt);
uobj->object = mw;
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- memset(&resp, 0, sizeof(resp));
- resp.rkey = mw->rkey;
+ resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
- uobj_put_obj_read(pd);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-
-err_copy:
- uverbs_dealloc_mw(mw);
+err_alloc:
+ kfree(mw);
err_put:
uobj_put_obj_read(pd);
err_free:
@@ -983,40 +992,33 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- resp.fd = uobj->id;
-
ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
+ uobj_finalize_uobj_create(uobj, attrs);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret) {
- uobj_alloc_abort(uobj, attrs);
- return ret;
- }
-
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
+ resp.fd = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
- struct ib_uverbs_ex_create_cq *cmd)
+static int create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
- struct ib_uverbs_ex_create_cq_resp resp;
+ struct ib_uverbs_ex_create_cq_resp resp = {};
struct ib_cq_init_attr attr = {};
struct ib_device *ib_dev;
if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
&ib_dev);
if (IS_ERR(obj))
- return obj;
+ return PTR_ERR(obj);
if (cmd->comp_channel >= 0) {
ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
@@ -1046,46 +1048,40 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
+
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
goto err_free;
+ rdma_restrack_add(&cq->res);
obj->uevent.uobject.object = cq;
- memset(&resp, 0, sizeof resp);
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
+
resp.base.cq_handle = obj->uevent.uobject.id;
- resp.base.cqe = cq->cqe;
+ resp.base.cqe = cq->cqe;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_uadd(&cq->res);
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_cb;
-
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return obj;
-
-err_cb:
- ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
- cq = NULL;
err_free:
+ rdma_restrack_put(&cq->res);
kfree(cq);
err_file:
if (ev_file)
ib_uverbs_release_ucq(ev_file, obj);
-
err:
uobj_alloc_abort(&obj->uevent.uobject, attrs);
-
- return ERR_PTR(ret);
+ return ret;
}
static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_ucq_object *obj;
int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1098,14 +1094,12 @@ static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(attrs, &cmd_ex);
- return PTR_ERR_OR_ZERO(obj);
+ return create_cq(attrs, &cmd_ex);
}
static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_ex_create_cq cmd;
- struct ib_ucq_object *obj;
int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1118,8 +1112,7 @@ static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
if (cmd.reserved)
return -EINVAL;
- obj = create_cq(attrs, &cmd);
- return PTR_ERR_OR_ZERO(obj);
+ return create_cq(attrs, &cmd);
}
static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
@@ -1127,7 +1120,7 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp = {};
struct ib_cq *cq;
- int ret = -EINVAL;
+ int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -1294,14 +1287,27 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
struct ib_srq *srq = NULL;
struct ib_qp *qp;
struct ib_qp_init_attr attr = {};
- struct ib_uverbs_ex_create_qp_resp resp;
+ struct ib_uverbs_ex_create_qp_resp resp = {};
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
struct ib_device *ib_dev;
- if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- return -EPERM;
+ switch (cmd->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+ break;
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_DRIVER:
+ break;
+ default:
+ return -EINVAL;
+ }
obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
@@ -1376,7 +1382,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (has_sq)
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
cmd->send_cq_handle, attrs);
- if (!ind_tbl)
+ if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
attrs);
@@ -1396,7 +1402,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
IB_SIGNAL_REQ_WR;
attr.qp_type = cmd->qp_type;
- attr.create_flags = 0;
attr.cap.max_send_wr = cmd->max_send_wr;
attr.cap.max_recv_wr = cmd->max_recv_wr;
@@ -1429,51 +1434,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.source_qpn = cmd->source_qpn;
}
- if (cmd->qp_type == IB_QPT_XRC_TGT)
- qp = ib_create_qp(pd, &attr);
- else
- qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
- obj);
-
+ qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+ KBUILD_MODNAME);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
-
- if (cmd->qp_type != IB_QPT_XRC_TGT) {
- ret = ib_create_qp_security(qp, device);
- if (ret)
- goto err_cb;
-
- atomic_inc(&pd->usecnt);
- if (attr.send_cq)
- atomic_inc(&attr.send_cq->usecnt);
- if (attr.recv_cq)
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
- if (ind_tbl)
- atomic_inc(&ind_tbl->usecnt);
- } else {
- /* It is done in _ib_create_qp for other QP types */
- qp->uobject = obj;
- }
+ ib_qp_usecnt_inc(qp);
obj->uevent.uobject.object = qp;
-
- memset(&resp, 0, sizeof resp);
- resp.base.qpn = qp->qp_num;
- resp.base.qp_handle = obj->uevent.uobject.id;
- resp.base.max_recv_sge = attr.cap.max_recv_sge;
- resp.base.max_send_sge = attr.cap.max_send_sge;
- resp.base.max_recv_wr = attr.cap.max_recv_wr;
- resp.base.max_send_wr = attr.cap.max_send_wr;
- resp.base.max_inline_data = attr.cap.max_inline_data;
- resp.response_length = uverbs_response_length(attrs, sizeof(resp));
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_cb;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1495,11 +1467,17 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
UVERBS_LOOKUP_READ);
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
-err_cb:
- ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
+ resp.base.qpn = qp->qp_num;
+ resp.base.qp_handle = obj->uevent.uobject.id;
+ resp.base.max_recv_sge = attr.cap.max_recv_sge;
+ resp.base.max_send_sge = attr.cap.max_send_sge;
+ resp.base.max_recv_wr = attr.cap.max_recv_wr;
+ resp.base.max_send_wr = attr.cap.max_send_wr;
+ resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
if (!IS_ERR(xrcd_uobj))
@@ -1570,14 +1548,14 @@ static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_create_qp_resp resp = {};
struct ib_uverbs_open_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_qp *qp;
struct ib_qp_open_attr attr = {};
int ret;
+ struct ib_uobject *xrcd_uobj;
struct ib_device *ib_dev;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1617,24 +1595,16 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
obj->uevent.uobject.object = qp;
obj->uevent.uobject.user_handle = cmd.user_handle;
- memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_destroy;
-
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
qp->uobject = obj;
uobj_put_read(xrcd_uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_destroy:
- ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
err_xrcd:
uobj_put_read(xrcd_uobj);
err_put:
@@ -1947,8 +1917,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
if (ret)
return ret;
- if (cmd.base.attr_mask &
- ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
return modify_qp(attrs, &cmd);
@@ -1970,10 +1939,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
- BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
-
- if (cmd.base.attr_mask &
- ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
return -EOPNOTSUPP;
ret = modify_qp(attrs, &cmd);
@@ -2010,12 +1976,13 @@ static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
- if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
- sizeof (struct ib_sge))
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof(struct ib_sge))) /
+ sizeof(struct ib_sge))
return NULL;
- return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
- num_sge * sizeof (struct ib_sge), GFP_KERNEL);
+ return kmalloc(ALIGN(wr_size, sizeof(struct ib_sge)) +
+ num_sge * sizeof(struct ib_sge),
+ GFP_KERNEL);
}
static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
@@ -2224,7 +2191,7 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
const struct ib_sge __user *sgls;
const void __user *wqes;
- if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+ if (wqe_size < sizeof(struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
@@ -2257,14 +2224,14 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
}
if (user_wr->num_sge >=
- (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
- sizeof (struct ib_sge)) {
+ (U32_MAX - ALIGN(sizeof(*next), sizeof(struct ib_sge))) /
+ sizeof(struct ib_sge)) {
ret = -EINVAL;
goto err;
}
- next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
- user_wr->num_sge * sizeof (struct ib_sge),
+ next = kmalloc(ALIGN(sizeof(*next), sizeof(struct ib_sge)) +
+ user_wr->num_sge * sizeof(struct ib_sge),
GFP_KERNEL);
if (!next) {
ret = -ENOMEM;
@@ -2282,8 +2249,8 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
next->num_sge = user_wr->num_sge;
if (next->num_sge) {
- next->sg_list = (void *) next +
- ALIGN(sizeof *next, sizeof (struct ib_sge));
+ next->sg_list = (void *)next +
+ ALIGN(sizeof(*next), sizeof(struct ib_sge));
if (copy_from_user(next->sg_list, sgls + sg_ind,
next->num_sge *
sizeof(struct ib_sge))) {
@@ -2470,24 +2437,14 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
ah->uobject = uobj;
uobj->user_handle = cmd.user_handle;
uobj->object = ah;
-
- resp.ah_handle = uobj->id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
uobj_put_obj_read(pd);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
+ uobj_finalize_uobj_create(uobj, attrs);
-err_copy:
- rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE,
- uverbs_get_cleared_udata(attrs));
+ resp.ah_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
uobj_put_obj_read(pd);
-
err:
uobj_alloc_abort(uobj, attrs);
return ret;
@@ -2954,11 +2911,11 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
wq_init_attr.create_flags = cmd.create_flags;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd.user_handle;
wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
@@ -2972,31 +2929,25 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
wq->cq = cq;
wq->pd = pd;
wq->device = pd->device;
- wq->wq_context = wq_init_attr.wq_context;
atomic_set(&wq->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
- wq->uobject = obj;
- obj->uevent.uobject.object = wq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+
+ uobj_put_obj_read(pd);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- uobj_put_obj_read(pd);
- rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
- UVERBS_LOOKUP_READ);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs));
err_put_cq:
rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
@@ -3057,12 +3008,29 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
if (!wq)
return -EINVAL;
- wq_attr.curr_wq_state = cmd.curr_wq_state;
- wq_attr.wq_state = cmd.wq_state;
if (cmd.attr_mask & IB_WQ_FLAGS) {
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
+
+ if (cmd.attr_mask & IB_WQ_CUR_STATE) {
+ if (cmd.curr_wq_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ wq_attr.curr_wq_state = cmd.curr_wq_state;
+ } else {
+ wq_attr.curr_wq_state = wq->state;
+ }
+
+ if (cmd.attr_mask & IB_WQ_STATE) {
+ if (cmd.wq_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ wq_attr.wq_state = cmd.wq_state;
+ } else {
+ wq_attr.wq_state = wq_attr.curr_wq_state;
+ }
+
ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
&attrs->driver_udata);
rdma_lookup_put_uobject(&wq->uobject->uevent.uobject,
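The modify_wq hunk above folds in defaults when userspace omits IB_WQ_CUR_STATE or IB_WQ_STATE: the current state falls back to the WQ's own state, and the target state falls back to the current one, with out-of-range values rejected. A small userspace sketch of that decision table, using stand-in enum values and mask bits rather than the real uAPI constants, is:

/*
 * Illustration only of the state-defaulting logic; names and values here
 * are hypothetical stand-ins for the kernel's IB_WQ_* definitions.
 */
#include <stdio.h>
#include <errno.h>

enum wqs { WQS_RESET, WQS_RDY, WQS_ERR };

#define WQ_CUR_STATE (1 << 0)
#define WQ_STATE     (1 << 1)

static int resolve_states(unsigned int mask, enum wqs cmd_curr, enum wqs cmd_new,
			  enum wqs hw_state, enum wqs *curr, enum wqs *next)
{
	if (mask & WQ_CUR_STATE) {
		if (cmd_curr > WQS_ERR)
			return -EINVAL;
		*curr = cmd_curr;
	} else {
		*curr = hw_state;	/* default: whatever the WQ is in now */
	}

	if (mask & WQ_STATE) {
		if (cmd_new > WQS_ERR)
			return -EINVAL;
		*next = cmd_new;
	} else {
		*next = *curr;		/* default: no state transition */
	}
	return 0;
}

int main(void)
{
	enum wqs curr, next;
	int ret;

	/* Only the new state supplied: current state is taken from the WQ. */
	ret = resolve_states(WQ_STATE, WQS_RESET, WQS_ERR, WQS_RDY, &curr, &next);
	printf("ret=%d curr=%d next=%d\n", ret, curr, next);
	return 0;
}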
@@ -3074,14 +3042,14 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_wq **wqs = NULL;
+ struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
- int i, j, num_read_wqs;
+ int i, num_read_wqs;
u32 num_wq_handles;
struct uverbs_req_iter iter;
struct ib_device *ib_dev;
@@ -3127,6 +3095,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
}
wqs[num_read_wqs] = wq;
+ atomic_inc(&wqs[num_read_wqs]->usecnt);
}
uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
@@ -3135,17 +3104,15 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
goto put_wqs;
}
- init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
- init_attr.ind_tbl = wqs;
-
- rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
- &attrs->driver_udata);
-
- if (IS_ERR(rwq_ind_tbl)) {
- err = PTR_ERR(rwq_ind_tbl);
+ rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+ if (!rwq_ind_tbl) {
+ err = -ENOMEM;
goto err_uobj;
}
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+
rwq_ind_tbl->ind_tbl = wqs;
rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
rwq_ind_tbl->uobject = uobj;
@@ -3153,34 +3120,32 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
rwq_ind_tbl->device = ib_dev;
atomic_set(&rwq_ind_tbl->usecnt, 0);
+ err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+ &attrs->driver_udata);
+ if (err)
+ goto err_create;
+
for (i = 0; i < num_wq_handles; i++)
- atomic_inc(&wqs[i]->usecnt);
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ kfree(wqs_handles);
+ uobj_finalize_uobj_create(uobj, attrs);
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- kfree(wqs_handles);
-
- for (j = 0; j < num_read_wqs; j++)
- rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
- UVERBS_LOOKUP_READ);
-
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-
-err_copy:
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
+err_create:
+ kfree(rwq_ind_tbl);
err_uobj:
uobj_alloc_abort(uobj, attrs);
put_wqs:
- for (j = 0; j < num_read_wqs; j++)
- rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+ for (i = 0; i < num_read_wqs; i++) {
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
+ atomic_dec(&wqs[i]->usecnt);
+ }
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3206,7 +3171,7 @@ static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
- struct ib_uverbs_create_flow_resp resp;
+ struct ib_uverbs_create_flow_resp resp = {};
struct ib_uobject *uobj;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
@@ -3274,6 +3239,11 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
goto err_free_attr;
}
+ if (!rdma_is_port_valid(uobj->context->device, cmd.flow_attr.port)) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
err = -EINVAL;
@@ -3323,14 +3293,14 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
ib_spec += ((union ib_flow_spec *) ib_spec)->size;
}
if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
- pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ pr_warn("create flow failed, flow %d: %u bytes left from uverb cmd\n",
i, cmd.flow_attr.size);
err = -EINVAL;
goto err_free;
}
- flow_id = qp->device->ops.create_flow(
- qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+ flow_id = qp->device->ops.create_flow(qp, flow_attr,
+ &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
@@ -3339,23 +3309,17 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
- memset(&resp, 0, sizeof(resp));
- resp.flow_handle = uobj->id;
-
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
kfree(flow_attr);
+
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-err_copy:
- if (!qp->device->ops.destroy_flow(flow_id))
- atomic_dec(&qp->usecnt);
+ uobj_finalize_uobj_create(uobj, attrs);
+
+ resp.flow_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+
err_free:
ib_uverbs_flow_resources_free(uflow_res);
err_free_flow_attr:
@@ -3390,13 +3354,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
- struct ib_uverbs_create_srq_resp resp;
+ struct ib_uverbs_create_srq_resp resp = {};
struct ib_usrq_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
+ struct ib_uobject *xrcd_uobj;
struct ib_device *ib_dev;
obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
@@ -3441,58 +3405,29 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
attr.attr.srq_limit = cmd->srq_limit;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd->user_handle;
- srq = rdma_zalloc_drv_obj(ib_dev, ib_srq);
- if (!srq) {
- ret = -ENOMEM;
- goto err_put;
- }
-
- srq->device = pd->device;
- srq->pd = pd;
- srq->srq_type = cmd->srq_type;
- srq->uobject = obj;
- srq->event_handler = attr.event_handler;
- srq->srq_context = attr.srq_context;
-
- ret = pd->device->ops.create_srq(srq, &attr, udata);
- if (ret)
- goto err_free;
-
- if (ib_srq_has_cq(cmd->srq_type)) {
- srq->ext.cq = attr.ext.cq;
- atomic_inc(&attr.ext.cq->usecnt);
- }
-
- if (cmd->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
- atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+ srq = ib_create_srq_user(pd, &attr, obj, udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err_put_pd;
}
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
-
obj->uevent.uobject.object = srq;
obj->uevent.uobject.user_handle = cmd->user_handle;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
- memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uevent.uobject.id;
- resp.max_wr = attr.attr.max_wr;
- resp.max_sge = attr.attr.max_sge;
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
@@ -3501,18 +3436,15 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
UVERBS_LOOKUP_READ);
uobj_put_obj_read(pd);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
-err_copy:
- ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
- /* It was released in ib_destroy_srq_user */
- srq = NULL;
-err_free:
- kfree(srq);
-err_put:
- uobj_put_obj_read(pd);
+ resp.srq_handle = obj->uevent.uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+err_put_pd:
+ uobj_put_obj_read(pd);
err_put_cq:
if (ib_srq_has_cq(cmd->srq_type))
rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
@@ -3751,7 +3683,7 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
#define UAPI_DEF_WRITE_IO(req, resp) \
.write.has_resp = 1 + \
BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
- BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ BUILD_BUG_ON_ZERO(sizeof_field(req, response) != \
sizeof(u64)), \
.write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
@@ -3791,13 +3723,13 @@ const struct uapi_definition uverbs_def_write_intf[] = {
ib_uverbs_create_ah,
UAPI_DEF_WRITE_UDATA_IO(
struct ib_uverbs_create_ah,
- struct ib_uverbs_create_ah_resp),
- UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ struct ib_uverbs_create_ah_resp)),
DECLARE_UVERBS_WRITE(
IB_USER_VERBS_CMD_DESTROY_AH,
ib_uverbs_destroy_ah,
- UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
- UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)),
+ UAPI_DEF_OBJ_NEEDS_FN(create_user_ah),
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
DECLARE_UVERBS_OBJECT(
UVERBS_OBJECT_COMP_CHANNEL,
@@ -3851,7 +3783,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
IB_USER_VERBS_EX_CMD_MODIFY_CQ,
ib_uverbs_ex_modify_cq,
UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
- UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_cq))),
DECLARE_UVERBS_OBJECT(
UVERBS_OBJECT_DEVICE,
@@ -4097,8 +4029,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
DECLARE_UVERBS_WRITE(
IB_USER_VERBS_CMD_CLOSE_XRCD,
ib_uverbs_close_xrcd,
- UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
- UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)),
DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
ib_uverbs_open_qp,
UAPI_DEF_WRITE_UDATA_IO(
@@ -4108,8 +4039,9 @@ const struct uapi_definition uverbs_def_write_intf[] = {
ib_uverbs_open_xrcd,
UAPI_DEF_WRITE_UDATA_IO(
struct ib_uverbs_open_xrcd,
- struct ib_uverbs_open_xrcd_resp),
- UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+ struct ib_uverbs_open_xrcd_resp)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
{},
};
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 538affbc517e..d9799706c58e 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -58,6 +58,7 @@ struct bundle_priv {
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN);
/*
* Must be last. bundle ends in a flex array which overlaps
@@ -90,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
}
/**
- * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
* @bundle: The bundle
* @size: Number of bytes to allocate
* @flags: Allocator flags
@@ -136,7 +137,7 @@ EXPORT_SYMBOL(_uverbs_alloc);
static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
u16 len)
{
- if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ if (uattr->len > sizeof_field(struct ib_uverbs_attr, data))
return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
uattr->len - len);
@@ -230,7 +231,8 @@ static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
for (i = 0; i != attr->len; i++)
uverbs_finalize_object(attr->uobjects[i],
- spec->u2.objs_arr.access, commit, attrs);
+ spec->u2.objs_arr.access, false, commit,
+ attrs);
}
static int uverbs_process_attr(struct bundle_priv *pbundle,
@@ -257,7 +259,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
return -EOPNOTSUPP;
e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
- /* fall through */
+ fallthrough;
case UVERBS_ATTR_TYPE_PTR_IN:
/* Ensure that any data provided by userspace beyond the known
* struct is zero. Userspace that knows how to use some future
@@ -269,7 +271,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
!uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
return -EOPNOTSUPP;
- /* fall through */
+ fallthrough;
case UVERBS_ATTR_TYPE_PTR_OUT:
if (uattr->len < val_spec->u.ptr.min_len ||
(!val_spec->zero_trailing &&
@@ -335,6 +337,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
break;
+ case UVERBS_ATTR_TYPE_RAW_FD:
+ if (uattr->attr_data.reserved || uattr->len != 0 ||
+ uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX)
+ return -EINVAL;
+ /* _uverbs_get_const_signed() is the accessor */
+ e->ptr_attr.data = uattr->data_s64;
+ break;
+
case UVERBS_ATTR_TYPE_IDRS_ARRAY:
return uverbs_process_idrs_array(pbundle, attr_uapi,
&e->objs_arr_attr, uattr,
@@ -502,7 +512,9 @@ static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
uverbs_finalize_object(
attr->obj_attr.uobject,
- attr->obj_attr.attr_elm->spec.u.obj.access, commit,
+ attr->obj_attr.attr_elm->spec.u.obj.access,
+ test_bit(i, pbundle->uobj_hw_obj_valid),
+ commit,
&pbundle->bundle);
}
@@ -590,6 +602,8 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
sizeof(pbundle->bundle.attr_present));
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
+ memset(pbundle->uobj_hw_obj_valid, 0,
+ sizeof(pbundle->uobj_hw_obj_valid));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
bundle_destroy(pbundle, ret == 0);
@@ -746,9 +760,10 @@ int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx)
return uverbs_set_output(bundle, attr);
}
-int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx, s64 lower_bound, u64 upper_bound,
- s64 *def_val)
+int _uverbs_get_const_signed(s64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
{
const struct uverbs_attr *attr;
@@ -767,7 +782,30 @@ int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
return 0;
}
-EXPORT_SYMBOL(_uverbs_get_const);
+EXPORT_SYMBOL(_uverbs_get_const_signed);
+
+int _uverbs_get_const_unsigned(u64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 upper_bound, u64 *def_val)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr)) {
+ if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+ return PTR_ERR(attr);
+
+ *to = *def_val;
+ } else {
+ *to = attr->ptr_attr.data;
+ }
+
+ if (*to > upper_bound)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const_unsigned);
int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
size_t idx, const void *from, size_t size)
@@ -784,3 +822,16 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
}
return uverbs_copy_to(bundle, idx, from, size);
}
+EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero);
+
+/* Once called, an abort will call through to the type's destroy_hw() */
+void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle,
+ u16 idx)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+
+ __set_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+ pbundle->uobj_hw_obj_valid);
+}
+EXPORT_SYMBOL(uverbs_finalize_uobj_create);
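
The two exports above change the contract for ioctl method handlers: unsigned constants get their own accessor, and a handler that has brought the HW object to life must mark it so that a later abort tears it down through destroy_hw(). A minimal sketch of the intended call pattern, assuming hypothetical EXAMPLE_* attribute ids and an invented example_create_hw() helper:

static int UVERBS_HANDLER(EXAMPLE_METHOD_CREATE)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj =
		uverbs_attr_get_uobject(attrs, EXAMPLE_ATTR_HANDLE);
	void *hw_obj;

	hw_obj = example_create_hw(attrs);		/* hypothetical driver call */
	if (IS_ERR(hw_obj))
		return PTR_ERR(hw_obj);
	uobj->object = hw_obj;

	/* From here on an abort of the bundle calls the type's destroy_hw()
	 * instead of silently dropping the HW object. */
	uverbs_finalize_uobj_create(attrs, EXAMPLE_ATTR_HANDLE);

	/* Later failures (e.g. copying the response) no longer need a manual
	 * unwind of the HW object inside the handler. */
	return uverbs_copy_to(attrs, EXAMPLE_ATTR_RESP_ID, &uobj->id,
			      sizeof(uobj->id));
}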
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2d4083bf4a04..d54434088727 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -75,7 +75,7 @@ static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-static void ib_uverbs_add_one(struct ib_device *device);
+static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
/*
@@ -108,8 +108,11 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
int ret;
ret = mw->device->ops.dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+ kfree(mw);
return ret;
}
@@ -146,8 +149,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
- struct ib_uverbs_async_event_file *async_file =
- READ_ONCE(uobj->uobject.ufile->async_file);
+ struct ib_uverbs_async_event_file *async_file = uobj->event_file;
struct ib_uverbs_event *evt, *tmp;
if (!async_file)
@@ -159,6 +161,7 @@ void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
kfree(evt);
}
spin_unlock_irq(&async_file->ev_queue.lock);
+ uverbs_uobject_put(&async_file->uobj);
}
void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -194,11 +197,11 @@ void ib_uverbs_release_file(struct kref *ref)
module_put(ib_dev->ops.owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- if (atomic_dec_and_test(&file->device->refcount))
+ if (refcount_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
- if (file->async_file)
- uverbs_uobject_put(&file->async_file->uobj);
+ if (file->default_async_file)
+ uverbs_uobject_put(&file->default_async_file->uobj);
put_device(&file->device->dev);
if (file->disassociate_page)
@@ -296,6 +299,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
pollflags = EPOLLIN | EPOLLRDNORM;
+ else if (ev_queue->is_closed)
+ pollflags = EPOLLERR;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
@@ -346,7 +351,7 @@ const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_async_event_read,
.poll = ib_uverbs_async_event_poll,
- .release = uverbs_uobject_fd_release,
+ .release = uverbs_async_event_release,
.fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
@@ -386,10 +391,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
-static void
-ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
- __u64 element, __u64 event, struct list_head *obj_list,
- u32 *counter)
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list, u32 *counter)
{
struct ib_uverbs_event *entry;
unsigned long flags;
@@ -426,7 +430,7 @@ ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
static void uverbs_uobj_event(struct ib_uevent_object *eobj,
struct ib_event *event)
{
- ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file),
+ ib_uverbs_async_handler(eobj->event_file,
eobj->uobject.user_handle, event->event,
&eobj->event_list, &eobj->events_reported);
}
@@ -483,10 +487,10 @@ void ib_uverbs_init_async_event_file(
/* The first async_event_file becomes the default one for the file. */
mutex_lock(&uverbs_file->ucontext_lock);
- if (!uverbs_file->async_file) {
+ if (!uverbs_file->default_async_file) {
/* Pairs with the put in ib_uverbs_release_file */
uverbs_uobject_get(&async_file->uobj);
- smp_store_release(&uverbs_file->async_file, async_file);
+ smp_store_release(&uverbs_file->default_async_file, async_file);
}
mutex_unlock(&uverbs_file->ucontext_lock);
@@ -600,6 +604,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
bundle.ufile = file;
bundle.context = NULL; /* only valid if bundle has uobject */
+ bundle.uobject = NULL;
if (!method_elm->is_ex) {
size_t in_len = hdr.in_words * 4 - sizeof(hdr);
size_t out_len = hdr.out_words * 4;
@@ -663,6 +668,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
}
ret = method_elm->handler(&bundle);
+ if (bundle.uobject)
+ uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
+ !ret, &bundle);
out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return (ret) ? : count;
@@ -820,6 +828,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
ret = mmget_not_zero(mm);
if (!ret) {
list_del_init(&priv->list);
+ if (priv->entry) {
+ rdma_user_mmap_entry_put(priv->entry);
+ priv->entry = NULL;
+ }
mm = NULL;
continue;
}
@@ -830,14 +842,12 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
return;
/*
- * The umap_lock is nested under mmap_sem since it used within
+ * The umap_lock is nested under mmap_lock since it is used within
* the vma_ops callbacks, so we have to clean the list one mm
* at a time to get the lock ordering right. Typically there
* will only be one mm, so no big deal.
*/
- down_read(&mm->mmap_sem);
- if (!mmget_still_valid(mm))
- goto skip_mm;
+ mmap_read_lock(mm);
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
@@ -856,8 +866,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
}
}
mutex_unlock(&ufile->umap_lock);
- skip_mm:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
}
}
@@ -882,7 +891,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
int srcu_key;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
- if (!atomic_inc_not_zero(&dev->refcount))
+ if (!refcount_inc_not_zero(&dev->refcount))
return -ENXIO;
get_device(&dev->dev);
@@ -946,7 +955,7 @@ err_module:
err:
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- if (atomic_dec_and_test(&dev->refcount))
+ if (refcount_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
put_device(&dev->dev);
@@ -1037,7 +1046,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
+ ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
@@ -1056,7 +1065,7 @@ static ssize_t abi_version_show(struct device *device,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
+ ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
@@ -1089,7 +1098,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
return 0;
}
-static void ib_uverbs_add_one(struct ib_device *device)
+static int ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
dev_t base;
@@ -1097,16 +1106,16 @@ static void ib_uverbs_add_one(struct ib_device *device)
int ret;
if (!device->ops.alloc_ucontext)
- return;
+ return -EOPNOTSUPP;
uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
- return;
+ return -ENOMEM;
ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
if (ret) {
kfree(uverbs_dev);
- return;
+ return -ENOMEM;
}
device_initialize(&uverbs_dev->dev);
@@ -1115,7 +1124,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
uverbs_dev->dev.release = ib_uverbs_release_dev;
uverbs_dev->groups[0] = &dev_attr_group;
uverbs_dev->dev.groups = uverbs_dev->groups;
- atomic_set(&uverbs_dev->refcount, 1);
+ refcount_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
@@ -1126,15 +1135,18 @@ static void ib_uverbs_add_one(struct ib_device *device)
devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
GFP_KERNEL);
- if (devnum < 0)
+ if (devnum < 0) {
+ ret = -ENOMEM;
goto err;
+ }
uverbs_dev->devnum = devnum;
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else
base = IB_UVERBS_BASE_DEV + devnum;
- if (ib_uverbs_create_uapi(device, uverbs_dev))
+ ret = ib_uverbs_create_uapi(device, uverbs_dev);
+ if (ret)
goto err_uapi;
uverbs_dev->dev.devt = base;
@@ -1149,16 +1161,16 @@ static void ib_uverbs_add_one(struct ib_device *device)
goto err_uapi;
ib_set_client_data(device, &uverbs_client, uverbs_dev);
- return;
+ return 0;
err_uapi:
ida_free(&uverbs_ida, devnum);
err:
- if (atomic_dec_and_test(&uverbs_dev->refcount))
+ if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
put_device(&uverbs_dev->dev);
- return;
+ return ret;
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
@@ -1183,9 +1195,6 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
*/
mutex_unlock(&uverbs_dev->lists_mutex);
- ib_uverbs_async_handler(READ_ONCE(file->async_file), 0,
- IB_EVENT_DEVICE_FATAL, NULL, NULL);
-
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
kref_put(&file->ref, ib_uverbs_release_file);
@@ -1201,9 +1210,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
struct ib_uverbs_device *uverbs_dev = client_data;
int wait_clients = 1;
- if (!uverbs_dev)
- return;
-
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
@@ -1223,7 +1229,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
wait_clients = 0;
}
- if (atomic_dec_and_test(&uverbs_dev->refcount))
+ if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
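
Among the changes above, the device reference count moves from atomic_t to refcount_t, which saturates instead of wrapping and warns on misuse. A small generic sketch of the same get/put idiom, independent of the uverbs structures:

#include <linux/refcount.h>
#include <linux/completion.h>

struct example_dev {
	refcount_t refcount;		/* refcount_set(&dev->refcount, 1) at creation */
	struct completion comp;		/* init_completion() at creation */
};

static struct example_dev *example_dev_get(struct example_dev *dev)
{
	/* Refuses once the count has reached zero, unlike a bare atomic_inc()
	 * which could resurrect an object already being torn down. */
	return refcount_inc_not_zero(&dev->refcount) ? dev : NULL;
}

static void example_dev_put(struct example_dev *dev)
{
	if (refcount_dec_and_test(&dev->refcount))
		complete(&dev->comp);	/* mirrors ib_uverbs_comp_dev() */
}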
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index b8d715c68ca4..11a080646916 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device,
struct rdma_ah_attr *src = ah_attr;
struct rdma_ah_attr conv_ah;
- memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
+ memset(&dst->grh, 0, sizeof(dst->grh));
if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
(rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) &&
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 3abfc63225cb..13776a66e2e4 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -75,96 +75,28 @@ static int uverbs_free_mw(struct ib_uobject *uobject,
return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
}
-static int uverbs_free_qp(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_qp *qp = uobject->object;
- struct ib_uqp_object *uqp =
- container_of(uobject, struct ib_uqp_object, uevent.uobject);
- int ret;
-
- /*
- * If this is a user triggered destroy then do not allow destruction
- * until the user cleans up all the mcast bindings. Unlike in other
- * places we forcibly clean up the mcast attachments for !DESTROY
- * because the mcast attaches are not ubojects and will not be
- * destroyed by anything else during cleanup processing.
- */
- if (why == RDMA_REMOVE_DESTROY) {
- if (!list_empty(&uqp->mcast_list))
- return -EBUSY;
- } else if (qp == qp->real_qp) {
- ib_uverbs_detach_umcast(qp, uqp);
- }
-
- ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- if (uqp->uxrcd)
- atomic_dec(&uqp->uxrcd->refcnt);
-
- ib_uverbs_release_uevent(&uqp->uevent);
- return ret;
-}
-
static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
{
struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
- int ret;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
+ u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+ int ret, i;
- kfree(ind_tbl);
- return ret;
-}
+ if (atomic_read(&rwq_ind_tbl->usecnt))
+ return -EBUSY;
-static int uverbs_free_wq(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_wq *wq = uobject->object;
- struct ib_uwq_object *uwq =
- container_of(uobject, struct ib_uwq_object, uevent.uobject);
- int ret;
-
- ret = ib_destroy_wq(wq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- ib_uverbs_release_uevent(&uwq->uevent);
- return ret;
-}
-
-static int uverbs_free_srq(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_srq *srq = uobject->object;
- struct ib_uevent_object *uevent =
- container_of(uobject, struct ib_uevent_object, uobject);
- enum ib_srq_type srq_type = srq->srq_type;
- int ret;
-
- ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
+ if (ret)
return ret;
- if (srq_type == IB_SRQT_XRC) {
- struct ib_usrq_object *us =
- container_of(uevent, struct ib_usrq_object, uevent);
-
- atomic_dec(&us->uxrcd->refcnt);
- }
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
- ib_uverbs_release_uevent(uevent);
- return ret;
+ kfree(rwq_ind_tbl);
+ kfree(ind_tbl);
+ return 0;
}
static int uverbs_free_xrcd(struct ib_uobject *uobject,
@@ -176,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
- ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&uxrcd->refcnt))
+ return -EBUSY;
mutex_lock(&attrs->ufile->device->xrcd_tree_mutex);
ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs);
@@ -192,14 +123,11 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_pd *pd = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&pd->usecnt))
+ return -EBUSY;
- ib_dealloc_pd_user(pd, &attrs->driver_udata);
- return 0;
+ return ib_dealloc_pd_user(pd, &attrs->driver_udata);
}
void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
@@ -226,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
spin_unlock_irq(&event_queue->lock);
}
-static int
+static void
uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -235,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
uobj);
ib_uverbs_free_event_queue(&file->ev_queue);
- return 0;
}
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
@@ -252,10 +179,6 @@ DECLARE_UVERBS_NAMED_OBJECT(
"[infinibandevent]",
O_RDONLY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_QP,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MW_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
@@ -267,11 +190,6 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
&UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_SRQ,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
- uverbs_free_srq));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_AH_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
@@ -296,10 +214,6 @@ DECLARE_UVERBS_NAMED_OBJECT(
uverbs_free_flow),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_WQ,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
@@ -340,18 +254,12 @@ const struct uapi_definition uverbs_def_obj_intf[] = {
UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
UVERBS_OBJECT_RWQ_IND_TBL,
UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
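
The pattern running through the destroy callbacks above is that ib_destroy_usecnt()/ib_is_destroy_retryable() are gone: a destroy either refuses with -EBUSY while the object is still referenced, or completes and reports a hard error if the hardware call fails. A generic sketch of that shape, with struct example_obj and the destroy_example op as hypothetical stand-ins:

static int uverbs_free_example(struct ib_uobject *uobject,
			       enum rdma_remove_reason why,
			       struct uverbs_attr_bundle *attrs)
{
	struct example_obj *obj = uobject->object;	/* hypothetical type */

	/* Still referenced by other objects: refuse, no retry later. */
	if (atomic_read(&obj->usecnt))
		return -EBUSY;

	/* Destruction is now expected to succeed; any hard error propagates
	 * to the caller instead of being treated as "retryable". */
	return obj->device->ops.destroy_example(obj);
}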
diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c
index 82ec0806b34b..cc24cfdf7aee 100644
--- a/drivers/infiniband/core/uverbs_std_types_async_fd.c
+++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c
@@ -19,15 +19,42 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
return 0;
}
-static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct ib_uverbs_async_event_file *event_file =
container_of(uobj, struct ib_uverbs_async_event_file, uobj);
ib_unregister_event_handler(&event_file->event_handler);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE)
+ ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL,
+ NULL, NULL);
+}
+
+int uverbs_async_event_release(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *event_file;
+ struct ib_uobject *uobj = filp->private_data;
+ int ret;
+
+ if (!uobj)
+ return uverbs_uobject_fd_release(inode, filp);
+
+ event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ /*
+ * The async event FD has to deliver IB_EVENT_DEVICE_FATAL even after
+ * disassociation, so cleaning the event list must only happen after
+ * release. The user knows it has reached the end of the event stream
+ * when it sees IB_EVENT_DEVICE_FATAL.
+ */
+ uverbs_uobject_get(uobj);
+ ret = uverbs_uobject_fd_release(inode, filp);
ib_uverbs_free_event_queue(&event_file->ev_queue);
- return 0;
+ uverbs_uobject_put(uobj);
+ return ret;
}
DECLARE_UVERBS_NAMED_METHOD(
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 9f013304e677..999da9c79866 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -42,11 +42,14 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
struct ib_counters *counters = uobject->object;
int ret;
- ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+ if (atomic_read(&counters->usecnt))
+ return -EBUSY;
+
+ ret = counters->device->ops.destroy_counters(counters);
if (ret)
return ret;
-
- return counters->device->ops.destroy_counters(counters);
+ kfree(counters);
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
@@ -66,20 +69,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
if (!ib_dev->ops.create_counters)
return -EOPNOTSUPP;
- counters = ib_dev->ops.create_counters(ib_dev, attrs);
- if (IS_ERR(counters)) {
- ret = PTR_ERR(counters);
- goto err_create_counters;
- }
+ counters = rdma_zalloc_drv_obj(ib_dev, ib_counters);
+ if (!counters)
+ return -ENOMEM;
counters->device = ib_dev;
counters->uobject = uobj;
uobj->object = counters;
atomic_set(&counters->usecnt, 0);
- return 0;
+ ret = ib_dev->ops.create_counters(counters, attrs);
+ if (ret)
+ kfree(counters);
-err_create_counters:
return ret;
}
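
create_counters now follows the core-allocated object model used elsewhere in the verbs layer: the core allocates the structure with rdma_zalloc_drv_obj() (sized through the driver's INIT_RDMA_OBJ_SIZE() registration) and the op only initializes it, returning int. A hedged sketch of the driver side; struct example_counters and the firmware detail are invented for illustration:

struct example_counters {
	struct ib_counters ibcntrs;	/* must be the first member so the
					 * core's rdma_zalloc_drv_obj() cast works */
	u32 hw_set_id;
};

static int example_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct example_counters *cntrs =
		container_of(counters, struct example_counters, ibcntrs);

	cntrs->hw_set_id = 0;		/* would be allocated from firmware */
	return 0;			/* on error the core kfree()s the object */
}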
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index da4110a0eea2..370ad7c83f88 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -33,6 +33,7 @@
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
+#include "restrack.h"
static int uverbs_free_cq(struct ib_uobject *uobject,
enum rdma_remove_reason why,
@@ -45,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
ib_uverbs_release_ucq(
@@ -54,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
ev_queue) :
NULL,
ucq);
- return ret;
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
@@ -100,6 +101,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
uverbs_uobject_get(ev_file_uobj);
}
+ obj->uevent.event_file = ib_uverbs_get_async_event(
+ attrs, UVERBS_ATTR_CREATE_CQ_EVENT_FD);
+
if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
@@ -120,7 +124,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
@@ -128,20 +134,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
obj->uevent.uobject.object = cq;
obj->uevent.uobject.user_handle = user_handle;
- rdma_restrack_uadd(&cq->res);
+ rdma_restrack_add(&cq->res);
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
sizeof(cq->cqe));
- if (ret)
- goto err_cq;
+ return ret;
- return 0;
-err_cq:
- ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
- cq = NULL;
err_free:
+ rdma_restrack_put(&cq->res);
kfree(cq);
err_event_file:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
if (ev_file)
uverbs_uobject_put(ev_file_uobj);
return ret;
@@ -171,6 +176,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
@@ -202,11 +211,8 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_CQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
-
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
&UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
-#endif
);
const struct uapi_definition uverbs_def_obj_cq[] = {
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index ae4a59d6f9b1..049684880ae0 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -3,11 +3,13 @@
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
+#include <linux/overflow.h>
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_ioctl.h>
#include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
/*
* This ioctl method allows calling any defined write or write_ex
@@ -38,7 +40,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
attrs->ucore.outlen < method_elm->resp_size)
return -ENOSPC;
- return method_elm->handler(attrs);
+ attrs->uobject = NULL;
+ rc = method_elm->handler(attrs);
+ if (attrs->uobject)
+ uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true,
+ !rc, attrs);
+ return rc;
}
DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
@@ -110,8 +117,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
return ret;
uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
- if (!uapi_object)
- return -EINVAL;
+ if (IS_ERR(uapi_object))
+ return PTR_ERR(uapi_object);
handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
out_len, &total);
@@ -160,7 +167,8 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
resp->subnet_timeout = attr->subnet_timeout;
resp->init_type_reply = attr->init_type_reply;
resp->active_width = attr->active_width;
- resp->active_speed = attr->active_speed;
+ /* This ABI needs to be extended to report speeds greater than IB_SPEED_NDR */
+ resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
resp->phys_state = attr->phys_state;
resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
}
@@ -229,6 +237,199 @@ static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
return 0;
}
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ u32 num_comp;
+ int ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!ib_dev->ops.query_ucontext)
+ return -EOPNOTSUPP;
+
+ num_comp = attrs->ufile->device->num_comp_vectors;
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ return ucontext->device->ops.query_ucontext(ucontext, attrs);
+}
+
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_gid_entry *entries,
+ size_t num_entries, size_t user_entry_size)
+{
+ const struct uverbs_attr *attr;
+ void __user *user_entries;
+ size_t copy_len;
+ int ret;
+ int i;
+
+ if (user_entry_size == sizeof(*entries)) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ entries, sizeof(*entries) * num_entries);
+ return ret;
+ }
+
+ copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+ attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+ for (i = 0; i < num_entries; i++) {
+ if (copy_to_user(user_entries, entries, copy_len))
+ return -EFAULT;
+
+ if (user_entry_size > sizeof(*entries)) {
+ if (clear_user(user_entries + sizeof(*entries),
+ user_entry_size - sizeof(*entries)))
+ return -EFAULT;
+ }
+
+ entries++;
+ user_entries += user_entry_size;
+ }
+
+ return uverbs_output_written(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry *entries;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ size_t user_entry_size;
+ ssize_t num_entries;
+ int max_entries;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&user_entry_size, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+ if (ret)
+ return ret;
+
+ if (!user_entry_size)
+ return -EINVAL;
+
+ max_entries = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ user_entry_size);
+ if (max_entries <= 0)
+ return max_entries ?: -EINVAL;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries));
+ if (IS_ERR(entries))
+ return PTR_ERR(entries);
+
+ num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+ if (num_entries < 0)
+ return -EINVAL;
+
+ ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+ user_entry_size);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ &num_entries, sizeof(num_entries));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry entry = {};
+ const struct ib_gid_attr *gid_attr;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ struct net_device *ndev;
+ u32 gid_index;
+ u32 port_num;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&gid_index, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+ if (ret)
+ return ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!rdma_is_port_valid(ib_dev, port_num))
+ return -EINVAL;
+
+ gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+
+ memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+ entry.gid_index = gid_attr->index;
+ entry.port_num = gid_attr->port_num;
+ entry.gid_type = gid_attr->gid_type;
+
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+ if (IS_ERR(ndev)) {
+ if (PTR_ERR(ndev) != -ENODEV) {
+ ret = PTR_ERR(ndev);
+ rcu_read_unlock();
+ goto out;
+ }
+ } else {
+ entry.netdev_ifindex = ndev->ifindex;
+ }
+ rcu_read_unlock();
+
+ ret = uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+ sizeof(entry));
+out:
+ rdma_put_gid_attr(gid_attr);
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_GET_CONTEXT,
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
@@ -238,6 +439,13 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_UHW());
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_INFO_HANDLES,
/* Also includes any device specific object ids */
UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID,
@@ -256,11 +464,38 @@ DECLARE_UVERBS_NAMED_METHOD(
reserved),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_ENTRY,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+ netdev_ifindex),
+ UA_MANDATORY));
+
DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
&UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
- &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
const struct uapi_definition uverbs_def_obj_device[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
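
copy_gid_entries_to_user() above defines the ABI's compatibility rule: each entry copies min(user, kernel) bytes and any extra tail in the user buffer is cleared, so an older or newer userspace still agrees with the kernel on the fields both sides know. A standalone illustration of that rule (plain C, not kernel code):

#include <string.h>
#include <stddef.h>

static void copy_one_gid_entry(void *dst, size_t user_entry_size,
			       const void *src, size_t kern_entry_size)
{
	size_t copy_len = user_entry_size < kern_entry_size ?
			  user_entry_size : kern_entry_size;

	memcpy(dst, src, copy_len);			/* shared prefix */
	if (user_entry_size > kern_entry_size)		/* newer userspace */
		memset((char *)dst + kern_entry_size, 0,
		       user_entry_size - kern_entry_size);
}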
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index d5a1de33c2c9..98c522cf86d6 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_dm *dm = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&dm->usecnt))
+ return -EBUSY;
return dm->device->ops.dealloc_dm(dm, attrs);
}
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 459cf165b231..0ddcf6da66c4 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -39,394 +39,13 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_flow_action *action = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&action->usecnt))
+ return -EBUSY;
return action->device->ops.destroy_flow_action(action);
}
-static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
- u32 flags, bool is_modify)
-{
- u64 verbs_flags = flags;
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
-
- if (is_modify && uverbs_attr_is_valid(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
-
- return verbs_flags;
-};
-
-static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
-{
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
- &keymat->keymat.aes_gcm;
-
- if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return -EOPNOTSUPP;
-
- if (aes_gcm->key_len != 32 &&
- aes_gcm->key_len != 24 &&
- aes_gcm->key_len != 16)
- return -EINVAL;
-
- if (aes_gcm->icv_len != 16 &&
- aes_gcm->icv_len != 8 &&
- aes_gcm->icv_len != 12)
- return -EINVAL;
-
- return 0;
-}
-
-static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
-};
-
-static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* This is used in order to modify an esp flow action with an enabled
- * replay protection to a disabled one. This is only supported via
- * modify, as in create verb we can simply drop the REPLAY attribute and
- * achieve the same thing.
- */
- return is_modify ? 0 : -EINVAL;
-}
-
-static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* Some replay protections could always be enabled without validating
- * anything.
- */
- return 0;
-}
-
-static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify) = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
-};
-
-static int parse_esp_ip(enum ib_flow_spec_type proto,
- const void __user *val_ptr,
- size_t len, union ib_flow_spec *out)
-{
- int ret;
- const struct ib_uverbs_flow_ipv4_filter ipv4 = {
- .src_ip = cpu_to_be32(0xffffffffUL),
- .dst_ip = cpu_to_be32(0xffffffffUL),
- .proto = 0xff,
- .tos = 0xff,
- .ttl = 0xff,
- .flags = 0xff,
- };
- const struct ib_uverbs_flow_ipv6_filter ipv6 = {
- .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .flow_label = cpu_to_be32(0xffffffffUL),
- .next_hdr = 0xff,
- .traffic_class = 0xff,
- .hop_limit = 0xff,
- };
- union {
- struct ib_uverbs_flow_ipv4_filter ipv4;
- struct ib_uverbs_flow_ipv6_filter ipv6;
- } user_val = {};
- const void *user_pmask;
- size_t val_len;
-
- /* If the flow IPv4/IPv6 flow specifications are extended, the mask
- * should be changed as well.
- */
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
- sizeof(ipv4.flags) != sizeof(ipv4));
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
- sizeof(ipv6.reserved) != sizeof(ipv6));
-
- switch (proto) {
- case IB_FLOW_SPEC_IPV4:
- if (len > sizeof(user_val.ipv4) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
- len - sizeof(user_val.ipv4)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv4));
- ret = copy_from_user(&user_val.ipv4, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv4;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (len > sizeof(user_val.ipv6) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
- len - sizeof(user_val.ipv6)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv6));
- ret = copy_from_user(&user_val.ipv6, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv6;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
- &user_val,
- val_len, out);
-}
-
-static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uverbs_flow_action_esp_encap uverbs_encap;
- int ret;
-
- ret = uverbs_copy_from(&uverbs_encap, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
- if (ret)
- return ret;
-
- /* We currently support only one encap */
- if (uverbs_encap.next_ptr)
- return -EOPNOTSUPP;
-
- if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
- uverbs_encap.type != IB_FLOW_SPEC_IPV6)
- return -EOPNOTSUPP;
-
- return parse_esp_ip(uverbs_encap.type,
- u64_to_user_ptr(uverbs_encap.val_ptr),
- uverbs_encap.len,
- &out->spec);
-}
-
-struct ib_flow_action_esp_attr {
- struct ib_flow_action_attrs_esp hdr;
- struct ib_flow_action_attrs_esp_keymats keymat;
- struct ib_flow_action_attrs_esp_replays replay;
- /* We currently support only one spec */
- struct ib_flow_spec_list encap;
-};
-
-#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
-static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct uverbs_attr_bundle *attrs,
- struct ib_flow_action_esp_attr *esp_attr,
- bool is_modify)
-{
- struct ib_uverbs_flow_action_esp uverbs_esp = {};
- int ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
- if (IS_UVERBS_COPY_ERR(ret))
- return ret;
-
- /* This can be called from FLOW_ACTION_ESP_MODIFY where
- * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
- */
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
- ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
- if (ret)
- return ret;
-
- if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
- return -EOPNOTSUPP;
-
- esp_attr->hdr.spi = uverbs_esp.spi;
- esp_attr->hdr.seq = uverbs_esp.seq;
- esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
- esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
- }
- esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
- is_modify);
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
- esp_attr->keymat.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- if (ret)
- return ret;
-
- ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
- if (ret)
- return ret;
-
- esp_attr->hdr.keymat = &esp_attr->keymat;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
- esp_attr->replay.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
-
- ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
- if (ret)
- return ret;
-
- ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
- is_modify);
- if (ret)
- return ret;
-
- esp_attr->hdr.replay = &esp_attr->replay;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
- ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
- if (ret)
- return ret;
-
- esp_attr->hdr.encap = &esp_attr->encap;
- }
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
- struct ib_device *ib_dev = attrs->context->device;
- int ret;
- struct ib_flow_action *action;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!ib_dev->ops.create_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
- if (ret)
- return ret;
-
- /* No need to check as this attribute is marked as MANDATORY */
- action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr,
- attrs);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, ib_dev,
- IB_FLOW_ACTION_ESP);
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
- struct ib_flow_action *action = uobj->object;
- int ret;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!action->device->ops.modify_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
- if (ret)
- return ret;
-
- if (action->type != IB_FLOW_ACTION_ESP)
- return -EINVAL;
-
- return action->device->ops.modify_flow_action_esp(action,
- &esp_attr.hdr,
- attrs);
-}
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
- aes_key),
- },
-};
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
- size),
- },
-};
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
- UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_WRITE,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_FLOW_ACTION_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
@@ -437,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_FLOW_ACTION,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY));
const struct uapi_definition uverbs_def_obj_flow_action[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index c1286a52dc84..03e1db5d1e8c 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,7 @@
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
static int uverbs_free_mr(struct ib_uobject *uobject,
enum rdma_remove_reason why,
@@ -69,7 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
num_sge = uverbs_attr_ptr_get_array_size(
attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
- if (num_sge < 0)
+ if (num_sge <= 0)
return num_sge;
sg_list = uverbs_attr_get_alloced_ptr(attrs,
@@ -114,7 +116,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
if (!(attr.access_flags & IB_ZERO_BASED))
return -EINVAL;
- ret = ib_check_mr_access(attr.access_flags);
+ ret = ib_check_mr_access(ib_dev, attr.access_flags);
if (ret)
return ret;
@@ -134,23 +136,133 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
atomic_inc(&pd->usecnt);
atomic_inc(&dm->usecnt);
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+
ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
sizeof(mr->lkey));
if (ret)
- goto err_dereg;
+ return ret;
ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
&mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *mr =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE);
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ &mr->length, sizeof(mr->length));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ &mr->iova, sizeof(mr->iova));
+
+ return IS_UVERBS_COPY_ERR(ret) ? ret : 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
+ u64 offset, length, iova;
+ u32 fd, access_flags;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->ops.reg_user_mr_dmabuf)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&offset, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_OFFSET);
if (ret)
- goto err_dereg;
+ return ret;
+
+ ret = uverbs_copy_from(&length, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&iova, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_IOVA);
+ if (ret)
+ return ret;
- return 0;
+ if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
-err_dereg:
- ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
+ ret = uverbs_copy_from(&fd, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_FD);
+ if (ret)
+ return ret;
+ ret = uverbs_get_flags32(&access_flags, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (ret)
+ return ret;
+
+ ret = ib_check_mr_access(ib_dev, access_flags);
+ if (ret)
+ return ret;
+
+ mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd,
+ access_flags,
+ &attrs->driver_udata);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
+
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ &mr->lkey, sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
return ret;
}
@@ -172,6 +284,25 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_ALLOC_AND_COPY));
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_DM_MR_REG,
UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
UVERBS_OBJECT_MR,
@@ -200,6 +331,37 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MR_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
@@ -210,9 +372,11 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_MR,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
&UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR));
const struct uapi_definition uverbs_def_obj_mr[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
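
REG_DMABUF_MR only defines the uapi; the provider hook it calls, reg_user_mr_dmabuf(), is expected to attach the dma-buf through the ib_umem_dmabuf helpers added in the companion umem_dmabuf.c (see the diffstat). A hedged sketch of how a driver might wire it; the example_* names are invented and the helper signatures should be checked against that file:

static const struct dma_buf_attach_ops example_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = example_dmabuf_move_notify,	/* driver invalidation hook */
};

static struct ib_mr *example_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
						u64 length, u64 virt_addr,
						int fd, int access_flags,
						struct ib_udata *udata)
{
	struct ib_umem_dmabuf *umem_dmabuf;

	umem_dmabuf = ib_umem_dmabuf_get(pd->device, offset, length, fd,
					 access_flags, &example_attach_ops);
	if (IS_ERR(umem_dmabuf))
		return ERR_CAST(umem_dmabuf);

	/* Build the HW MR over umem_dmabuf->umem much like a regular umem;
	 * pages are mapped lazily via ib_umem_dmabuf_map_pages(). */
	return example_build_mr(pd, &umem_dmabuf->umem, virt_addr, access_flags);
}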
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
new file mode 100644
index 000000000000..dd1075466f61
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include "core_priv.h"
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not uobjects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(&uqp->uevent);
+ return 0;
+}
+
+static int check_creation_flags(enum ib_qp_type qp_type,
+ u32 create_flags)
+{
+ create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+
+ if (!create_flags || qp_type == IB_QPT_DRIVER)
+ return 0;
+
+ if (qp_type != IB_QPT_RAW_PACKET && qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ if ((create_flags & IB_UVERBS_QP_CREATE_SCATTER_FCS ||
+ create_flags & IB_UVERBS_QP_CREATE_CVLAN_STRIPPING) &&
+ qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void set_caps(struct ib_qp_init_attr *attr,
+ struct ib_uverbs_qp_cap *cap, bool req)
+{
+ if (req) {
+ attr->cap.max_send_wr = cap->max_send_wr;
+ attr->cap.max_recv_wr = cap->max_recv_wr;
+ attr->cap.max_send_sge = cap->max_send_sge;
+ attr->cap.max_recv_sge = cap->max_recv_sge;
+ attr->cap.max_inline_data = cap->max_inline_data;
+ } else {
+ cap->max_send_wr = attr->cap.max_send_wr;
+ cap->max_recv_wr = attr->cap.max_recv_wr;
+ cap->max_send_sge = attr->cap.max_send_sge;
+ cap->max_recv_sge = attr->cap.max_recv_sge;
+ cap->max_inline_data = attr->cap.max_inline_data;
+ }
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uqp_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_QP_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_qp_init_attr attr = {};
+ struct ib_uverbs_qp_cap cap = {};
+ struct ib_rwq_ind_table *rwq_ind_tbl = NULL;
+ struct ib_qp *qp;
+ struct ib_pd *pd = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_cq *recv_cq = NULL;
+ struct ib_cq *send_cq = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *xrcd_uobj = NULL;
+ struct ib_device *device;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from_or_zero(&cap, attrs,
+ UVERBS_ATTR_CREATE_QP_CAP);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_QP_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.qp_type, attrs,
+ UVERBS_ATTR_CREATE_QP_TYPE);
+ if (ret)
+ return ret;
+
+ switch (attr.qp_type) {
+ case IB_QPT_XRC_TGT:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE))
+ return -EINVAL;
+
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!xrcd)
+ return -EINVAL;
+ device = xrcd->device;
+ break;
+ case IB_UVERBS_QPT_RAW_PACKET:
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+ fallthrough;
+ case IB_UVERBS_QPT_RC:
+ case IB_UVERBS_QPT_UC:
+ case IB_UVERBS_QPT_UD:
+ case IB_UVERBS_QPT_XRC_INI:
+ case IB_UVERBS_QPT_DRIVER:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE) ||
+ (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE) &&
+ attr.qp_type == IB_QPT_XRC_INI))
+ return -EINVAL;
+
+ pd = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE);
+ if (IS_ERR(pd))
+ return PTR_ERR(pd);
+
+ rwq_ind_tbl = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE);
+ if (!IS_ERR(rwq_ind_tbl)) {
+ if (cap.max_recv_wr || cap.max_recv_sge ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
+ return -EINVAL;
+
+			/* send_cq is optional */
+ if (cap.max_send_wr) {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+ }
+ attr.rwq_ind_tbl = rwq_ind_tbl;
+ } else {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+
+ if (attr.qp_type != IB_QPT_XRC_INI) {
+ recv_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE);
+ if (IS_ERR(recv_cq))
+ return PTR_ERR(recv_cq);
+ }
+ }
+
+ device = pd->device;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = uverbs_get_flags32(&attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_QP_FLAGS,
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_UVERBS_QP_CREATE_SCATTER_FCS |
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING |
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING |
+ IB_UVERBS_QP_CREATE_SQ_SIG_ALL);
+ if (ret)
+ return ret;
+
+ ret = check_creation_flags(attr.qp_type, attr.create_flags);
+ if (ret)
+ return ret;
+
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN)) {
+ ret = uverbs_copy_from(&attr.source_qpn, attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN);
+ if (ret)
+ return ret;
+ attr.create_flags |= IB_QP_CREATE_SOURCE_QPN;
+ }
+
+ srq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE);
+ if (!IS_ERR(srq)) {
+ if ((srq->srq_type == IB_SRQT_XRC &&
+ attr.qp_type != IB_QPT_XRC_TGT) ||
+ (srq->srq_type != IB_SRQT_XRC &&
+ attr.qp_type == IB_QPT_XRC_TGT))
+ return -EINVAL;
+ attr.srq = srq;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_QP_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+ obj->uevent.uobject.user_handle = user_handle;
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.send_cq = send_cq;
+ attr.recv_cq = recv_cq;
+ attr.xrcd = xrcd;
+ if (attr.create_flags & IB_UVERBS_QP_CREATE_SQ_SIG_ALL) {
+		/* This creation flag is a uverbs-only bit; mask it out before
+		 * calling the driver. It was added to avoid an extra user attr
+		 * just for this case when using ioctl.
+		 */
+ attr.create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+ attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ } else {
+ attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ }
+
+ set_caps(&attr, &cap, true);
+ mutex_init(&obj->mcast_lock);
+
+ qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+ KBUILD_MODNAME);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+ ib_qp_usecnt_inc(qp);
+
+ if (attr.qp_type == IB_QPT_XRC_TGT) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ }
+
+ obj->uevent.uobject.object = qp;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_QP_HANDLE);
+
+ set_caps(&attr, &cap, false);
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ UVERBS_ATTR_CREATE_QP_RESP_CAP, &cap,
+ sizeof(cap));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ &qp->qp_num,
+ sizeof(qp->qp_num));
+
+ return ret;
+err_put:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_QP_TYPE,
+ enum ib_uverbs_qp_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_QP_FLAGS,
+ enum ib_uverbs_qp_create_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_SOURCE_QPN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_QP_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_QP_HANDLE);
+ struct ib_uqp_object *obj =
+ container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ struct ib_uverbs_destroy_qp_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_QP_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_QP_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_qp[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ {}
+};
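
For reference, the handler/declaration pattern used throughout the new QP file above is the generic ioctl-based uverbs interface: a handler pulls attributes out of the bundle with uverbs_copy_from()/uverbs_get_const() and writes results back with uverbs_copy_to(), while DECLARE_UVERBS_NAMED_METHOD() describes the attribute layout for the parser. A minimal sketch of that pattern, with hypothetical EXAMPLE_* method and attribute IDs that are not part of this patch:

static int UVERBS_HANDLER(EXAMPLE_METHOD_ECHO)(
	struct uverbs_attr_bundle *attrs)
{
	u32 val;
	int ret;

	/* Mandatory input attribute: copy the caller's value. */
	ret = uverbs_copy_from(&val, attrs, EXAMPLE_ATTR_ECHO_IN);
	if (ret)
		return ret;

	/* Mandatory output attribute: echo the value back to userspace. */
	return uverbs_copy_to(attrs, EXAMPLE_ATTR_ECHO_OUT, &val, sizeof(val));
}

DECLARE_UVERBS_NAMED_METHOD(
	EXAMPLE_METHOD_ECHO,
	UVERBS_ATTR_PTR_IN(EXAMPLE_ATTR_ECHO_IN,
			   UVERBS_ATTR_TYPE(u32),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_OUT(EXAMPLE_ATTR_ECHO_OUT,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));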
diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c
new file mode 100644
index 000000000000..e5513f828bdc
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_srq.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uobject, struct ib_usrq_object,
+ uevent.uobject);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uevent);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_usrq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_SRQ_PD_HANDLE);
+ struct ib_srq_init_attr attr = {};
+ struct ib_uobject *xrcd_uobj;
+ struct ib_srq *srq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from(&attr.attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.srq_limit, attrs,
+ UVERBS_ATTR_CREATE_SRQ_LIMIT);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_SRQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.srq_type, attrs,
+ UVERBS_ATTR_CREATE_SRQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (ib_srq_has_cq(attr.srq_type)) {
+ attr.ext.cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE);
+ if (IS_ERR(attr.ext.cq))
+ return PTR_ERR(attr.ext.cq);
+ }
+
+ switch (attr.srq_type) {
+ case IB_UVERBS_SRQT_XRC:
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd)
+ return -EINVAL;
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ break;
+ case IB_UVERBS_SRQT_TM:
+ ret = uverbs_copy_from(&attr.ext.tag_matching.max_num_tags,
+ attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS);
+ if (ret)
+ return ret;
+ break;
+ case IB_UVERBS_SRQT_BASIC:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_SRQ_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ attr.event_handler = ib_uverbs_srq_event_handler;
+ obj->uevent.uobject.user_handle = user_handle;
+
+ srq = ib_create_srq_user(pd, &attr, obj, &attrs->driver_udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = srq;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ &attr.attr.max_wr,
+ sizeof(attr.attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ &attr.attr.max_sge,
+ sizeof(attr.attr.max_sge));
+ if (ret)
+ return ret;
+
+ if (attr.srq_type == IB_SRQT_XRC) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ &srq->ext.xrc.srq_num,
+ sizeof(srq->ext.xrc.srq_num));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ if (attr.srq_type == IB_SRQT_XRC)
+ atomic_dec(&obj->uxrcd->refcnt);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_SRQ_TYPE,
+ enum ib_uverbs_srq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_LIMIT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_SRQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_SRQ_HANDLE);
+ struct ib_usrq_object *obj =
+ container_of(uobj, struct ib_usrq_object, uevent.uobject);
+ struct ib_uverbs_destroy_srq_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_SRQ_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_SRQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_srq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_srq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ {}
+};
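
The ioctl method above serves userspace; in-kernel ULPs reach the same ib_create_srq_user()/ib_destroy_srq_user() path through the ib_create_srq()/ib_destroy_srq() wrappers with no uobject or udata attached. A minimal sketch of a basic (non-XRC, non-TM) kernel SRQ, assuming the usual wrappers and with error handling trimmed:

static struct ib_srq *example_create_basic_srq(struct ib_pd *pd)
{
	struct ib_srq_init_attr init_attr = {
		.srq_type = IB_SRQT_BASIC,
		.attr = {
			.max_wr  = 128,	/* requested receive depth */
			.max_sge = 2,	/* scatter entries per receive */
		},
	};

	/* attr.max_wr/max_sge are updated to the values actually granted. */
	return ib_create_srq(pd, &init_attr);
}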
diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c
new file mode 100644
index 000000000000..7ded8339346f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_wq.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ ib_uverbs_release_uevent(&uwq->uevent);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uwq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_PD_HANDLE);
+ struct ib_cq *cq =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_CQ_HANDLE);
+ struct ib_wq_init_attr wq_init_attr = {};
+ struct ib_wq *wq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_get_flags32(&wq_init_attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_WQ_FLAGS,
+ IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING |
+ IB_UVERBS_WQ_FLAGS_SCATTER_FCS |
+ IB_UVERBS_WQ_FLAGS_DELAY_DROP |
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_WQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&wq_init_attr.wq_type, attrs,
+ UVERBS_ATTR_CREATE_WQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (wq_init_attr.wq_type != IB_WQT_RQ)
+ return -EINVAL;
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_WQ_EVENT_FD);
+ obj->uevent.uobject.user_handle = user_handle;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ wq_init_attr.wq_context = attrs->ufile;
+ wq_init_attr.cq = cq;
+
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
+ if (IS_ERR(wq)) {
+ ret = PTR_ERR(wq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = wq;
+ wq->wq_type = wq_init_attr.wq_type;
+ wq->cq = cq;
+ wq->pd = pd;
+ wq->device = pd->device;
+ wq->wq_context = wq_init_attr.wq_context;
+ atomic_set(&wq->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&cq->usecnt);
+ wq->uobject = obj;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ &wq_init_attr.max_wr,
+ sizeof(wq_init_attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ &wq_init_attr.max_sge,
+ sizeof(wq_init_attr.max_sge));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ &wq->wq_num,
+ sizeof(wq->wq_num));
+ return ret;
+
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_WQ_TYPE,
+ enum ib_wq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_WQ_FLAGS,
+ enum ib_uverbs_wq_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_WQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_WQ_HANDLE);
+ struct ib_uwq_object *obj =
+ container_of(uobj, struct ib_uwq_object, uevent.uobject);
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_WQ_RESP,
+ &obj->uevent.events_reported,
+ sizeof(obj->uevent.events_reported));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_WQ_RESP,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_wq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ {}
+};
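
The WQ object is consumed from userspace through the libibverbs WQ API; the following is a sketch of the corresponding call, assuming the provider routes ibv_create_wq() to the ioctl method above (the rdma-core side is not part of this patch), with error handling trimmed:

struct ibv_wq *example_create_rq(struct ibv_context *ctx, struct ibv_pd *pd,
				 struct ibv_cq *cq)
{
	struct ibv_wq_init_attr attr = {
		.wq_type = IBV_WQT_RQ,	/* the handler accepts only RQ-type WQs */
		.max_wr  = 256,
		.max_sge = 1,
		.pd      = pd,
		.cq      = cq,
	};

	return ibv_create_wq(ctx, &attr);
}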
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 3f121ac31e0a..a02916a3a79c 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi,
method_elm->is_ex = def->write.is_ex;
method_elm->handler = def->func_write;
- if (def->write.is_ex)
- method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
- BIT_ULL(def->write.command_num));
- else
+ if (!def->write.is_ex)
method_elm->disabled = !(ibdev->uverbs_cmd_mask &
BIT_ULL(def->write.command_num));
@@ -450,6 +447,9 @@ static int uapi_finalize(struct uverbs_api *uapi)
uapi->num_write_ex = max_write_ex + 1;
data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
sizeof(*uapi->write_methods), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
data[i] = &uapi->notsupp_method;
uapi->write_methods = data;
@@ -520,7 +520,7 @@ static void uapi_key_okay(u32 key)
count++;
if (uapi_key_is_attr(key))
count++;
- WARN(count != 1, "Bad count %d key=%x", count, key);
+ WARN(count != 1, "Bad count %u key=%x", count, key);
}
static void uapi_finalize_disable(struct uverbs_api *uapi)
@@ -634,6 +634,9 @@ static const struct uapi_definition uverbs_core_api[] = {
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
UAPI_DEF_CHAIN(uverbs_def_obj_intf),
UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_obj_qp),
+ UAPI_DEF_CHAIN(uverbs_def_obj_srq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_wq),
UAPI_DEF_CHAIN(uverbs_def_write_intf),
{},
};
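
Each of the three new chains follows the existing convention: the per-object file exports a uapi_definition array whose UAPI_DEF_OBJ_NEEDS_FN() entry disables the whole object tree when the driver does not implement the named destroy op, and uverbs_core_api[] pulls that array in with UAPI_DEF_CHAIN(). A sketch for a hypothetical additional object (all "example" names are illustrative only):

/* In the object's own file: */
const struct uapi_definition uverbs_def_obj_example[] = {
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_EXAMPLE,
				      UAPI_DEF_OBJ_NEEDS_FN(destroy_example)),
	{}
};

/* ...and in uverbs_core_api[]: UAPI_DEF_CHAIN(uverbs_def_obj_example), */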
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e62c9dfc7837..26b021f43ba4 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -50,12 +50,11 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include <rdma/rw.h>
+#include <rdma/lag.h>
#include "core_priv.h"
#include <trace/events/rdma_core.h>
-#include <trace/events/rdma_core.h>
-
static int ib_resolve_eth_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr);
@@ -97,10 +96,10 @@ static const char * const wc_statuses[] = {
[IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error",
[IB_WC_LOC_PROT_ERR] = "local protection error",
[IB_WC_WR_FLUSH_ERR] = "WR flushed",
- [IB_WC_MW_BIND_ERR] = "memory management operation error",
+ [IB_WC_MW_BIND_ERR] = "memory bind operation error",
[IB_WC_BAD_RESP_ERR] = "bad response error",
[IB_WC_LOC_ACCESS_ERR] = "local access error",
- [IB_WC_REM_INV_REQ_ERR] = "invalid request error",
+ [IB_WC_REM_INV_REQ_ERR] = "remote invalid request error",
[IB_WC_REM_ACCESS_ERR] = "remote access error",
[IB_WC_REM_OP_ERR] = "remote operation error",
[IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded",
@@ -228,7 +227,8 @@ rdma_node_get_transport(unsigned int node_type)
}
EXPORT_SYMBOL(rdma_node_get_transport);
-enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
+ u32 port_num)
{
enum rdma_transport_type lt;
if (device->ops.get_link_layer)
@@ -245,7 +245,7 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
/* Protection domains */
/**
- * ib_alloc_pd - Allocates an unused protection domain.
+ * __ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
* @flags: protection domain flags
* @caller: caller's build-time module name
@@ -268,22 +268,20 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
return ERR_PTR(-ENOMEM);
pd->device = device;
- pd->uobject = NULL;
- pd->__internal_mr = NULL;
- atomic_set(&pd->usecnt, 0);
pd->flags = flags;
- pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_set_task(&pd->res, caller);
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, caller);
ret = device->ops.alloc_pd(pd, NULL);
if (ret) {
+ rdma_restrack_put(&pd->res);
kfree(pd);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&pd->res);
+ rdma_restrack_add(&pd->res);
- if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
@@ -310,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
pd->__internal_mr = mr;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
+ if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
pd->local_dma_lkey = pd->__internal_mr->lkey;
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -330,7 +328,7 @@ EXPORT_SYMBOL(__ib_alloc_pd);
* exist. The caller is responsible to synchronously destroy them and
* guarantee no new allocations will happen.
*/
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
{
int ret;
@@ -340,13 +338,13 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
pd->__internal_mr = NULL;
}
- /* uverbs manipulates usecnt with proper locking, while the kabi
- requires the caller to guarantee we can't race here. */
- WARN_ON(atomic_read(&pd->usecnt));
+ ret = pd->device->ops.dealloc_pd(pd, udata);
+ if (ret)
+ return ret;
rdma_restrack_del(&pd->res);
- pd->device->ops.dealloc_pd(pd, udata);
kfree(pd);
+ return ret;
}
EXPORT_SYMBOL(ib_dealloc_pd_user);
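
For kernel consumers the PD lifetime is unchanged apart from the restrack bookkeeping and the dealloc return code now being propagated; a minimal sketch using the usual ib_alloc_pd()/ib_dealloc_pd() wrappers around the functions above:

static int example_pd_cycle(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(device, 0);	/* no IB_PD_UNSAFE_GLOBAL_RKEY */
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* ... use pd->local_dma_lkey for kernel DMA-mapped buffers ... */

	ib_dealloc_pd(pd);
	return 0;
}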
@@ -502,15 +500,17 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ struct net_device *xmit_slave)
{
+ struct rdma_ah_init_attr init_attr = {};
struct ib_device *device = pd->device;
struct ib_ah *ah;
int ret;
might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
- if (!device->ops.create_ah)
+ if (!udata && !device->ops.create_ah)
return ERR_PTR(-EOPNOTSUPP);
ah = rdma_zalloc_drv_obj_gfp(
@@ -523,8 +523,14 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
ah->pd = pd;
ah->type = ah_attr->type;
ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+ init_attr.ah_attr = ah_attr;
+ init_attr.flags = flags;
+ init_attr.xmit_slave = xmit_slave;
- ret = device->ops.create_ah(ah, ah_attr, flags, udata);
+ if (udata)
+ ret = device->ops.create_user_ah(ah, &init_attr, udata);
+ else
+ ret = device->ops.create_ah(ah, &init_attr, NULL);
if (ret) {
kfree(ah);
return ERR_PTR(ret);
@@ -549,15 +555,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
u32 flags)
{
const struct ib_gid_attr *old_sgid_attr;
+ struct net_device *slave;
struct ib_ah *ah;
int ret;
ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
if (ret)
return ERR_PTR(ret);
-
- ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
-
+ slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ?
+ GFP_KERNEL : GFP_ATOMIC);
+ if (IS_ERR(slave)) {
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return (void *)slave;
+ }
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
+ rdma_lag_put_ah_roce_slave(slave);
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
}
@@ -596,7 +609,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
}
}
- ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
+ udata, NULL);
out:
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
@@ -638,7 +652,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
EXPORT_SYMBOL(ib_get_rdma_header_version);
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
const struct ib_grh *grh)
{
int grh_version;
@@ -681,7 +695,7 @@ static bool find_gid_index(const union ib_gid *gid,
}
static const struct ib_gid_attr *
-get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
+get_sgid_attr_from_eth(struct ib_device *device, u32 port_num,
u16 vlan_id, const union ib_gid *sgid,
enum ib_gid_type gid_type)
{
@@ -716,7 +730,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
(struct in6_addr *)dgid);
return 0;
} else if (net_type == RDMA_NETWORK_IPV6 ||
- net_type == RDMA_NETWORK_IB) {
+		   net_type == RDMA_NETWORK_IB ||
+		   net_type == RDMA_NETWORK_ROCE_V1) {
*dgid = hdr->ibgrh.dgid;
*sgid = hdr->ibgrh.sgid;
return 0;
@@ -768,7 +782,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
* On success the caller is responsible to call rdma_destroy_ah_attr on the
* attr.
*/
-int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct rdma_ah_attr *ah_attr)
{
@@ -899,7 +913,7 @@ void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
EXPORT_SYMBOL(rdma_destroy_ah_attr);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
- const struct ib_grh *grh, u8 port_num)
+ const struct ib_grh *grh, u32 port_num)
{
struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
@@ -952,32 +966,50 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
{
const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
+ int ret;
might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
pd = ah->pd;
- ah->device->ops.destroy_ah(ah, flags);
+ ret = ah->device->ops.destroy_ah(ah, flags);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
if (sgid_attr)
rdma_put_gid_attr(sgid_attr);
kfree(ah);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(rdma_destroy_ah_user);
/* Shared receive queues */
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr)
+/**
+ * ib_create_srq_user - Creates a SRQ associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ * SRQ. If SRQ creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created SRQ.
+ * @uobject: uobject pointer if this is not a kernel SRQ
+ * @udata: udata pointer if this is not a kernel SRQ
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return. If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_usrq_object *uobject,
+ struct ib_udata *udata)
{
struct ib_srq *srq;
int ret;
- if (!pd->device->ops.create_srq)
- return ERR_PTR(-EOPNOTSUPP);
-
srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -987,6 +1019,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->srq_type = srq_init_attr->srq_type;
+ srq->uobject = uobject;
if (ib_srq_has_cq(srq->srq_type)) {
srq->ext.cq = srq_init_attr->ext.cq;
@@ -994,14 +1027,19 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
}
if (srq->srq_type == IB_SRQT_XRC) {
srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
- atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+ if (srq->ext.xrc.xrcd)
+ atomic_inc(&srq->ext.xrc.xrcd->usecnt);
}
atomic_inc(&pd->usecnt);
- ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL);
+ rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ);
+ rdma_restrack_parent_name(&srq->res, &pd->res);
+
+ ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
if (ret) {
- atomic_dec(&srq->pd->usecnt);
- if (srq->srq_type == IB_SRQT_XRC)
+ rdma_restrack_put(&srq->res);
+ atomic_dec(&pd->usecnt);
+ if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
atomic_dec(&srq->ext.xrc.xrcd->usecnt);
if (ib_srq_has_cq(srq->srq_type))
atomic_dec(&srq->ext.cq->usecnt);
@@ -1009,9 +1047,11 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
return ERR_PTR(ret);
}
+ rdma_restrack_add(&srq->res);
+
return srq;
}
-EXPORT_SYMBOL(ib_create_srq);
+EXPORT_SYMBOL(ib_create_srq_user);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
@@ -1033,19 +1073,24 @@ EXPORT_SYMBOL(ib_query_srq);
int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
+ int ret;
+
if (atomic_read(&srq->usecnt))
return -EBUSY;
- srq->device->ops.destroy_srq(srq, udata);
+ ret = srq->device->ops.destroy_srq(srq, udata);
+ if (ret)
+ return ret;
atomic_dec(&srq->pd->usecnt);
- if (srq->srq_type == IB_SRQT_XRC)
+ if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
atomic_dec(&srq->ext.xrc.xrcd->usecnt);
if (ib_srq_has_cq(srq->srq_type))
atomic_dec(&srq->ext.cq->usecnt);
+ rdma_restrack_del(&srq->res);
kfree(srq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_srq_user);
@@ -1063,13 +1108,6 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
}
-static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
-{
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
- mutex_unlock(&xrcd->tgt_qp_mutex);
-}
-
static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
void (*event_handler)(struct ib_event *, void *),
void *qp_context)
@@ -1112,25 +1150,24 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
return ERR_PTR(-EINVAL);
- qp = ERR_PTR(-EINVAL);
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
- if (real_qp->qp_num == qp_open_attr->qp_num) {
- qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
- qp_open_attr->qp_context);
- break;
- }
+ down_read(&xrcd->tgt_qps_rwsem);
+ real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
+ if (!real_qp) {
+ up_read(&xrcd->tgt_qps_rwsem);
+ return ERR_PTR(-EINVAL);
}
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+ qp_open_attr->qp_context);
+ up_read(&xrcd->tgt_qps_rwsem);
return qp;
}
EXPORT_SYMBOL(ib_open_qp);
static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata)
+ struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *real_qp = qp;
+ int err;
qp->event_handler = __ib_shared_qp_event_handler;
qp->qp_context = qp;
@@ -1146,27 +1183,154 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
if (IS_ERR(qp))
return qp;
- __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num,
+ real_qp, GFP_KERNEL));
+ if (err) {
+ ib_close_qp(qp);
+ return ERR_PTR(err);
+ }
return qp;
}
-struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata)
+static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
{
- struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
+ struct ib_udata dummy = {};
struct ib_qp *qp;
int ret;
- if (qp_init_attr->rwq_ind_tbl &&
- (qp_init_attr->recv_cq ||
- qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
- qp_init_attr->cap.max_recv_sge))
- return ERR_PTR(-EINVAL);
+ if (!dev->ops.create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
- if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) &&
- !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER))
- return ERR_PTR(-EINVAL);
+ qp = rdma_zalloc_drv_obj_numa(dev, ib_qp);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->device = dev;
+ qp->pd = pd;
+ qp->uobject = uobj;
+ qp->real_qp = qp;
+
+ qp->qp_type = attr->qp_type;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->srq = attr->srq;
+ qp->event_handler = attr->event_handler;
+ qp->port = attr->port_num;
+ qp->qp_context = attr->qp_context;
+
+ spin_lock_init(&qp->mr_lock);
+ INIT_LIST_HEAD(&qp->rdma_mrs);
+ INIT_LIST_HEAD(&qp->sig_mrs);
+
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+
+ rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
+ WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+ rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+ ret = dev->ops.create_qp(qp, attr, udata);
+ if (ret)
+ goto err_create;
+
+ /*
+ * TODO: The mlx4 internally overwrites send_cq and recv_cq.
+ * Unfortunately, it is not an easy task to fix that driver.
+ */
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+
+ ret = ib_create_qp_security(qp, dev);
+ if (ret)
+ goto err_security;
+
+ rdma_restrack_add(&qp->res);
+ return qp;
+
+err_security:
+ qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL);
+err_create:
+ rdma_restrack_put(&qp->res);
+ kfree(qp);
+ return ERR_PTR(ret);
+
+}
+
+/**
+ * ib_create_qp_user - Creates a QP associated with the specified protection
+ * domain.
+ * @dev: IB device
+ * @pd: The protection domain associated with the QP.
+ * @attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ * @udata: User data
+ * @uobj: uverbs object
+ * @caller: caller's build-time module name
+ */
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
+{
+ struct ib_qp *qp, *xrc_qp;
+
+ if (attr->qp_type == IB_QPT_XRC_TGT)
+ qp = create_qp(dev, pd, attr, NULL, NULL, caller);
+ else
+ qp = create_qp(dev, pd, attr, udata, uobj, NULL);
+ if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
+ return qp;
+
+ xrc_qp = create_xrc_qp_user(qp, attr);
+ if (IS_ERR(xrc_qp)) {
+ ib_destroy_qp(qp);
+ return xrc_qp;
+ }
+
+ xrc_qp->uobject = uobj;
+ return xrc_qp;
+}
+EXPORT_SYMBOL(ib_create_qp_user);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp)
+{
+ if (qp->pd)
+ atomic_inc(&qp->pd->usecnt);
+ if (qp->send_cq)
+ atomic_inc(&qp->send_cq->usecnt);
+ if (qp->recv_cq)
+ atomic_inc(&qp->recv_cq->usecnt);
+ if (qp->srq)
+ atomic_inc(&qp->srq->usecnt);
+ if (qp->rwq_ind_tbl)
+ atomic_inc(&qp->rwq_ind_tbl->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_inc);
+
+void ib_qp_usecnt_dec(struct ib_qp *qp)
+{
+ if (qp->rwq_ind_tbl)
+ atomic_dec(&qp->rwq_ind_tbl->usecnt);
+ if (qp->srq)
+ atomic_dec(&qp->srq->usecnt);
+ if (qp->recv_cq)
+ atomic_dec(&qp->recv_cq->usecnt);
+ if (qp->send_cq)
+ atomic_dec(&qp->send_cq->usecnt);
+ if (qp->pd)
+ atomic_dec(&qp->pd->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_dec);
+
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller)
+{
+ struct ib_device *device = pd->device;
+ struct ib_qp *qp;
+ int ret;
/*
 * If the caller is using the RDMA API, calculate the resources
@@ -1177,47 +1341,11 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
+ qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
if (IS_ERR(qp))
return qp;
- ret = ib_create_qp_security(qp, device);
- if (ret)
- goto err;
-
- if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
- struct ib_qp *xrc_qp =
- create_xrc_qp_user(qp, qp_init_attr, udata);
-
- if (IS_ERR(xrc_qp)) {
- ret = PTR_ERR(xrc_qp);
- goto err;
- }
- return xrc_qp;
- }
-
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
- if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
- qp->recv_cq = NULL;
- qp->srq = NULL;
- } else {
- qp->recv_cq = qp_init_attr->recv_cq;
- if (qp_init_attr->recv_cq)
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
- qp->srq = qp_init_attr->srq;
- if (qp->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
- }
-
- qp->send_cq = qp_init_attr->send_cq;
- qp->xrcd = NULL;
-
- atomic_inc(&pd->usecnt);
- if (qp_init_attr->send_cq)
- atomic_inc(&qp_init_attr->send_cq->usecnt);
- if (qp_init_attr->rwq_ind_tbl)
- atomic_inc(&qp->rwq_ind_tbl->usecnt);
+ ib_qp_usecnt_inc(qp);
if (qp_init_attr->cap.max_rdma_ctxs) {
ret = rdma_rw_init_mrs(qp, qp_init_attr);
@@ -1243,7 +1371,7 @@ err:
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_create_qp_user);
+EXPORT_SYMBOL(ib_create_qp_kernel);
static const struct {
int valid;
@@ -1616,22 +1744,48 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
qp->qp_type == IB_QPT_XRC_TGT);
}
-/**
+/*
* IB core internal function to perform QP attributes modification.
*/
static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
const struct ib_gid_attr *old_sgid_attr_av;
const struct ib_gid_attr *old_sgid_attr_alt_av;
int ret;
+ attr->xmit_slave = NULL;
if (attr_mask & IB_QP_AV) {
ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
&old_sgid_attr_av);
if (ret)
return ret;
+
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ struct net_device *slave;
+
+ /*
+ * If the user provided the qp_attr then we have to
+			 * resolve it. Kernel users have to provide already
+ * resolved rdma_ah_attr's.
+ */
+ if (udata) {
+ ret = ib_resolve_eth_dmac(qp->device,
+ &attr->ah_attr);
+ if (ret)
+ goto out_av;
+ }
+ slave = rdma_lag_get_ah_roce_slave(qp->device,
+ &attr->ah_attr,
+ GFP_KERNEL);
+ if (IS_ERR(slave)) {
+ ret = PTR_ERR(slave);
+ goto out_av;
+ }
+ attr->xmit_slave = slave;
+ }
}
if (attr_mask & IB_QP_ALT_PATH) {
/*
@@ -1653,23 +1807,11 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (!(rdma_protocol_ib(qp->device,
attr->alt_ah_attr.port_num) &&
rdma_protocol_ib(qp->device, port))) {
- ret = EINVAL;
+ ret = -EINVAL;
goto out;
}
}
- /*
- * If the user provided the qp_attr then we have to resolve it. Kernel
- * users have to provide already resolved rdma_ah_attr's
- */
- if (udata && (attr_mask & IB_QP_AV) &&
- attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
- is_qp_type_connected(qp)) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto out;
- }
-
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
dev_warn(&qp->device->dev,
@@ -1711,8 +1853,10 @@ out:
if (attr_mask & IB_QP_ALT_PATH)
rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
out_av:
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
+ rdma_lag_put_ah_roce_slave(attr->xmit_slave);
rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
+ }
return ret;
}
@@ -1734,7 +1878,7 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
+int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
{
int rc;
u32 netdev_speed;
@@ -1754,11 +1898,11 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
dev_put(netdev);
- if (!rc) {
+ if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
netdev_speed = lksettings.base.speed;
} else {
netdev_speed = SPEED_1000;
- pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name,
+ pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
netdev_speed);
}
@@ -1838,21 +1982,18 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
real_qp = qp->real_qp;
xrcd = real_qp->xrcd;
-
- mutex_lock(&xrcd->tgt_qp_mutex);
+ down_write(&xrcd->tgt_qps_rwsem);
ib_close_qp(qp);
if (atomic_read(&real_qp->usecnt) == 0)
- list_del(&real_qp->xrcd_list);
+ xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
else
real_qp = NULL;
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ up_write(&xrcd->tgt_qps_rwsem);
if (real_qp) {
ret = ib_destroy_qp(real_qp);
if (!ret)
atomic_dec(&xrcd->usecnt);
- else
- __ib_insert_xrcd_qp(xrcd, real_qp);
}
return 0;
@@ -1862,10 +2003,6 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
{
const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
- struct ib_pd *pd;
- struct ib_cq *scq, *rcq;
- struct ib_srq *srq;
- struct ib_rwq_ind_table *ind_tbl;
struct ib_qp_security *sec;
int ret;
@@ -1877,11 +2014,6 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
if (qp->real_qp != qp)
return __ib_destroy_shared_qp(qp);
- pd = qp->pd;
- scq = qp->send_cq;
- rcq = qp->recv_cq;
- srq = qp->srq;
- ind_tbl = qp->rwq_ind_tbl;
sec = qp->qp_sec;
if (sec)
ib_destroy_qp_security_begin(sec);
@@ -1890,30 +2022,24 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
rdma_rw_cleanup_mrs(qp);
rdma_counter_unbind_qp(qp, true);
- rdma_restrack_del(&qp->res);
ret = qp->device->ops.destroy_qp(qp, udata);
- if (!ret) {
- if (alt_path_sgid_attr)
- rdma_put_gid_attr(alt_path_sgid_attr);
- if (av_sgid_attr)
- rdma_put_gid_attr(av_sgid_attr);
- if (pd)
- atomic_dec(&pd->usecnt);
- if (scq)
- atomic_dec(&scq->usecnt);
- if (rcq)
- atomic_dec(&rcq->usecnt);
- if (srq)
- atomic_dec(&srq->usecnt);
- if (ind_tbl)
- atomic_dec(&ind_tbl->usecnt);
- if (sec)
- ib_destroy_qp_security_end(sec);
- } else {
+ if (ret) {
if (sec)
ib_destroy_qp_security_abort(sec);
+ return ret;
}
+ if (alt_path_sgid_attr)
+ rdma_put_gid_attr(alt_path_sgid_attr);
+ if (av_sgid_attr)
+ rdma_put_gid_attr(av_sgid_attr);
+
+ ib_qp_usecnt_dec(qp);
+ if (sec)
+ ib_destroy_qp_security_end(sec);
+
+ rdma_restrack_del(&qp->res);
+ kfree(qp);
return ret;
}
EXPORT_SYMBOL(ib_destroy_qp_user);
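
Kernel ULPs now go through ib_create_qp_kernel() (via the ib_create_qp() wrapper) while userspace QPs take the ib_create_qp_user() path above; both tear down through ib_destroy_qp_user(). A minimal kernel-side sketch for a basic RC QP, assuming the usual wrappers and omitting error handling:

static struct ib_qp *example_create_rc_qp(struct ib_pd *pd, struct ib_cq *cq)
{
	struct ib_qp_init_attr init_attr = {
		.qp_type     = IB_QPT_RC,
		.send_cq     = cq,
		.recv_cq     = cq,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.cap = {
			.max_send_wr  = 64,
			.max_recv_wr  = 64,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
	};

	/* ib_destroy_qp() releases the QP again. */
	return ib_create_qp(pd, &init_attr);
}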
@@ -1940,22 +2066,27 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = device->ops.create_cq(cq, cq_attr, NULL);
if (ret) {
+ rdma_restrack_put(&cq->res);
kfree(cq);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&cq->res);
+ rdma_restrack_add(&cq->res);
return cq;
}
EXPORT_SYMBOL(__ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
return cq->device->ops.modify_cq ?
cq->device->ops.modify_cq(cq, cq_count,
cq_period) : -EOPNOTSUPP;
@@ -1964,18 +2095,29 @@ EXPORT_SYMBOL(rdma_set_cq_moderation);
int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
+ int ret;
+
+ if (WARN_ON_ONCE(cq->shared))
+ return -EOPNOTSUPP;
+
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ ret = cq->device->ops.destroy_cq(cq, udata);
+ if (ret)
+ return ret;
+
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
kfree(cq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_cq_user);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
return cq->device->ops.resize_cq ?
cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
@@ -1989,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mr *mr;
if (access_flags & IB_ACCESS_ON_DEMAND) {
- if (!(pd->device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
+ if (!(pd->device->attrs.kernel_cap_flags &
+ IBK_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
return ERR_PTR(-EINVAL);
}
@@ -2003,11 +2145,16 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return mr;
mr->device = pd->device;
+ mr->type = IB_MR_TYPE_USER;
mr->pd = pd;
mr->dm = NULL;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
+ mr->iova = virt_addr;
+ mr->length = length;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
return mr;
}
@@ -2019,6 +2166,9 @@ int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
if (!pd->device->ops.advise_mr)
return -EOPNOTSUPP;
+ if (!num_sge)
+ return 0;
+
return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
NULL);
}
@@ -2046,11 +2196,10 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
EXPORT_SYMBOL(ib_dereg_mr_user);
/**
- * ib_alloc_mr_user() - Allocates a memory region
+ * ib_alloc_mr() - Allocates a memory region
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
- * @udata: user data or null for kernel objects
*
* Notes:
 * Memory registration page/sg lists must not exceed max_num_sg.
@@ -2058,8 +2207,8 @@ EXPORT_SYMBOL(ib_dereg_mr_user);
* max_num_sg * used_page_size.
*
*/
-struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
{
struct ib_mr *mr;
@@ -2074,25 +2223,27 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
goto out;
}
- mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
- if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
- mr->dm = NULL;
- mr->uobject = NULL;
- atomic_inc(&pd->usecnt);
- mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
- mr->type = mr_type;
- mr->sig_attrs = NULL;
- }
+ mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
+ if (IS_ERR(mr))
+ goto out;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->type = mr_type;
+ mr->sig_attrs = NULL;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
out:
trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
}
-EXPORT_SYMBOL(ib_alloc_mr_user);
+EXPORT_SYMBOL(ib_alloc_mr);
/**
* ib_alloc_mr_integrity() - Allocates an integrity memory region
@@ -2143,65 +2294,18 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
mr->type = IB_MR_TYPE_INTEGRITY;
mr->sig_attrs = sig_attrs;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
out:
trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr_integrity);
-/* "Fast" memory regions */
-
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
-{
- struct ib_fmr *fmr;
-
- if (!pd->device->ops.alloc_fmr)
- return ERR_PTR(-EOPNOTSUPP);
-
- fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr);
- if (!IS_ERR(fmr)) {
- fmr->device = pd->device;
- fmr->pd = pd;
- atomic_inc(&pd->usecnt);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_alloc_fmr);
-
-int ib_unmap_fmr(struct list_head *fmr_list)
-{
- struct ib_fmr *fmr;
-
- if (list_empty(fmr_list))
- return 0;
-
- fmr = list_entry(fmr_list->next, struct ib_fmr, list);
- return fmr->device->ops.unmap_fmr(fmr_list);
-}
-EXPORT_SYMBOL(ib_unmap_fmr);
-
-int ib_dealloc_fmr(struct ib_fmr *fmr)
-{
- struct ib_pd *pd;
- int ret;
-
- pd = fmr->pd;
- ret = fmr->device->ops.dealloc_fmr(fmr);
- if (!ret)
- atomic_dec(&pd->usecnt);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_fmr);
-
/* Multicast groups */
static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
@@ -2209,7 +2313,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
struct ib_qp_init_attr init_attr = {};
struct ib_qp_attr attr = {};
int num_eth_ports = 0;
- int port;
+ unsigned int port;
/* If QP state >= init, it is assigned to a port and we can check this
* port only.
@@ -2224,7 +2328,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
}
/* Can't get a quick answer, iterate over all ports */
- for (port = 0; port < qp->device->phys_port_cnt; port++)
+ rdma_for_each_port(qp->device, port)
if (rdma_port_get_link_layer(qp->device, port) !=
IB_LINK_LAYER_INFINIBAND)
num_eth_ports++;
@@ -2278,45 +2382,61 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
+/**
+ * ib_alloc_xrcd_user - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ * @inode: inode to connect XRCD
+ * @udata: Valid user data or NULL for kernel object
+ */
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+ struct inode *inode, struct ib_udata *udata)
{
struct ib_xrcd *xrcd;
+ int ret;
if (!device->ops.alloc_xrcd)
return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->ops.alloc_xrcd(device, NULL);
- if (!IS_ERR(xrcd)) {
- xrcd->device = device;
- xrcd->inode = NULL;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
- }
+ xrcd = rdma_zalloc_drv_obj(device, ib_xrcd);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+
+ xrcd->device = device;
+ xrcd->inode = inode;
+ atomic_set(&xrcd->usecnt, 0);
+ init_rwsem(&xrcd->tgt_qps_rwsem);
+ xa_init(&xrcd->tgt_qps);
+ ret = device->ops.alloc_xrcd(xrcd, udata);
+ if (ret)
+ goto err;
return xrcd;
+err:
+ kfree(xrcd);
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_xrcd);
+EXPORT_SYMBOL(ib_alloc_xrcd_user);
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+/**
+ * ib_dealloc_xrcd_user - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
+ */
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
- struct ib_qp *qp;
int ret;
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
- while (!list_empty(&xrcd->tgt_qp_list)) {
- qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
- ret = ib_destroy_qp(qp);
- if (ret)
- return ret;
- }
- mutex_destroy(&xrcd->tgt_qp_mutex);
-
- return xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ WARN_ON(!xa_empty(&xrcd->tgt_qps));
+ ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ if (ret)
+ return ret;
+ kfree(xrcd);
+ return ret;
}
-EXPORT_SYMBOL(ib_dealloc_xrcd);
+EXPORT_SYMBOL(ib_dealloc_xrcd_user);
/**
* ib_create_wq - Creates a WQ associated with the specified protection
@@ -2358,108 +2478,28 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
EXPORT_SYMBOL(ib_create_wq);
/**
- * ib_destroy_wq - Destroys the specified user WQ.
+ * ib_destroy_wq_user - Destroys the specified user WQ.
* @wq: The WQ to destroy.
* @udata: Valid user data
*/
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
{
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
+ int ret;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- wq->device->ops.destroy_wq(wq, udata);
+ ret = wq->device->ops.destroy_wq(wq, udata);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
atomic_dec(&cq->usecnt);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_destroy_wq);
-
-/**
- * ib_modify_wq - Modifies the specified WQ.
- * @wq: The WQ to modify.
- * @wq_attr: On input, specifies the WQ attributes to modify.
- * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
- * are being modified.
- * On output, the current values of selected WQ attributes are returned.
- */
-int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
- u32 wq_attr_mask)
-{
- int err;
-
- if (!wq->device->ops.modify_wq)
- return -EOPNOTSUPP;
-
- err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL);
- return err;
-}
-EXPORT_SYMBOL(ib_modify_wq);
-
-/*
- * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
- * @device: The device on which to create the rwq indirection table.
- * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
- * create the Indirection Table.
- *
- * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less
- * than the created ib_rwq_ind_table object and the caller is responsible
- * for its memory allocation/free.
- */
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr)
-{
- struct ib_rwq_ind_table *rwq_ind_table;
- int i;
- u32 table_size;
-
- if (!device->ops.create_rwq_ind_table)
- return ERR_PTR(-EOPNOTSUPP);
-
- table_size = (1 << init_attr->log_ind_tbl_size);
- rwq_ind_table = device->ops.create_rwq_ind_table(device,
- init_attr, NULL);
- if (IS_ERR(rwq_ind_table))
- return rwq_ind_table;
-
- rwq_ind_table->ind_tbl = init_attr->ind_tbl;
- rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
- rwq_ind_table->device = device;
- rwq_ind_table->uobject = NULL;
- atomic_set(&rwq_ind_table->usecnt, 0);
-
- for (i = 0; i < table_size; i++)
- atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
-
- return rwq_ind_table;
-}
-EXPORT_SYMBOL(ib_create_rwq_ind_table);
-
-/*
- * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
- * @wq_ind_table: The Indirection Table to destroy.
-*/
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
-{
- int err, i;
- u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
- struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
-
- if (atomic_read(&rwq_ind_table->usecnt))
- return -EBUSY;
-
- err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
- if (!err) {
- for (i = 0; i < table_size; i++)
- atomic_dec(&ind_tbl[i]->usecnt);
- }
-
- return err;
+ return ret;
}
-EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
+EXPORT_SYMBOL(ib_destroy_wq_user);
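Since ->destroy_wq() now returns an errno that ib_destroy_wq_user() propagates, a driver callback would follow a pattern like the hedged sketch below (the foo_ names and the hardware teardown helper are assumptions, not part of this patch):

struct foo_wq {				/* hypothetical driver-private WQ */
	struct ib_wq ibwq;
	/* ... hardware state ... */
};

/*
 * Hypothetical driver callback: the core only drops the PD/CQ
 * reference counts when this returns 0.
 */
static int foo_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
{
	struct foo_wq *wq = container_of(ibwq, struct foo_wq, ibwq);
	int err;

	err = foo_hw_destroy_wq(wq);	/* assumed hardware teardown helper */
	if (err)
		return err;		/* surfaced by ib_destroy_wq_user() */

	kfree(wq);
	return 0;
}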
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
@@ -2471,7 +2511,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
}
EXPORT_SYMBOL(ib_check_mr_status);
-int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
int state)
{
if (!device->ops.set_vf_link_state)
@@ -2481,7 +2521,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
}
EXPORT_SYMBOL(ib_set_vf_link_state);
-int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info)
{
if (!device->ops.get_vf_config)
@@ -2491,7 +2531,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
}
EXPORT_SYMBOL(ib_get_vf_config);
-int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats)
{
if (!device->ops.get_vf_stats)
@@ -2501,7 +2541,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
}
EXPORT_SYMBOL(ib_get_vf_stats);
-int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
int type)
{
if (!device->ops.set_vf_guid)
@@ -2511,7 +2551,7 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
}
EXPORT_SYMBOL(ib_set_vf_guid);
-int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid)
{
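All of the ib_set_vf_*/ib_get_vf_* helpers above now take the port number as u32; a driver wiring up the corresponding ops callback would use the widened type as well, as in this illustrative sketch (foo_ names are hypothetical):

/*
 * Illustrative only: a driver providing the VF callbacks declares them
 * with the widened u32 port to match the updated ib_device_ops table.
 */
static int foo_set_vf_link_state(struct ib_device *device, int vf,
				 u32 port, int state)
{
	/* hypothetical hardware-specific implementation */
	return 0;
}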
@@ -2568,6 +2608,7 @@ EXPORT_SYMBOL(ib_map_mr_sg_pi);
* @page_size: page vector desired page size
*
* Constraints:
+ *
* - The first sg element is allowed to have an offset.
* - Each sg element must either be aligned to page_size or virtually
* contiguous to the previous element. In case an sg element has a
@@ -2601,10 +2642,12 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
- * @sg_offset_p: IN: start offset in bytes into sg
- * OUT: offset in bytes for element n of the sg of the first
+ * @sg_offset_p: ==== =======================================================
+ * IN start offset in bytes into sg
+ * OUT offset in bytes for element n of the sg of the first
* byte that has not been processed where n is the return
* value of this function.
+ * ==== =======================================================
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest
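The ib_sg_to_pages() helper documented above is typically called from a driver's ->map_mr_sg() implementation; a minimal, hypothetical pattern (driver-private foo_ names and page-list layout are assumptions) looks like this:

struct foo_mr {				/* hypothetical driver MR wrapper */
	struct ib_mr ibmr;
	u64 *pages;
	int npages;
	int max_pages;
};

/* Called by ib_sg_to_pages() once per page_size-aligned chunk. */
static int foo_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct foo_mr *mr = container_of(ibmr, struct foo_mr, ibmr);

	if (mr->npages == mr->max_pages)
		return -ENOMEM;		/* stops ib_sg_to_pages() early */

	mr->pages[mr->npages++] = addr;
	return 0;
}

static int foo_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct foo_mr *mr = container_of(ibmr, struct foo_mr, ibmr);

	mr->npages = 0;
	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, foo_set_page);
}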
@@ -2850,7 +2893,7 @@ void ib_drain_qp(struct ib_qp *qp)
}
EXPORT_SYMBOL(ib_drain_qp);
-struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *))
@@ -2876,7 +2919,7 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
}
EXPORT_SYMBOL(rdma_alloc_netdev);
-int rdma_init_netdev(struct ib_device *device, u8 port_num,
+int rdma_init_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
@@ -2931,3 +2974,52 @@ bool __rdma_block_iter_next(struct ib_block_iter *biter)
return true;
}
EXPORT_SYMBOL(__rdma_block_iter_next);
+
+/**
+ * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
+ * for the drivers.
+ * @descs: array of static descriptors
+ * @num_counters: number of elements in array
+ * @lifespan: milliseconds between updates
+ */
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+ const struct rdma_stat_desc *descs, int num_counters,
+ unsigned long lifespan)
+{
+ struct rdma_hw_stats *stats;
+
+ stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL);
+ if (!stats)
+ return NULL;
+
+ stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!stats->is_disabled)
+ goto err;
+
+ stats->descs = descs;
+ stats->num_counters = num_counters;
+ stats->lifespan = msecs_to_jiffies(lifespan);
+ mutex_init(&stats->lock);
+
+ return stats;
+
+err:
+ kfree(stats);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
+
+/**
+ * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats
+ * @stats: statistics to release
+ */
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
+{
+ if (!stats)
+ return;
+
+ kfree(stats->is_disabled);
+ kfree(stats);
+}
+EXPORT_SYMBOL(rdma_free_hw_stats_struct);
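A provider would typically build its stats-allocation callback on top of these helpers, with the core later releasing the structure through rdma_free_hw_stats_struct(); the sketch below is illustrative only (the foo_ prefix, counter names, and callback name are assumptions):

/* Hypothetical per-port counter descriptors and allocation callback. */
static const struct rdma_stat_desc foo_port_stat_descs[] = {
	[0].name = "rx_packets",
	[1].name = "tx_packets",
};

static struct rdma_hw_stats *foo_alloc_hw_port_stats(struct ib_device *ibdev,
						      u32 port_num)
{
	return rdma_alloc_hw_stats_struct(foo_port_stat_descs,
					  ARRAY_SIZE(foo_port_stat_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}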