aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/infiniband/core/cma.c230
-rw-r--r--drivers/infiniband/core/cma_priv.h1
-rw-r--r--drivers/infiniband/core/rdma_core.c2
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c2
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h2
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig2
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c4
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c4
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c248
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h13
-rw-r--r--drivers/infiniband/hw/irdma/cm.c11
-rw-r--r--drivers/infiniband/hw/irdma/ctrl.c8
-rw-r--r--drivers/infiniband/hw/irdma/hw.c33
-rw-r--r--drivers/infiniband/hw/irdma/main.h2
-rw-r--r--drivers/infiniband/hw/irdma/utils.c1
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c16
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/dm.c53
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c159
-rw-r--r--drivers/infiniband/hw/mlx5/main.c4
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h79
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c515
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c78
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c8
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c23
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c43
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c213
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c106
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c121
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c236
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c16
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c78
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h26
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c7
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c6
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c6
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c14
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c50
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h21
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c32
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c2
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c32
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h3
-rw-r--r--include/linux/mlx5/device.h36
-rw-r--r--include/linux/mlx5/driver.h7
-rw-r--r--include/linux/mlx5/fs.h16
-rw-r--r--include/linux/mlx5/mlx5_ifc.h157
-rw-r--r--include/rdma/ib_verbs.h2
-rw-r--r--include/rdma/rdma_cm.h1
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h17
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h1
79 files changed, 2021 insertions, 996 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fabca5e51e3d..46d06678dfbe 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -168,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
@@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
return hdr->ip_version >> 4;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
+}
+
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
struct in_device *in_dev = NULL;
@@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET)
+ return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
+ (char *)&((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
@@ -861,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -1883,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_cancel_operation(id_priv, state);
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -3172,8 +3295,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -4922,10 +5048,87 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+ cma_id_get(current_id);
+ queue_work(cma_wq, &current_id->id.net_work);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5148,6 +5351,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
@@ -5162,6 +5366,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -5174,6 +5379,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 757a0ef79872..b7354c94cf1b 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -64,6 +64,7 @@ struct rdma_id_private {
struct list_head listen_item;
struct list_head listen_list;
};
+ struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 94d83b665a2f..29b1ab1d5f93 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
- * access locks, since only a single one of them is is allowed
+ * access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 68197e576433..e958c43dd28f 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -250,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
/**
* is_upper_ndev_bond_master_filter - Check if a given netdevice
- * is bond master device of netdevice of the the RDMA device of port.
+ * is bond master device of netdevice of the RDMA device of port.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: Pointer to rdma netdevice
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 79401e6c6aa9..785c37cae3c0 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -173,7 +173,7 @@ struct bnxt_re_dev {
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
- /* QP for for handling QP1 packets */
+ /* QP for handling QP1 packets */
struct bnxt_re_gsi_context gsi_ctx;
struct bnxt_re_stats stats;
atomic_t nq_alloc_cnt;
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index 6eb739052121..14b92e12bf29 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HFI1
tristate "Cornelis OPX Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT && I2C
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 2e4cf2b11653..629beff053ad 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1179,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
goto done;
ret = init_user_ctxt(fd, uctxt);
- if (ret)
+ if (ret) {
+ hfi1_free_ctxt_rcv_groups(uctxt);
goto done;
+ }
user_init(uctxt);
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index d6bbdb8fcb50..5d9a7b09ca37 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -742,9 +742,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
kzalloc_node(sizeof(*tx->sdma_hdr),
GFP_KERNEL, priv->dd->node);
- netif_tx_napi_add(dev, &txq->napi,
- hfi1_ipoib_poll_tx_ring,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
}
return 0;
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 03b098a494b5..3dfa5aff2512 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
* right now.
*/
set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
- netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+ netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
rc = msix_netdev_request_rcd_irq(rxq->rcd);
if (rc)
goto bail_context_irq_failure;
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 136f9a99e1e0..7690f996d5e3 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -172,7 +172,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
}
/*
- * Read nbytes from "from" and and place them in the low bytes
+ * Read nbytes from "from" and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 2855e9ad4b32..f848eedc6a23 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -959,6 +959,7 @@ struct hns_roce_dev {
const struct hns_roce_hw *hw;
void *priv;
struct workqueue_struct *irq_workq;
+ struct work_struct ecc_work;
const struct hns_roce_dfx_hw *dfx;
u32 func_num;
u32 is_vf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index ba3c742258ef..cbdafaac678a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -55,6 +55,42 @@ enum {
CMD_RST_PRC_EBUSY,
};
+enum ecc_resource_type {
+ ECC_RESOURCE_QPC,
+ ECC_RESOURCE_CQC,
+ ECC_RESOURCE_MPT,
+ ECC_RESOURCE_SRQC,
+ ECC_RESOURCE_GMV,
+ ECC_RESOURCE_QPC_TIMER,
+ ECC_RESOURCE_CQC_TIMER,
+ ECC_RESOURCE_SCCC,
+ ECC_RESOURCE_COUNT,
+};
+
+static const struct {
+ const char *name;
+ u8 read_bt0_op;
+ u8 write_bt0_op;
+} fmea_ram_res[] = {
+ { "ECC_RESOURCE_QPC",
+ HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 },
+ { "ECC_RESOURCE_CQC",
+ HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 },
+ { "ECC_RESOURCE_MPT",
+ HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 },
+ { "ECC_RESOURCE_SRQC",
+ HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 },
+ /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */
+ { "ECC_RESOURCE_GMV",
+ 0, 0 },
+ { "ECC_RESOURCE_QPC_TIMER",
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 },
+ { "ECC_RESOURCE_CQC_TIMER",
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 },
+ { "ECC_RESOURCE_SCCC",
+ HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 },
+};
+
static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
struct ib_sge *sg)
{
@@ -5855,12 +5891,12 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
-static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
- int aeqe_found = 0;
+ irqreturn_t aeqe_found = IRQ_NONE;
int event_type;
u32 queue_num;
int sub_type;
@@ -5914,7 +5950,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
eq->event_type = event_type;
eq->sub_type = sub_type;
++eq->cons_index;
- aeqe_found = 1;
+ aeqe_found = IRQ_HANDLED;
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
@@ -5922,7 +5958,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
}
update_eq_db(eq);
- return aeqe_found;
+
+ return IRQ_RETVAL(aeqe_found);
}
static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
@@ -5937,11 +5974,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
-static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
- int ceqe_found = 0;
+ irqreturn_t ceqe_found = IRQ_NONE;
u32 cqn;
while (ceqe) {
@@ -5955,21 +5992,21 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
hns_roce_cq_completion(hr_dev, cqn);
++eq->cons_index;
- ceqe_found = 1;
+ ceqe_found = IRQ_HANDLED;
ceqe = next_ceqe_sw_v2(eq);
}
update_eq_db(eq);
- return ceqe_found;
+ return IRQ_RETVAL(ceqe_found);
}
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work;
+ irqreturn_t int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
@@ -5981,27 +6018,22 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
return IRQ_RETVAL(int_work);
}
-static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
+ u32 int_st)
{
- struct hns_roce_dev *hr_dev = dev_id;
- struct device *dev = hr_dev->dev;
- int int_work = 0;
- u32 int_st;
+ struct pci_dev *pdev = hr_dev->pci_dev;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops = ae_dev->ops;
+ irqreturn_t int_work = IRQ_NONE;
u32 int_en;
- /* Abnormal interrupt */
- int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
- struct pci_dev *pdev = hr_dev->pci_dev;
- struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
- const struct hnae3_ae_ops *ops = ae_dev->ops;
+ dev_err(hr_dev->dev, "AEQ overflow!\n");
- dev_err(dev, "AEQ overflow!\n");
-
- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
+ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
@@ -6013,19 +6045,165 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
- int_work = 1;
- } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_RAS_INT_S)) {
- dev_err(dev, "RAS interrupt!\n");
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no basic abn irq found.\n");
+ }
+
+ return IRQ_RETVAL(int_work);
+}
- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_RAS_INT_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ int ret;
- int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR);
+ ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE);
+ ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG);
+
+ return 0;
+}
+
+static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 addr_upper;
+ u32 addr_low;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read gmv, ret = %d.\n", ret);
+ return ret;
+ }
+
+ addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L);
+ addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+ hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low);
+ hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
+{
+ if (res_type == ECC_RESOURCE_QPC_TIMER ||
+ res_type == ECC_RESOURCE_CQC_TIMER ||
+ res_type == ECC_RESOURCE_SCCC)
+ return le64_to_cpu(*data);
+
+ return le64_to_cpu(*data) << PAGE_SHIFT;
+}
- int_work = 1;
+static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
+ u32 index)
+{
+ u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op;
+ u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u64 addr;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read fmea ram, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ addr = fmea_get_ram_res_addr(res_type, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to write fmea ram, ret = %d.\n",
+ ret);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ u32 res_type = ecc_info->res_type;
+ u32 index = ecc_info->index;
+ int ret;
+
+ BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT);
+
+ if (res_type >= ECC_RESOURCE_COUNT) {
+ dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n",
+ res_type);
+ return;
+ }
+
+ if (res_type == ECC_RESOURCE_GMV)
+ ret = fmea_recover_gmv(hr_dev, index);
+ else
+ ret = fmea_recover_others(hr_dev, res_type, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to recover %s, index = %u, ret = %d.\n",
+ fmea_ram_res[res_type].name, index, ret);
+}
+
+static void fmea_ram_ecc_work(struct work_struct *ecc_work)
+{
+ struct hns_roce_dev *hr_dev =
+ container_of(ecc_work, struct hns_roce_dev, ecc_work);
+ struct fmea_ram_ecc ecc_info = {};
+
+ if (fmea_ram_ecc_query(hr_dev, &ecc_info)) {
+ dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n");
+ return;
+ }
+
+ if (!ecc_info.is_ecc_err) {
+ dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n");
+ return;
+ }
+
+ fmea_ram_ecc_recover(hr_dev, &ecc_info);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_st;
+
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
+
+ if (int_st) {
+ int_work = abnormal_interrupt_basic(hr_dev, int_st);
+ } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ queue_work(hr_dev->irq_workq, &hr_dev->ecc_work);
+ int_work = IRQ_HANDLED;
} else {
- dev_err(dev, "There is no abnormal irq found!\n");
+ dev_err(hr_dev->dev, "there is no abnormal irq found.\n");
}
return IRQ_RETVAL(int_work);
@@ -6342,6 +6520,8 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
}
}
+ INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
+
hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
if (!hr_dev->irq_workq) {
dev_err(dev, "failed to create irq workqueue.\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 7ffb7824d268..f96debac30fe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -250,6 +250,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
HNS_ROCE_OPC_EXT_CFG = 0x8512,
+ HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
@@ -1107,6 +1108,11 @@ enum {
#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
+/* Fields of HNS_ROCE_QUERY_RAM_ECC */
+#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
+#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
+#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
+
struct hns_roce_cfg_sgid_tb {
__le32 table_idx_rsv;
__le32 vf_sgid_l;
@@ -1343,6 +1349,12 @@ struct hns_roce_dip {
struct list_head node; /* all dips are on a list */
};
+struct fmea_ram_ecc {
+ u32 is_ecc_err;
+ u32 res_type;
+ u32 index;
+};
+
/* only for RNR timeout issue of HIP08 */
#define HNS_ROCE_CLOCK_ADJUST 1000
#define HNS_ROCE_MAX_CQ_PERIOD 65
@@ -1382,7 +1394,6 @@ struct hns_roce_dip {
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
-#define HNS_ROCE_V2_VF_INT_ST_RAS_INT_S 1
#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 638bf4a1ed94..e4d837f82d75 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
list_for_each_entry (listen_node, &cm_core->listen_list, list) {
memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
listen_port = listen_node->loc_port;
+ if (listen_port != dst_port ||
+ !(listener_state & listen_node->listener_state))
+ continue;
/* compare node pair, return node handle if a match */
- if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
- !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
- listen_port == dst_port &&
- vlan_id == listen_node->vlan_id &&
- (listener_state & listen_node->listener_state)) {
+ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
+ (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
+ vlan_id == listen_node->vlan_id)) {
refcount_inc(&listen_node->refcnt);
spin_unlock_irqrestore(&cm_core->listen_list_lock,
flags);
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index 58c0e181ca2b..a41e0d21143a 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -4872,10 +4872,12 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
sd_diff = sd_needed - hmc_fpm_misc->max_sds;
if (sd_diff > 128) {
- if (qpwanted > 128 && sd_diff > 144)
+ if (!(loop_count % 2) && qpwanted > 128) {
qpwanted /= 2;
- mrwanted /= 2;
- pblewanted /= 2;
+ } else {
+ mrwanted /= 2;
+ pblewanted /= 2;
+ }
continue;
}
if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index dd3943d22dc6..4f132c6fb653 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -257,10 +257,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
iwqp->last_aeq = info->ae_id;
spin_unlock_irqrestore(&iwqp->lock, flags);
ctx_info = &iwqp->ctx_info;
- if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1))
- ctx_info->roce_info->err_rq_idx_valid = true;
- else
- ctx_info->iwarp_info->err_rq_idx_valid = true;
} else {
if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR)
continue;
@@ -370,16 +366,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
- if (rdma_protocol_roce(&iwdev->ibdev, 1))
- ctx_info->roce_info->err_rq_idx_valid = false;
- else
- ctx_info->iwarp_info->err_rq_idx_valid = false;
- fallthrough;
default:
- ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n",
- info->ae_id, info->qp, info->qp_cq_id);
+ ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
- if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) {
+ ctx_info->roce_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
ctx_info->roce_info->err_rq_idx = info->wqe_idx;
irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
ctx_info);
@@ -388,7 +380,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
irdma_cm_disconn(iwqp);
break;
}
- if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) {
+ ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
ctx_info->tcp_info_valid = false;
ctx_info->iwarp_info_valid = true;
@@ -1512,10 +1505,7 @@ static int irdma_hmc_setup(struct irdma_pci_f *rf)
int status;
u32 qpcnt;
- if (rf->rdma_ver == IRDMA_GEN_1)
- qpcnt = rsrc_limits_table[rf->limits_sel].qplimit * 2;
- else
- qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
+ qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
rf->sd_type = IRDMA_SD_TYPE_DIRECT;
status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt);
@@ -1543,7 +1533,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
rf->obj_mem.pa);
rf->obj_mem.va = NULL;
if (rf->rdma_ver != IRDMA_GEN_1) {
- kfree(rf->allocated_ws_nodes);
+ bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
}
kfree(rf->ceqlist);
@@ -1972,9 +1962,8 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
u32 ret;
if (rf->rdma_ver != IRDMA_GEN_1) {
- rf->allocated_ws_nodes =
- kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES),
- sizeof(unsigned long), GFP_KERNEL);
+ rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES,
+ GFP_KERNEL);
if (!rf->allocated_ws_nodes)
return -ENOMEM;
@@ -2023,7 +2012,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
return 0;
mem_rsrc_kzalloc_fail:
- kfree(rf->allocated_ws_nodes);
+ bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
return ret;
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index ef862bced20f..65e966ad3453 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -85,7 +85,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_NO_QSET 0xffff
#define IW_CFG_FPM_QP_COUNT 32768
-#define IRDMA_MAX_PAGES_PER_FMR 512
+#define IRDMA_MAX_PAGES_PER_FMR 262144
#define IRDMA_MIN_PAGES_PER_FMR 1
#define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2
#define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index ab3c5208a123..fdf4cc88cb91 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -652,6 +652,7 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
};
static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
+ {0xffff, 0x8002, "Invalid State"},
{0xffff, 0x8006, "Flush No Wqe Pending"},
{0xffff, 0x8007, "Modify QP Bad Close"},
{0xffff, 0x8009, "LLP Closed"},
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index c4412ece5a6d..d78b11a41b6b 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1776,11 +1776,11 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
spin_unlock_irqrestore(&iwcq->lock, flags);
irdma_cq_wq_destroy(iwdev->rf, cq);
- irdma_cq_free_rsrc(iwdev->rf, iwcq);
spin_lock_irqsave(&iwceq->ce_lock, flags);
irdma_sc_cleanup_ceqes(cq, ceq);
spin_unlock_irqrestore(&iwceq->ce_lock, flags);
+ irdma_cq_free_rsrc(iwdev->rf, iwcq);
return 0;
}
@@ -2605,7 +2605,7 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
palloc = &iwpbl->pble_alloc;
iwmr->page_cnt = max_num_sg;
err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
- true);
+ false);
if (err_code)
goto err_get_pble;
@@ -2641,8 +2641,16 @@ static int irdma_set_page(struct ib_mr *ibmr, u64 addr)
if (unlikely(iwmr->npages == iwmr->page_cnt))
return -ENOMEM;
- pbl = palloc->level1.addr;
- pbl[iwmr->npages++] = addr;
+ if (palloc->level == PBLE_LEVEL_2) {
+ struct irdma_pble_info *palloc_info =
+ palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT);
+
+ palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr;
+ } else {
+ pbl = palloc->level1.addr;
+ pbl[iwmr->npages] = addr;
+ }
+ iwmr->npages++;
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 08371a80fdc2..be189e0525de 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -523,6 +523,10 @@ repoll:
"Requestor" : "Responder", cq->mcq.cqn);
mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (wc->status != IB_WC_WR_FLUSH_ERR &&
+ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
if (opcode == MLX5_CQE_REQ_ERR) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index 001d766cf291..3669c90b2dad 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -336,9 +336,15 @@ err_copy:
static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
{
- return uapi_type == MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM ?
- MLX5_SW_ICM_TYPE_STEERING :
- MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ switch (uapi_type) {
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ default:
+ return MLX5_SW_ICM_TYPE_STEERING;
+ }
}
static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
@@ -347,11 +353,32 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
int type)
{
struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
- enum mlx5_sw_icm_type icm_type = get_icm_type(type);
+ enum mlx5_sw_icm_type icm_type;
struct mlx5_ib_dm_icm *dm;
u64 act_size;
int err;
+ if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW))
+ return ERR_PTR(-EPERM);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2)))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return ERR_PTR(-ENOMEM);
@@ -359,19 +386,6 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
dm->base.type = type;
dm->base.ibdm.device = ctx->device;
- if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) {
- err = -EPERM;
- goto free;
- }
-
- if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))) {
- err = -EOPNOTSUPP;
- goto free;
- }
-
/* Allocation size must a multiple of the basic block size
* and a power of 2.
*/
@@ -379,6 +393,8 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
act_size = roundup_pow_of_two(act_size);
dm->base.size = act_size;
+ icm_type = get_icm_type(type);
+
err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment,
to_mucontext(ctx)->devx_uid,
&dm->base.dev_addr, &dm->obj_id);
@@ -420,8 +436,8 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
return handle_alloc_dm_memic(context, attr, attrs);
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- return handle_alloc_dm_sw_icm(context, attr, attrs, type);
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return handle_alloc_dm_sw_icm(context, attr, attrs, type);
default:
return ERR_PTR(-EOPNOTSUPP);
@@ -474,6 +490,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
return 0;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
default:
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 39ffb363ba0c..691d00c89f33 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -679,7 +679,15 @@ enum flow_table_type {
#define MLX5_FS_MAX_TYPES 6
#define MLX5_FS_MAX_ENTRIES BIT(16)
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
+}
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
@@ -688,6 +696,8 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
+ if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
@@ -784,8 +794,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
+ return _get_prio(dev, ns, prio, priority, max_table_size,
+ num_groups, flags);
return prio;
}
@@ -927,7 +937,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
prio = &dev->flow_db->opfcs[type];
if (!prio->flow_table) {
- prio = _get_prio(ns, prio, priority,
+ prio = _get_prio(dev, ns, prio, priority,
dev->num_ports * MAX_OPFC_RULES, 1, 0);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
@@ -1407,8 +1417,8 @@ free_ucmd:
}
static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
+_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
+ enum mlx5_flow_namespace_type ns_type,
bool mcast)
{
struct mlx5_flow_namespace *ns = NULL;
@@ -1421,11 +1431,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+ priority = ib_prio_to_core_prio(user_priority, false);
esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- switch (fs_matcher->ns_type) {
+ switch (ns_type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
@@ -1452,17 +1462,17 @@ _get_flow_table(struct mlx5_ib_dev *dev,
reformat_l3_tunnel_to_l2) &&
esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- priority = fs_matcher->priority;
+ priority = user_priority;
break;
case MLX5_FLOW_NAMESPACE_RDMA_RX:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
- priority = fs_matcher->priority;
+ priority = user_priority;
break;
case MLX5_FLOW_NAMESPACE_RDMA_TX:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
- priority = fs_matcher->priority;
+ priority = user_priority;
break;
default:
break;
@@ -1470,11 +1480,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
- switch (fs_matcher->ns_type) {
+ switch (ns_type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
prio = &dev->flow_db->prios[priority];
break;
@@ -1499,7 +1509,7 @@ _get_flow_table(struct mlx5_ib_dev *dev,
if (prio->flow_table)
return prio;
- return _get_prio(ns, prio, priority, max_table_size,
+ return _get_prio(dev, ns, prio, priority, max_table_size,
MLX5_FS_MAX_TYPES, flags);
}
@@ -1618,7 +1628,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
+ ft_prio = _get_flow_table(dev, fs_matcher->priority,
+ fs_matcher->ns_type, mcast);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -2015,6 +2026,23 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
return 0;
}
+static int steering_anchor_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_steering_anchor *obj = uobject->object;
+
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
+
+ mutex_lock(&obj->dev->flow_db->lock);
+ put_flow_table(obj->dev, obj->ft_prio, true);
+ mutex_unlock(&obj->dev->flow_db->lock);
+
+ kfree(obj);
+ return 0;
+}
+
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
struct mlx5_ib_flow_matcher *obj)
{
@@ -2121,6 +2149,75 @@ end:
return err;
}
+static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_ib_steering_anchor *obj;
+ struct mlx5_ib_flow_prio *ft_prio;
+ u16 priority;
+ u32 ft_id;
+ int err;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ err = uverbs_get_const(&ib_uapi_ft_type, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
+ if (err)
+ return err;
+
+ err = uverbs_copy_from(&priority, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
+ if (err)
+ return err;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ mutex_lock(&dev->flow_db->lock);
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+ if (IS_ERR(ft_prio)) {
+ mutex_unlock(&dev->flow_db->lock);
+ err = PTR_ERR(ft_prio);
+ goto free_obj;
+ }
+
+ ft_prio->refcount++;
+ ft_id = mlx5_flow_table_id(ft_prio->flow_table);
+ mutex_unlock(&dev->flow_db->lock);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ &ft_id, sizeof(ft_id));
+ if (err)
+ goto put_flow_table;
+
+ uobj->object = obj;
+ obj->dev = dev;
+ obj->ft_prio = ft_prio;
+ atomic_set(&obj->usecnt, 0);
+
+ return 0;
+
+put_flow_table:
+ mutex_lock(&dev->flow_db->lock);
+ put_flow_table(dev, ft_prio, true);
+ mutex_unlock(&dev->flow_db->lock);
+free_obj:
+ kfree(obj);
+
+ return err;
+}
+
static struct ib_flow_action *
mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
enum mlx5_ib_uapi_flow_table_type ft_type,
@@ -2477,6 +2574,35 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
+
const struct uapi_definition mlx5_ib_flow_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_FLOW_MATCHER),
@@ -2485,6 +2611,9 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
&mlx5_ib_fs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_actions),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
{},
};
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b68fddeac0f1..a174a0eee8dc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4002,7 +4002,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
int err;
- err = mlx5_mr_cache_cleanup(dev);
+ err = mlx5_mkey_cache_cleanup(dev);
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
@@ -4022,7 +4022,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
if (ret)
return ret;
- ret = mlx5_mr_cache_init(dev);
+ ret = mlx5_mkey_cache_init(dev);
if (ret) {
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
mlx5r_umr_resource_cleanup(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 998b67509a53..2e2ad3918385 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -259,6 +259,12 @@ struct mlx5_ib_flow_matcher {
u8 match_criteria_enable;
};
+struct mlx5_ib_steering_anchor {
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_dev *dev;
+ atomic_t usecnt;
+};
+
struct mlx5_ib_pp {
u16 index;
struct mlx5_core_dev *mdev;
@@ -613,6 +619,7 @@ struct mlx5_ib_mkey {
unsigned int ndescs;
struct wait_queue_head wait;
refcount_t usecount;
+ struct mlx5_cache_ent *cache_ent;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -635,20 +642,9 @@ struct mlx5_ib_mr {
struct ib_mr ibmr;
struct mlx5_ib_mkey mmkey;
- /* User MR data */
- struct mlx5_cache_ent *cache_ent;
- /* Everything after cache_ent is zero'd when MR allocated */
struct ib_umem *umem;
union {
- /* Used only while the MR is in the cache */
- struct {
- u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
- struct mlx5_async_work cb_work;
- /* Cache list element */
- struct list_head list;
- };
-
/* Used only by kernel MRs (umem == NULL) */
struct {
void *descs;
@@ -688,12 +684,6 @@ struct mlx5_ib_mr {
};
};
-/* Zero the fields in the mr that are variant depending on usage */
-static inline void mlx5_clear_mr(struct mlx5_ib_mr *mr)
-{
- memset_after(mr, 0, cache_ent);
-}
-
static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
{
return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
@@ -717,21 +707,29 @@ struct mlx5_ib_umr_context {
struct completion done;
};
+enum {
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+};
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- /* control access to UMR QP
+ /* Protects from UMR QP overflow
*/
struct semaphore sem;
+ /* Protects from using UMR while the UMR is not active
+ */
+ struct mutex lock;
+ unsigned int state;
};
struct mlx5_cache_ent {
- struct list_head head;
- /* sync access to the cahce entry
- */
- spinlock_t lock;
-
+ struct xarray mkeys;
+ unsigned long stored;
+ unsigned long reserved;
char name[4];
u32 order;
@@ -743,18 +741,11 @@ struct mlx5_cache_ent {
u8 fill_to_high_water:1;
/*
- * - available_mrs is the length of list head, ie the number of MRs
- * available for immediate allocation.
- * - total_mrs is available_mrs plus all in use MRs that could be
- * returned to the cache.
- * - limit is the low water mark for available_mrs, 2* limit is the
+ * - limit is the low water mark for stored mkeys, 2* limit is the
* upper water mark.
- * - pending is the number of MRs currently being created
*/
- u32 total_mrs;
- u32 available_mrs;
+ u32 in_use;
u32 limit;
- u32 pending;
/* Statistics */
u32 miss;
@@ -763,9 +754,19 @@ struct mlx5_cache_ent {
struct delayed_work dwork;
};
-struct mlx5_mr_cache {
+struct mlx5r_async_create_mkey {
+ union {
+ u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+ u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
+ };
+ struct mlx5_async_work cb_work;
+ struct mlx5_cache_ent *ent;
+ u32 mkey;
+};
+
+struct mlx5_mkey_cache {
struct workqueue_struct *wq;
- struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
+ struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES];
struct dentry *root;
unsigned long last_add;
};
@@ -1064,7 +1065,7 @@ struct mlx5_ib_dev {
struct mlx5_ib_resources devr;
atomic_t mkey_var;
- struct mlx5_mr_cache cache;
+ struct mlx5_mkey_cache cache;
struct timer_list delay_timer;
/* Prevents soft lock on massive reg MRs */
struct mutex slow_path_mutex;
@@ -1309,8 +1310,8 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
u64 access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
struct mlx5_cache_ent *ent,
@@ -1338,7 +1339,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags);
@@ -1357,7 +1358,7 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags) {}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1e7653c997b5..129d531bd01b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -82,15 +82,14 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
MLX5_SET64(mkc, mkc, start_addr, start_addr);
}
-static void assign_mkey_variant(struct mlx5_ib_dev *dev,
- struct mlx5_ib_mkey *mkey, u32 *in)
+static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
{
u8 key = atomic_inc_return(&dev->mkey_var);
void *mkc;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, mkey_7_0, key);
- mkey->key = key;
+ *mkey = key;
}
static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
@@ -98,7 +97,7 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
{
int ret;
- assign_mkey_variant(dev, mkey, in);
+ assign_mkey_variant(dev, &mkey->key, in);
ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
if (!ret)
init_waitqueue_head(&mkey->wait);
@@ -106,20 +105,21 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
return ret;
}
-static int
-mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
- struct mlx5_ib_mkey *mkey,
- struct mlx5_async_ctx *async_ctx,
- u32 *in, int inlen, u32 *out, int outlen,
- struct mlx5_async_work *context)
+static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
{
- MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
- assign_mkey_variant(dev, mkey, in);
- return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
- create_mkey_callback, context);
+ struct mlx5_ib_dev *dev = async_create->ent->dev;
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
+
+ MLX5_SET(create_mkey_in, async_create->in, opcode,
+ MLX5_CMD_OP_CREATE_MKEY);
+ assign_mkey_variant(dev, &async_create->mkey, async_create->in);
+ return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
+ async_create->out, outlen, create_mkey_callback,
+ &async_create->cb_work);
}
-static int mr_cache_max_order(struct mlx5_ib_dev *dev);
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
@@ -142,40 +142,132 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
}
+
+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
+ void *to_store)
+{
+ XA_STATE(xas, &ent->mkeys, 0);
+ void *curr;
+
+ xa_lock_irq(&ent->mkeys);
+ if (limit_pendings &&
+ (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
+ xa_unlock_irq(&ent->mkeys);
+ return -EAGAIN;
+ }
+ while (1) {
+ /*
+ * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
+ * doesn't transparently unlock. Instead we set the xas index to
+ * the current value of reserved every iteration.
+ */
+ xas_set(&xas, ent->reserved);
+ curr = xas_load(&xas);
+ if (!curr) {
+ if (to_store && ent->stored == ent->reserved)
+ xas_store(&xas, to_store);
+ else
+ xas_store(&xas, XA_ZERO_ENTRY);
+ if (xas_valid(&xas)) {
+ ent->reserved++;
+ if (to_store) {
+ if (ent->stored != ent->reserved)
+ __xa_store(&ent->mkeys,
+ ent->stored,
+ to_store,
+ GFP_KERNEL);
+ ent->stored++;
+ queue_adjust_cache_locked(ent);
+ WRITE_ONCE(ent->dev->cache.last_add,
+ jiffies);
+ }
+ }
+ }
+ xa_unlock_irq(&ent->mkeys);
+
+ /*
+ * Notice xas_nomem() must always be called as it cleans
+ * up any cached allocation.
+ */
+ if (!xas_nomem(&xas, GFP_KERNEL))
+ break;
+ xa_lock_irq(&ent->mkeys);
+ }
+ if (xas_error(&xas))
+ return xas_error(&xas);
+ if (WARN_ON(curr))
+ return -EINVAL;
+ return 0;
+}
+
+static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
+{
+ void *old;
+
+ ent->reserved--;
+ old = __xa_erase(&ent->mkeys, ent->reserved);
+ WARN_ON(old);
+}
+
+static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
+{
+ void *old;
+
+ old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
+ WARN_ON(old);
+ ent->stored++;
+}
+
+static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
+{
+ void *old, *xa_mkey;
+
+ ent->stored--;
+ ent->reserved--;
+
+ if (ent->stored == ent->reserved) {
+ xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
+ WARN_ON(!xa_mkey);
+ return (u32)xa_to_value(xa_mkey);
+ }
+
+ xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
+ old = __xa_erase(&ent->mkeys, ent->reserved);
+ WARN_ON(old);
+ return (u32)xa_to_value(xa_mkey);
+}
+
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
- struct mlx5_ib_mr *mr =
- container_of(context, struct mlx5_ib_mr, cb_work);
- struct mlx5_cache_ent *ent = mr->cache_ent;
+ struct mlx5r_async_create_mkey *mkey_out =
+ container_of(context, struct mlx5r_async_create_mkey, cb_work);
+ struct mlx5_cache_ent *ent = mkey_out->ent;
struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
if (status) {
- create_mkey_warn(dev, status, mr->out);
- kfree(mr);
- spin_lock_irqsave(&ent->lock, flags);
- ent->pending--;
+ create_mkey_warn(dev, status, mkey_out->out);
+ kfree(mkey_out);
+ xa_lock_irqsave(&ent->mkeys, flags);
+ undo_push_reserve_mkey(ent);
WRITE_ONCE(dev->fill_delay, 1);
- spin_unlock_irqrestore(&ent->lock, flags);
+ xa_unlock_irqrestore(&ent->mkeys, flags);
mod_timer(&dev->delay_timer, jiffies + HZ);
return;
}
- mr->mmkey.type = MLX5_MKEY_MR;
- mr->mmkey.key |= mlx5_idx_to_mkey(
- MLX5_GET(create_mkey_out, mr->out, mkey_index));
- init_waitqueue_head(&mr->mmkey.wait);
-
+ mkey_out->mkey |= mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
WRITE_ONCE(dev->cache.last_add, jiffies);
- spin_lock_irqsave(&ent->lock, flags);
- list_add_tail(&mr->list, &ent->head);
- ent->available_mrs++;
- ent->total_mrs++;
+ xa_lock_irqsave(&ent->mkeys, flags);
+ push_to_reserved(ent, mkey_out->mkey);
/* If we are doing fill_to_high_water then keep going. */
queue_adjust_cache_locked(ent);
- ent->pending--;
- spin_unlock_irqrestore(&ent->lock, flags);
+ xa_unlock_irqrestore(&ent->mkeys, flags);
+ kfree(mkey_out);
}
static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
@@ -197,15 +289,8 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
return ret;
}
-static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
+static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
{
- struct mlx5_ib_mr *mr;
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return NULL;
- mr->cache_ent = ent;
-
set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
@@ -215,133 +300,106 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
MLX5_SET(mkc, mkc, translations_octword_size,
get_mkc_octo_size(ent->access_mode, ent->ndescs));
MLX5_SET(mkc, mkc, log_page_size, ent->page);
- return mr;
}
/* Asynchronously schedule new MRs to be populated in the cache. */
static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
- size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mr *mr;
+ struct mlx5r_async_create_mkey *async_create;
void *mkc;
- u32 *in;
int err = 0;
int i;
- in = kzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
- mr = alloc_cache_mr(ent, mkc);
- if (!mr) {
- err = -ENOMEM;
- break;
- }
- spin_lock_irq(&ent->lock);
- if (ent->pending >= MAX_PENDING_REG_MR) {
- err = -EAGAIN;
- spin_unlock_irq(&ent->lock);
- kfree(mr);
- break;
- }
- ent->pending++;
- spin_unlock_irq(&ent->lock);
- err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
- &ent->dev->async_ctx, in, inlen,
- mr->out, sizeof(mr->out),
- &mr->cb_work);
+ async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
+ GFP_KERNEL);
+ if (!async_create)
+ return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
+ memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
+ async_create->ent = ent;
+
+ err = push_mkey(ent, true, NULL);
+ if (err)
+ goto free_async_create;
+
+ err = mlx5_ib_create_mkey_cb(async_create);
if (err) {
- spin_lock_irq(&ent->lock);
- ent->pending--;
- spin_unlock_irq(&ent->lock);
mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
- kfree(mr);
- break;
+ goto err_undo_reserve;
}
}
- kfree(in);
+ return 0;
+
+err_undo_reserve:
+ xa_lock_irq(&ent->mkeys);
+ undo_push_reserve_mkey(ent);
+ xa_unlock_irq(&ent->mkeys);
+free_async_create:
+ kfree(async_create);
return err;
}
/* Synchronously create a MR in the cache */
-static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
+static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
{
size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
int err;
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
- mr = alloc_cache_mr(ent, mkc);
- if (!mr) {
- err = -ENOMEM;
- goto free_in;
- }
-
- err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen);
+ err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
if (err)
- goto free_mr;
+ goto free_in;
- init_waitqueue_head(&mr->mmkey.wait);
- mr->mmkey.type = MLX5_MKEY_MR;
WRITE_ONCE(ent->dev->cache.last_add, jiffies);
- spin_lock_irq(&ent->lock);
- ent->total_mrs++;
- spin_unlock_irq(&ent->lock);
- kfree(in);
- return mr;
-free_mr:
- kfree(mr);
free_in:
kfree(in);
- return ERR_PTR(err);
+ return err;
}
static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
- struct mlx5_ib_mr *mr;
+ u32 mkey;
- lockdep_assert_held(&ent->lock);
- if (list_empty(&ent->head))
+ lockdep_assert_held(&ent->mkeys.xa_lock);
+ if (!ent->stored)
return;
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->available_mrs--;
- ent->total_mrs--;
- spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key);
- kfree(mr);
- spin_lock_irq(&ent->lock);
+ mkey = pop_stored_mkey(ent);
+ xa_unlock_irq(&ent->mkeys);
+ mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
+ xa_lock_irq(&ent->mkeys);
}
static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
bool limit_fill)
+ __acquires(&ent->mkeys) __releases(&ent->mkeys)
{
int err;
- lockdep_assert_held(&ent->lock);
+ lockdep_assert_held(&ent->mkeys.xa_lock);
while (true) {
if (limit_fill)
target = ent->limit * 2;
- if (target == ent->available_mrs + ent->pending)
+ if (target == ent->reserved)
return 0;
- if (target > ent->available_mrs + ent->pending) {
- u32 todo = target - (ent->available_mrs + ent->pending);
+ if (target > ent->reserved) {
+ u32 todo = target - ent->reserved;
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
err = add_keys(ent, todo);
if (err == -EAGAIN)
usleep_range(3000, 5000);
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (err) {
if (err != -EAGAIN)
return err;
@@ -366,15 +424,15 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
/*
* Target is the new value of total_mrs the user requests, however we
- * cannot free MRs that are in use. Compute the target value for
- * available_mrs.
+ * cannot free MRs that are in use. Compute the target value for stored
+ * mkeys.
*/
- spin_lock_irq(&ent->lock);
- if (target < ent->total_mrs - ent->available_mrs) {
+ xa_lock_irq(&ent->mkeys);
+ if (target < ent->in_use) {
err = -EINVAL;
goto err_unlock;
}
- target = target - (ent->total_mrs - ent->available_mrs);
+ target = target - ent->in_use;
if (target < ent->limit || target > ent->limit*2) {
err = -EINVAL;
goto err_unlock;
@@ -382,12 +440,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
err = resize_available_mrs(ent, target, false);
if (err)
goto err_unlock;
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
return count;
err_unlock:
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
return err;
}
@@ -398,7 +456,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
+ err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use);
if (err < 0)
return err;
@@ -427,10 +485,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
* Upon set we immediately fill the cache to high water mark implied by
* the limit.
*/
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
ent->limit = var;
err = resize_available_mrs(ent, 0, true);
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
if (err)
return err;
return count;
@@ -457,17 +515,17 @@ static const struct file_operations limit_fops = {
.read = limit_read,
};
-static bool someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mkey_cache *cache)
{
unsigned int i;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
struct mlx5_cache_ent *ent = &cache->ent[i];
bool ret;
- spin_lock_irq(&ent->lock);
- ret = ent->available_mrs < ent->limit;
- spin_unlock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
+ ret = ent->stored < ent->limit;
+ xa_unlock_irq(&ent->mkeys);
if (ret)
return true;
}
@@ -481,26 +539,26 @@ static bool someone_adding(struct mlx5_mr_cache *cache)
*/
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
- lockdep_assert_held(&ent->lock);
+ lockdep_assert_held(&ent->mkeys.xa_lock);
if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
return;
- if (ent->available_mrs < ent->limit) {
+ if (ent->stored < ent->limit) {
ent->fill_to_high_water = true;
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
} else if (ent->fill_to_high_water &&
- ent->available_mrs + ent->pending < 2 * ent->limit) {
+ ent->reserved < 2 * ent->limit) {
/*
* Once we start populating due to hitting a low water mark
* continue until we pass the high water mark.
*/
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
- } else if (ent->available_mrs == 2 * ent->limit) {
+ } else if (ent->stored == 2 * ent->limit) {
ent->fill_to_high_water = false;
- } else if (ent->available_mrs > 2 * ent->limit) {
+ } else if (ent->stored > 2 * ent->limit) {
/* Queue deletion of excess entries */
ent->fill_to_high_water = false;
- if (ent->pending)
+ if (ent->stored != ent->reserved)
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(1000));
else
@@ -511,25 +569,24 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_dev *dev = ent->dev;
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
int err;
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (ent->disabled)
goto out;
- if (ent->fill_to_high_water &&
- ent->available_mrs + ent->pending < 2 * ent->limit &&
+ if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit &&
!READ_ONCE(dev->fill_delay)) {
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
err = add_keys(ent, 1);
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (ent->disabled)
goto out;
if (err) {
/*
- * EAGAIN only happens if pending is positive, so we
- * will be rescheduled from reg_mr_callback(). The only
+ * EAGAIN only happens if there are pending MRs, so we
+ * will be rescheduled when storing them. The only
* failure path here is ENOMEM.
*/
if (err != -EAGAIN) {
@@ -541,7 +598,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
msecs_to_jiffies(1000));
}
}
- } else if (ent->available_mrs > 2 * ent->limit) {
+ } else if (ent->stored > 2 * ent->limit) {
bool need_delay;
/*
@@ -556,11 +613,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
* the garbage collection work to try to run in next cycle, in
* order to free CPU resources to other tasks.
*/
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
need_delay = need_resched() || someone_adding(cache) ||
!time_after(jiffies,
READ_ONCE(cache->last_add) + 300 * HZ);
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
if (ent->disabled)
goto out;
if (need_delay) {
@@ -571,7 +628,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
queue_adjust_cache_locked(ent);
}
out:
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
}
static void delayed_cache_work_func(struct work_struct *work)
@@ -587,73 +644,59 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
int access_flags)
{
struct mlx5_ib_mr *mr;
+ int err;
- /* Matches access in alloc_cache_mr() */
if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
return ERR_PTR(-EOPNOTSUPP);
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ xa_lock_irq(&ent->mkeys);
+ ent->in_use++;
+
+ if (!ent->stored) {
queue_adjust_cache_locked(ent);
ent->miss++;
- spin_unlock_irq(&ent->lock);
- mr = create_cache_mr(ent);
- if (IS_ERR(mr))
- return mr;
+ xa_unlock_irq(&ent->mkeys);
+ err = create_cache_mkey(ent, &mr->mmkey.key);
+ if (err) {
+ xa_lock_irq(&ent->mkeys);
+ ent->in_use--;
+ xa_unlock_irq(&ent->mkeys);
+ kfree(mr);
+ return ERR_PTR(err);
+ }
} else {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->available_mrs--;
+ mr->mmkey.key = pop_stored_mkey(ent);
queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
-
- mlx5_clear_mr(mr);
+ xa_unlock_irq(&ent->mkeys);
}
+ mr->mmkey.cache_ent = ent;
+ mr->mmkey.type = MLX5_MKEY_MR;
+ init_waitqueue_head(&mr->mmkey.wait);
return mr;
}
-static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
-{
- struct mlx5_cache_ent *ent = mr->cache_ent;
-
- WRITE_ONCE(dev->cache.last_add, jiffies);
- spin_lock_irq(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->available_mrs++;
- queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
-}
-
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
- struct mlx5_ib_mr *tmp_mr;
- struct mlx5_ib_mr *mr;
- LIST_HEAD(del_list);
+ u32 mkey;
cancel_delayed_work(&ent->dwork);
- while (1) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- break;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_move(&mr->list, &del_list);
- ent->available_mrs--;
- ent->total_mrs--;
- spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
- }
-
- list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
- list_del(&mr->list);
- kfree(mr);
+ xa_lock_irq(&ent->mkeys);
+ while (ent->stored) {
+ mkey = pop_stored_mkey(ent);
+ xa_unlock_irq(&ent->mkeys);
+ mlx5_core_destroy_mkey(dev->mdev, mkey);
+ xa_lock_irq(&ent->mkeys);
}
+ xa_unlock_irq(&ent->mkeys);
}
-static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
if (!mlx5_debugfs_root || dev->is_rep)
return;
@@ -662,9 +705,9 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
dev->cache.root = NULL;
}
-static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
struct dentry *dir;
int i;
@@ -674,13 +717,13 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
ent = &cache->ent[i];
sprintf(ent->name, "%d", ent->order);
dir = debugfs_create_dir(ent->name, cache->root);
debugfs_create_file("size", 0600, dir, ent, &size_fops);
debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
- debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
+ debugfs_create_ulong("cur", 0400, dir, &ent->stored);
debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
}
@@ -692,9 +735,9 @@ static void delay_time_func(struct timer_list *t)
WRITE_ONCE(dev->fill_delay, 0);
}
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int i;
@@ -707,22 +750,21 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
ent = &cache->ent[i];
- INIT_LIST_HEAD(&ent->head);
- spin_lock_init(&ent->lock);
+ xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
ent->order = i + 2;
ent->dev = dev;
ent->limit = 0;
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- if (i > MR_CACHE_LAST_STD_ENTRY) {
- mlx5_odp_init_mr_cache_entry(ent);
+ if (i > MKEY_CACHE_LAST_STD_ENTRY) {
+ mlx5_odp_init_mkey_cache_entry(ent);
continue;
}
- if (ent->order > mr_cache_max_order(dev))
+ if (ent->order > mkey_cache_max_order(dev))
continue;
ent->page = PAGE_SHIFT;
@@ -734,36 +776,36 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->limit = dev->mdev->profile.mr_cache[i].limit;
else
ent->limit = 0;
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
}
- mlx5_mr_cache_debugfs_init(dev);
+ mlx5_mkey_cache_debugfs_init(dev);
return 0;
}
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
unsigned int i;
if (!dev->cache.wq)
return 0;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
struct mlx5_cache_ent *ent = &dev->cache.ent[i];
- spin_lock_irq(&ent->lock);
+ xa_lock_irq(&ent->mkeys);
ent->disabled = true;
- spin_unlock_irq(&ent->lock);
+ xa_unlock_irq(&ent->mkeys);
cancel_delayed_work_sync(&ent->dwork);
}
- mlx5_mr_cache_debugfs_cleanup(dev);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+ for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
clean_keys(dev, i);
destroy_workqueue(dev->cache.wq);
@@ -830,22 +872,22 @@ static int get_octo_len(u64 addr, u64 len, int page_shift)
return (npages + 1) / 2;
}
-static int mr_cache_max_order(struct mlx5_ib_dev *dev)
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return MR_CACHE_LAST_STD_ENTRY + 2;
+ return MKEY_CACHE_LAST_STD_ENTRY + 2;
return MLX5_MAX_UMR_SHIFT;
}
-static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
- unsigned int order)
+static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
+ unsigned int order)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
if (order < cache->ent[0].order)
return &cache->ent[0];
order = order - cache->ent[0].order;
- if (order > MR_CACHE_LAST_STD_ENTRY)
+ if (order > MKEY_CACHE_LAST_STD_ENTRY)
return NULL;
return &cache->ent[order];
}
@@ -888,7 +930,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
0, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
- ent = mr_cache_ent_from_order(
+ ent = mkey_cache_ent_from_order(
dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
/*
* Matches access in alloc_cache_mr(). If the MR can't come from the
@@ -1083,6 +1125,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
return ERR_PTR(-EINVAL);
@@ -1319,7 +1362,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
/* We only track the allocated sizes of MRs from the cache */
- if (!mr->cache_ent)
+ if (!mr->mmkey.cache_ent)
return false;
if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
@@ -1328,7 +1371,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
if (WARN_ON(!*page_size))
return false;
- return (1ULL << mr->cache_ent->order) >=
+ return (1ULL << mr->mmkey.cache_ent->order) >=
ib_umem_num_dma_blocks(new_umem, *page_size);
}
@@ -1569,15 +1612,17 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
/* Stop DMA */
- if (mr->cache_ent) {
- if (mlx5r_umr_revoke_mr(mr)) {
- spin_lock_irq(&mr->cache_ent->lock);
- mr->cache_ent->total_mrs--;
- spin_unlock_irq(&mr->cache_ent->lock);
- mr->cache_ent = NULL;
- }
+ if (mr->mmkey.cache_ent) {
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ mr->mmkey.cache_ent->in_use--;
+ xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+
+ if (mlx5r_umr_revoke_mr(mr) ||
+ push_mkey(mr->mmkey.cache_ent, false,
+ xa_mk_value(mr->mmkey.key)))
+ mr->mmkey.cache_ent = NULL;
}
- if (!mr->cache_ent) {
+ if (!mr->mmkey.cache_ent) {
rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
if (rc)
return rc;
@@ -1594,12 +1639,10 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
mlx5_ib_free_odp_mr(mr);
}
- if (mr->cache_ent) {
- mlx5_mr_cache_free(dev, mr);
- } else {
+ if (!mr->mmkey.cache_ent)
mlx5_free_priv_descs(mr);
- kfree(mr);
- }
+
+ kfree(mr);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 84da5674e1ab..e305bf1dc6c2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1588,7 +1588,7 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
return err;
}
-void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
{
if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return;
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 3a48364c0918..e00b94d1b1ea 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -176,6 +176,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ mutex_init(&dev->umrc.lock);
return 0;
@@ -195,6 +196,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
ib_dealloc_pd(dev->umrc.pd);
}
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_qp_attr attr;
+ int err;
+
+ attr.qp_state = IB_QPS_RESET;
+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+ if (err) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto err;
+ }
+
+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+ if (err)
+ goto err;
+
+ umrc->state = MLX5_UMR_STATE_ACTIVE;
+ return 0;
+
+err:
+ umrc->state = MLX5_UMR_STATE_ERR;
+ return err;
+}
+
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
@@ -231,7 +257,7 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
id.ib_cqe = cqe;
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
- MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
+ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
mlx5r_ring_db(qp, 1, ctrl);
@@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
mlx5r_umr_init_context(&umr_context);
down(&umrc->sem);
- err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
- with_data);
- if (err)
- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
- else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
+ while (true) {
+ mutex_lock(&umrc->lock);
+ if (umrc->state == MLX5_UMR_STATE_ERR) {
+ mutex_unlock(&umrc->lock);
err = -EFAULT;
+ break;
+ }
+
+ if (umrc->state == MLX5_UMR_STATE_RECOVER) {
+ mutex_unlock(&umrc->lock);
+ usleep_range(3000, 5000);
+ continue;
+ }
+
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+ with_data);
+ mutex_unlock(&umrc->lock);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
+ err);
+ break;
}
+
+ wait_for_completion(&umr_context.done);
+
+ if (umr_context.status == IB_WC_SUCCESS)
+ break;
+
+ if (umr_context.status == IB_WC_WR_FLUSH_ERR)
+ continue;
+
+ WARN_ON_ONCE(1);
+ mlx5_ib_warn(dev,
+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
+ umr_context.status);
+ mutex_lock(&umrc->lock);
+ err = mlx5r_umr_recover(dev);
+ mutex_unlock(&umrc->lock);
+ if (err)
+ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+ err);
+ err = -EFAULT;
+ break;
}
up(&umrc->sem);
return err;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f0f43b6db89e..3ecb472c9217 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -3082,7 +3082,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
else
DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
- goto err0;
+ goto err1;
}
/* Index only, 18 bit long, lkey = itid << 8 | key */
@@ -3106,7 +3106,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
if (rc) {
DP_ERR(dev, "roce register tid returned an error %d\n", rc);
- goto err1;
+ goto err2;
}
mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
@@ -3115,8 +3115,10 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
return mr;
-err1:
+err2:
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
err0:
kfree(mr);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index b37b1c6d35c6..26c615772be3 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -321,7 +321,7 @@ struct qib_verbs_txreq {
* These 7 values (SDR, DDR, and QDR may be ORed for auto-speed
* negotiation) are used for the 3rd argument to path_f_set_ib_cfg
* with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
- * are also the the possible values for qib_link_speed_enabled and active
+ * are also the possible values for qib_link_speed_enabled and active
* The values were chosen to match values used within the IB spec.
*/
#define QIB_IB_SDR 1
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index aa290928cf96..3937144b2ae5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -153,7 +153,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt;
/*
* for this use, may be cfgctxts summed over all chips that
- * are are configured and present
+ * are configured and present
*/
kinfo->spi_nctxts = dd->cfgctxts;
/* unit (chip/board) our context is on */
@@ -851,7 +851,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
ret = -EPERM;
goto bail;
}
- /* don't allow them to later change to writeable with mprotect */
+ /* don't allow them to later change to writable with mprotect */
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
@@ -941,7 +941,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
goto bail;
}
/*
- * Don't allow permission to later change to writeable
+ * Don't allow permission to later change to writable
* with mprotect.
*/
vma->vm_flags &= ~VM_MAYWRITE;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ceed302cf6a0..6861c6384f18 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2850,9 +2850,9 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
qib_7322_free_irq(dd);
kfree(dd->cspec->cntrs);
- kfree(dd->cspec->sendchkenable);
- kfree(dd->cspec->sendgrhchk);
- kfree(dd->cspec->sendibchk);
+ bitmap_free(dd->cspec->sendchkenable);
+ bitmap_free(dd->cspec->sendgrhchk);
+ bitmap_free(dd->cspec->sendibchk);
kfree(dd->cspec->msix_entries);
for (i = 0; i < dd->num_pports; i++) {
unsigned long flags;
@@ -6383,18 +6383,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
features = qib_7322_boardname(dd);
/* now that piobcnt2k and 4k set, we can allocate these */
- sbufcnt = dd->piobcnt2k + dd->piobcnt4k +
- NUM_VL15_BUFS + BITS_PER_LONG - 1;
- sbufcnt /= BITS_PER_LONG;
- dd->cspec->sendchkenable =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendchkenable),
- GFP_KERNEL);
- dd->cspec->sendgrhchk =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendgrhchk),
- GFP_KERNEL);
- dd->cspec->sendibchk =
- kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendibchk),
- GFP_KERNEL);
+ sbufcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS;
+
+ dd->cspec->sendchkenable = bitmap_zalloc(sbufcnt, GFP_KERNEL);
+ dd->cspec->sendgrhchk = bitmap_zalloc(sbufcnt, GFP_KERNEL);
+ dd->cspec->sendibchk = bitmap_zalloc(sbufcnt, GFP_KERNEL);
if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
!dd->cspec->sendibchk) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d1a72e89e297..45211008449f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1106,8 +1106,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (!qib_cpulist_count) {
u32 count = num_online_cpus();
- qib_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long),
- GFP_KERNEL);
+ qib_cpulist = bitmap_zalloc(count, GFP_KERNEL);
if (qib_cpulist)
qib_cpulist_count = count;
}
@@ -1279,7 +1278,7 @@ static void __exit qib_ib_cleanup(void)
#endif
qib_cpulist_count = 0;
- kfree(qib_cpulist);
+ bitmap_free(qib_cpulist);
WARN_ON(!xa_empty(&qib_dev_table));
qib_dev_cleanup();
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 81b810d006c0..1dc3ccf0cf1f 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -587,7 +587,7 @@ static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
/* Need to release */
u64 pollval;
/*
- * The only writeable bits are the request and CS.
+ * The only writable bits are the request and CS.
* Both should be clear
*/
u64 newval = 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index e212929369df..67a1b4562dc2 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -482,7 +482,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
if (err)
goto out_free_dev;
- if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
usnic_err("IOMMU of %s does not support cache coherency\n",
dev_name(dev));
err = -EINVAL;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index da3a398053b8..bc53cad077aa 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+ pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+
if (qp->valid) {
qp->comp.timeout = 1;
rxe_run_task(&qp->comp.task, 1);
@@ -560,7 +562,7 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
- int ret = 0;
+ int ret;
if (!rxe_get(qp))
return -EAGAIN;
@@ -569,8 +571,7 @@ int rxe_completer(void *arg)
qp->req.state == QP_STATE_RESET) {
rxe_drain_resp_pkts(qp, qp->valid &&
qp->req.state == QP_STATE_ERROR);
- ret = -EAGAIN;
- goto done;
+ goto exit;
}
if (qp->comp.timeout) {
@@ -580,10 +581,8 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
- if (qp->req.need_retry) {
- ret = -EAGAIN;
- goto done;
- }
+ if (qp->req.need_retry)
+ goto exit;
state = COMPST_GET_ACK;
@@ -676,8 +675,7 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- ret = -EAGAIN;
- goto done;
+ goto exit;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@@ -689,10 +687,8 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted)) {
- ret = -EAGAIN;
- goto done;
- }
+ if (!wqe || (wqe->state == wqe_state_posted))
+ goto exit;
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
@@ -730,18 +726,21 @@ int rxe_completer(void *arg)
break;
case COMPST_RNR_RETRY:
+ /* we come here if we received an RNR NAK */
if (qp->comp.rnr_retry > 0) {
if (qp->comp.rnr_retry != 7)
qp->comp.rnr_retry--;
- qp->req.need_retry = 1;
+ /* don't start a retry flow until the
+ * rnr timer has fired
+ */
+ qp->req.wait_for_rnr_timer = 1;
pr_debug("qp#%d set rnr nak timer\n",
qp_num(qp));
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
- ret = -EAGAIN;
- goto done;
+ goto exit;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -754,12 +753,20 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
- ret = -EAGAIN;
- goto done;
+ goto exit;
}
}
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * will continue looping and return to rxe_completer
+ */
done:
+ ret = 0;
+ goto out;
+exit:
+ ret = -EAGAIN;
+out:
if (pkt)
free_pkt(pkt);
rxe_put(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 642b52539ac3..b1a0ab3cd4bd 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -19,16 +19,16 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
}
if (cqe > rxe->attr.max_cqe) {
- pr_warn("cqe(%d) > max_cqe(%d)\n",
- cqe, rxe->attr.max_cqe);
+ pr_debug("cqe(%d) > max_cqe(%d)\n",
+ cqe, rxe->attr.max_cqe);
goto err1;
}
if (cq) {
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
- pr_warn("cqe(%d) < current # elements in queue (%d)",
- cqe, count);
+ pr_debug("cqe(%d) < current # elements in queue (%d)",
+ cqe, count);
goto err1;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 0e022ae1b8a5..22f6cc31d1d6 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -77,9 +77,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
enum rxe_mr_lookup_type type);
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
-int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
-int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr);
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
@@ -145,7 +144,7 @@ static inline int rcv_wqe_size(int max_sge)
max_sge * sizeof(struct ib_sge);
}
-void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
+void free_rd_atomic_resource(struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index fc3942e04a1f..850b80f5ad8b 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -24,7 +24,7 @@ u8 rxe_get_next_key(u32 last_key)
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
- struct rxe_map_set *set = mr->cur_map_set;
+
switch (mr->type) {
case IB_MR_TYPE_DMA:
@@ -32,8 +32,8 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
case IB_MR_TYPE_USER:
case IB_MR_TYPE_MEM_REG:
- if (iova < set->iova || length > set->length ||
- iova > set->iova + set->length - length)
+ if (iova < mr->iova || length > mr->length ||
+ iova > mr->iova + mr->length - length)
return -EFAULT;
return 0;
@@ -65,89 +65,41 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
-{
- int i;
-
- for (i = 0; i < num_map; i++)
- kfree(set->map[i]);
-
- kfree(set->map);
- kfree(set);
-}
-
-static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
+static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
int i;
- struct rxe_map_set *set;
+ int num_map;
+ struct rxe_map **map = mr->map;
- set = kmalloc(sizeof(*set), GFP_KERNEL);
- if (!set)
- goto err_out;
+ num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
- set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
- if (!set->map)
- goto err_free_set;
+ mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
+ if (!mr->map)
+ goto err1;
for (i = 0; i < num_map; i++) {
- set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
- if (!set->map[i])
- goto err_free_map;
+ mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
+ if (!mr->map[i])
+ goto err2;
}
- *setp = set;
-
- return 0;
-
-err_free_map:
- for (i--; i >= 0; i--)
- kfree(set->map[i]);
-
- kfree(set->map);
-err_free_set:
- kfree(set);
-err_out:
- return -ENOMEM;
-}
-
-/**
- * rxe_mr_alloc() - Allocate memory map array(s) for MR
- * @mr: Memory region
- * @num_buf: Number of buffer descriptors to support
- * @both: If non zero allocate both mr->map and mr->next_map
- * else just allocate mr->map. Used for fast MRs
- *
- * Return: 0 on success else an error
- */
-static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
-{
- int ret;
- int num_map;
-
BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
- num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
mr->map_mask = RXE_BUF_PER_MAP - 1;
+
mr->num_buf = num_buf;
- mr->max_buf = num_map * RXE_BUF_PER_MAP;
mr->num_map = num_map;
-
- ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
- if (ret)
- return -ENOMEM;
-
- if (both) {
- ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
- if (ret)
- goto err_free;
- }
+ mr->max_buf = num_map * RXE_BUF_PER_MAP;
return 0;
-err_free:
- rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
- mr->cur_map_set = NULL;
+err2:
+ for (i--; i >= 0; i--)
+ kfree(mr->map[i]);
+
+ kfree(mr->map);
+err1:
return -ENOMEM;
}
@@ -164,7 +116,6 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr)
{
- struct rxe_map_set *set;
struct rxe_map **map;
struct rxe_phys_buf *buf = NULL;
struct ib_umem *umem;
@@ -172,6 +123,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
int num_buf;
void *vaddr;
int err;
+ int i;
umem = ib_umem_get(pd->ibpd.device, start, length, access);
if (IS_ERR(umem)) {
@@ -185,20 +137,18 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
rxe_mr_init(access, mr);
- err = rxe_mr_alloc(mr, num_buf, 0);
+ err = rxe_mr_alloc(mr, num_buf);
if (err) {
pr_warn("%s: Unable to allocate memory for map\n",
__func__);
goto err_release_umem;
}
- set = mr->cur_map_set;
- set->page_shift = PAGE_SHIFT;
- set->page_mask = PAGE_SIZE - 1;
-
- num_buf = 0;
- map = set->map;
+ mr->page_shift = PAGE_SHIFT;
+ mr->page_mask = PAGE_SIZE - 1;
+ num_buf = 0;
+ map = mr->map;
if (length > 0) {
buf = map[0]->buf;
@@ -214,29 +164,33 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
pr_warn("%s: Unable to get virtual address\n",
__func__);
err = -ENOMEM;
- goto err_release_umem;
+ goto err_cleanup_map;
}
buf->addr = (uintptr_t)vaddr;
buf->size = PAGE_SIZE;
num_buf++;
buf++;
+
}
}
mr->ibmr.pd = &pd->ibpd;
mr->umem = umem;
mr->access = access;
+ mr->length = length;
+ mr->iova = iova;
+ mr->va = start;
+ mr->offset = ib_umem_offset(umem);
mr->state = RXE_MR_STATE_VALID;
mr->type = IB_MR_TYPE_USER;
- set->length = length;
- set->iova = iova;
- set->va = start;
- set->offset = ib_umem_offset(umem);
-
return 0;
+err_cleanup_map:
+ for (i = 0; i < mr->num_map; i++)
+ kfree(mr->map[i]);
+ kfree(mr->map);
err_release_umem:
ib_umem_release(umem);
err_out:
@@ -250,7 +204,7 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
/* always allow remote access for FMRs */
rxe_mr_init(IB_ACCESS_REMOTE, mr);
- err = rxe_mr_alloc(mr, max_pages, 1);
+ err = rxe_mr_alloc(mr, max_pages);
if (err)
goto err1;
@@ -268,24 +222,21 @@ err1:
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
size_t *offset_out)
{
- struct rxe_map_set *set = mr->cur_map_set;
- size_t offset = iova - set->iova + set->offset;
+ size_t offset = iova - mr->iova + mr->offset;
int map_index;
int buf_index;
u64 length;
- struct rxe_map *map;
- if (likely(set->page_shift)) {
- *offset_out = offset & set->page_mask;
- offset >>= set->page_shift;
+ if (likely(mr->page_shift)) {
+ *offset_out = offset & mr->page_mask;
+ offset >>= mr->page_shift;
*n_out = offset & mr->map_mask;
*m_out = offset >> mr->map_shift;
} else {
map_index = 0;
buf_index = 0;
- map = set->map[map_index];
- length = map->buf[buf_index].size;
+ length = mr->map[map_index]->buf[buf_index].size;
while (offset >= length) {
offset -= length;
@@ -295,8 +246,7 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
map_index++;
buf_index = 0;
}
- map = set->map[map_index];
- length = map->buf[buf_index].size;
+ length = mr->map[map_index]->buf[buf_index].size;
}
*m_out = map_index;
@@ -317,7 +267,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
goto out;
}
- if (!mr->cur_map_set) {
+ if (!mr->map) {
addr = (void *)(uintptr_t)iova;
goto out;
}
@@ -330,13 +280,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
lookup_iova(mr, iova, &m, &n, &offset);
- if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
+ if (offset + length > mr->map[m]->buf[n].size) {
pr_warn("crosses page boundary\n");
addr = NULL;
goto out;
}
- addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;
+ addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
out:
return addr;
@@ -372,7 +322,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
return 0;
}
- WARN_ON_ONCE(!mr->cur_map_set);
+ WARN_ON_ONCE(!mr->map);
err = mr_check_range(mr, iova, length);
if (err) {
@@ -382,7 +332,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
lookup_iova(mr, iova, &m, &i, &offset);
- map = mr->cur_map_set->map + m;
+ map = mr->map + m;
buf = map[0]->buf + i;
while (length > 0) {
@@ -576,22 +526,22 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
return mr;
}
-int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mr *mr;
int ret;
- mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
if (!mr) {
- pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
+ pr_err("%s: No MR for key %#x\n", __func__, key);
ret = -EINVAL;
goto err;
}
- if (rkey != mr->rkey) {
- pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n",
- __func__, rkey, mr->rkey);
+ if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
+ pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
+ __func__, key, (mr->rkey ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
@@ -628,9 +578,8 @@ err:
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
- u32 key = wqe->wr.wr.reg.key & 0xff;
+ u32 key = wqe->wr.wr.reg.key;
u32 access = wqe->wr.wr.reg.access;
- struct rxe_map_set *set;
/* user can only register MR in free state */
if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
@@ -646,36 +595,19 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return -EINVAL;
}
+ /* user is only allowed to change key portion of l/rkey */
+ if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
+ pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
+ __func__, key, mr->lkey);
+ return -EINVAL;
+ }
+
mr->access = access;
- mr->lkey = (mr->lkey & ~0xff) | key;
- mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
+ mr->lkey = key;
+ mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
+ mr->iova = wqe->wr.wr.reg.mr->iova;
mr->state = RXE_MR_STATE_VALID;
- set = mr->cur_map_set;
- mr->cur_map_set = mr->next_map_set;
- mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
- mr->next_map_set = set;
-
- return 0;
-}
-
-int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct rxe_mr *mr = to_rmr(ibmr);
- struct rxe_map_set *set = mr->next_map_set;
- struct rxe_map *map;
- struct rxe_phys_buf *buf;
-
- if (unlikely(set->nbuf == mr->num_buf))
- return -ENOMEM;
-
- map = set->map[set->nbuf / RXE_BUF_PER_MAP];
- buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];
-
- buf->addr = addr;
- buf->size = ibmr->page_size;
- set->nbuf++;
-
return 0;
}
@@ -687,7 +619,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;
- rxe_put(mr);
+ rxe_cleanup(mr);
return 0;
}
@@ -695,14 +627,15 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
+ int i;
rxe_put(mr_pd(mr));
-
ib_umem_release(mr->umem);
- if (mr->cur_map_set)
- rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+ if (mr->map) {
+ for (i = 0; i < mr->num_map; i++)
+ kfree(mr->map[i]);
- if (mr->next_map_set)
- rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
+ kfree(mr->map);
+ }
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 2e1fa844fabf..104993801a80 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -33,6 +33,8 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
spin_lock_init(&mw->lock);
+ rxe_finalize(mw);
+
return 0;
}
@@ -40,7 +42,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
{
struct rxe_mw *mw = to_rmw(ibmw);
- rxe_put(mw);
+ rxe_cleanup(mw);
return 0;
}
@@ -48,8 +50,6 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_mw *mw, struct rxe_mr *mr)
{
- u32 key = wqe->wr.wr.mw.rkey & 0xff;
-
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
pr_err_once(
@@ -87,11 +87,6 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
}
- if (unlikely(key == (mw->rkey & 0xff))) {
- pr_err_once("attempt to bind MW with same key\n");
- return -EINVAL;
- }
-
/* remaining checks only apply to a nonzero MR */
if (!mr)
return 0;
@@ -113,21 +108,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
pr_err_once(
- "attempt to bind an writeable MW to an MR without local write access\n");
+ "attempt to bind an Writable MW to an MR without local write access\n");
return -EINVAL;
}
/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
- if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) {
+ if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
pr_err_once(
"attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
} else {
- if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) ||
+ if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
- (mr->cur_map_set->iova + mr->cur_map_set->length)))) {
+ (mr->iova + mr->length)))) {
pr_err_once(
"attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 568a7cbd13d4..86c7a8bf3cbb 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -105,6 +105,12 @@ enum rxe_device_param {
RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
+ /* Max number of interations of each tasklet
+ * before yielding the cpu to let other
+ * work make progress
+ */
+ RXE_MAX_ITERATIONS = 1024,
+
/* Delay before calling arbiter timer */
RXE_NSEC_ARB_TIMER_DELAY = 200,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 19b14826385b..f50620f5a0a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -6,6 +6,7 @@
#include "rxe.h"
+#define RXE_POOL_TIMEOUT (200)
#define RXE_POOL_ALIGN (16)
static const struct rxe_type_info {
@@ -136,10 +137,14 @@ void *rxe_alloc(struct rxe_pool *pool)
elem->pool = pool;
elem->obj = obj;
kref_init(&elem->ref_cnt);
+ init_completion(&elem->complete);
- err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
+ /* allocate index in array but leave pointer as NULL so it
+ * can't be looked up until rxe_finalize() is called
+ */
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
&pool->next, GFP_KERNEL);
- if (err)
+ if (err < 0)
goto err_free;
return obj;
@@ -151,9 +156,11 @@ err_cnt:
return NULL;
}
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
+ bool sleepable)
{
int err;
+ gfp_t gfp_flags;
if (WARN_ON(pool->type == RXE_TYPE_MR))
return -EINVAL;
@@ -164,10 +171,19 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
elem->pool = pool;
elem->obj = (u8 *)elem - pool->elem_offset;
kref_init(&elem->ref_cnt);
-
- err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
- &pool->next, GFP_KERNEL);
- if (err)
+ init_completion(&elem->complete);
+
+ /* AH objects are unique in that the create_ah verb
+ * can be called in atomic context. If the create_ah
+ * call is not sleepable use GFP_ATOMIC.
+ */
+ gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC;
+
+ if (sleepable)
+ might_sleep();
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
+ &pool->next, gfp_flags);
+ if (err < 0)
goto err_cnt;
return 0;
@@ -181,16 +197,15 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
struct rxe_pool_elem *elem;
struct xarray *xa = &pool->xa;
- unsigned long flags;
void *obj;
- xa_lock_irqsave(xa, flags);
+ rcu_read_lock();
elem = xa_load(xa, index);
if (elem && kref_get_unless_zero(&elem->ref_cnt))
obj = elem->obj;
else
obj = NULL;
- xa_unlock_irqrestore(xa, flags);
+ rcu_read_unlock();
return obj;
}
@@ -198,17 +213,74 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
static void rxe_elem_release(struct kref *kref)
{
struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
- struct rxe_pool *pool = elem->pool;
- xa_erase(&pool->xa, elem->index);
+ complete(&elem->complete);
+}
+
+int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
+{
+ struct rxe_pool *pool = elem->pool;
+ struct xarray *xa = &pool->xa;
+ static int timeout = RXE_POOL_TIMEOUT;
+ int ret, err = 0;
+ void *xa_ret;
+
+ if (sleepable)
+ might_sleep();
+
+ /* erase xarray entry to prevent looking up
+ * the pool elem from its index
+ */
+ xa_ret = xa_erase(xa, elem->index);
+ WARN_ON(xa_err(xa_ret));
+
+ /* if this is the last call to rxe_put complete the
+ * object. It is safe to touch obj->elem after this since
+ * it is freed below
+ */
+ __rxe_put(elem);
+
+ /* wait until all references to the object have been
+ * dropped before final object specific cleanup and
+ * return to rdma-core
+ */
+ if (sleepable) {
+ if (!completion_done(&elem->complete) && timeout) {
+ ret = wait_for_completion_timeout(&elem->complete,
+ timeout);
+
+ /* Shouldn't happen. There are still references to
+ * the object but, rather than deadlock, free the
+ * object or pass back to rdma-core.
+ */
+ if (WARN_ON(!ret))
+ err = -EINVAL;
+ }
+ } else {
+ unsigned long until = jiffies + timeout;
+
+ /* AH objects are unique in that the destroy_ah verb
+ * can be called in atomic context. This delay
+ * replaces the wait_for_completion call above
+ * when the destroy_ah call is not sleepable
+ */
+ while (!completion_done(&elem->complete) &&
+ time_before(jiffies, until))
+ mdelay(1);
+
+ if (WARN_ON(!completion_done(&elem->complete)))
+ err = -EINVAL;
+ }
if (pool->cleanup)
pool->cleanup(elem);
if (pool->type == RXE_TYPE_MR)
- kfree(elem->obj);
+ kfree_rcu(elem->obj);
atomic_dec(&pool->num_elem);
+
+ return err;
}
int __rxe_get(struct rxe_pool_elem *elem)
@@ -220,3 +292,11 @@ int __rxe_put(struct rxe_pool_elem *elem)
{
return kref_put(&elem->ref_cnt, rxe_elem_release);
}
+
+void __rxe_finalize(struct rxe_pool_elem *elem)
+{
+ void *xa_ret;
+
+ xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL);
+ WARN_ON(xa_err(xa_ret));
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 0860660d65ec..9d83cb32092f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -24,6 +24,7 @@ struct rxe_pool_elem {
void *obj;
struct kref ref_cnt;
struct list_head list;
+ struct completion complete;
u32 index;
};
@@ -57,21 +58,28 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
void *rxe_alloc(struct rxe_pool *pool);
/* connect already allocated object to pool */
-int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem);
-
-#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem)
+int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
+ bool sleepable);
+#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem, true)
+#define rxe_add_to_pool_ah(pool, obj, sleepable) __rxe_add_to_pool(pool, \
+ &(obj)->elem, sleepable)
/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
int __rxe_get(struct rxe_pool_elem *elem);
-
#define rxe_get(obj) __rxe_get(&(obj)->elem)
int __rxe_put(struct rxe_pool_elem *elem);
-
#define rxe_put(obj) __rxe_put(&(obj)->elem)
+int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable);
+#define rxe_cleanup(obj) __rxe_cleanup(&(obj)->elem, true)
+#define rxe_cleanup_ah(obj, sleepable) __rxe_cleanup(&(obj)->elem, sleepable)
+
#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt)
+void __rxe_finalize(struct rxe_pool_elem *elem);
+#define rxe_finalize(obj) __rxe_finalize(&(obj)->elem)
+
#endif /* RXE_POOL_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 22e9b85344c3..eef91b8cb4ed 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -120,17 +120,15 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
struct resp_res *res = &qp->resp.resources[i];
- free_rd_atomic_resource(qp, res);
+ free_rd_atomic_resource(res);
}
kfree(qp->resp.resources);
qp->resp.resources = NULL;
}
}
-void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
+void free_rd_atomic_resource(struct resp_res *res)
{
- if (res->type == RXE_ATOMIC_MASK)
- kfree_skb(res->atomic.skb);
res->type = 0;
}
@@ -142,7 +140,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
if (qp->resp.resources) {
for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
res = &qp->resp.resources[i];
- free_rd_atomic_resource(qp, res);
+ free_rd_atomic_resource(res);
}
}
}
@@ -507,6 +505,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
atomic_set(&qp->ssn, 0);
qp->req.opcode = -1;
qp->req.need_retry = 0;
+ qp->req.wait_for_rnr_timer = 0;
qp->req.noack_pkts = 0;
qp->resp.msn = 0;
qp->resp.opcode = -1;
@@ -804,13 +803,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
- atomic_dec(&qp->scq->num_wq);
- if (qp->scq)
+ if (qp->scq) {
+ atomic_dec(&qp->scq->num_wq);
rxe_put(qp->scq);
+ }
- atomic_dec(&qp->rcq->num_wq);
- if (qp->rcq)
+ if (qp->rcq) {
+ atomic_dec(&qp->rcq->num_wq);
rxe_put(qp->rcq);
+ }
if (qp->pd)
rxe_put(qp->pd);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 6227112ef7a2..ed44042782fa 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,6 @@
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H
-/* for definition of shared struct rxe_queue_buf */
-#include <uapi/rdma/rdma_user_rxe.h>
-
/* Implements a simple circular buffer that is shared between user
* and the driver and can be resized. The requested element size is
* rounded up to a power of 2 and the number of elements in the buffer
@@ -53,6 +50,8 @@ enum queue_type {
QUEUE_TYPE_FROM_DRIVER,
};
+struct rxe_queue_buf;
+
struct rxe_queue {
struct rxe_dev *rxe;
struct rxe_queue_buf *buf;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 9d98237389cf..49e8f54db6f5 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -15,8 +15,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 opcode);
static inline void retry_first_write_send(struct rxe_qp *qp,
- struct rxe_send_wqe *wqe,
- unsigned int mask, int npsn)
+ struct rxe_send_wqe *wqe, int npsn)
{
int i;
@@ -83,7 +82,7 @@ static void req_retry(struct rxe_qp *qp)
if (mask & WR_WRITE_OR_SEND_MASK) {
npsn = (qp->comp.psn - wqe->first_psn) &
BTH_PSN_MASK;
- retry_first_write_send(qp, wqe, mask, npsn);
+ retry_first_write_send(qp, wqe, npsn);
}
if (mask & WR_READ_MASK) {
@@ -101,7 +100,11 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
- pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
+ pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+
+ /* request a send queue retry */
+ qp->req.need_retry = 1;
+ qp->req.wait_for_rnr_timer = 0;
rxe_run_task(&qp->req.task, 1);
}
@@ -161,16 +164,36 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
(wqe->state != wqe_state_processing)))
return NULL;
- if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
- (index != cons))) {
- qp->req.wait_fence = 1;
- return NULL;
- }
-
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
}
+/**
+ * rxe_wqe_is_fenced - check if next wqe is fenced
+ * @qp: the queue pair
+ * @wqe: the next wqe
+ *
+ * Returns: 1 if wqe needs to wait
+ * 0 if wqe is ready to go
+ */
+static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ /* Local invalidate fence (LIF) see IBA 10.6.5.1
+ * Requires ALL previous operations on the send queue
+ * are complete. Make mandatory for the rxe driver.
+ */
+ if (wqe->wr.opcode == IB_WR_LOCAL_INV)
+ return qp->req.wqe_index != queue_get_consumer(qp->sq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
+
+ /* Fence see IBA 10.8.3.3
+ * Requires that all previous read and atomic operations
+ * are complete.
+ */
+ return (wqe->wr.send_flags & IB_SEND_FENCE) &&
+ atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic;
+}
+
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
switch (opcode) {
@@ -581,9 +604,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->status = IB_WC_SUCCESS;
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
- if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
- rxe_run_task(&qp->comp.task, 1);
+ /* There is no ack coming for local work requests
+ * which can lead to a deadlock. So go ahead and complete
+ * it now.
+ */
+ rxe_run_task(&qp->comp.task, 1);
return 0;
}
@@ -599,6 +624,7 @@ int rxe_requester(void *arg)
u32 payload;
int mtu;
int opcode;
+ int err;
int ret;
struct rxe_send_wqe rollback_wqe;
u32 rollback_psn;
@@ -609,7 +635,6 @@ int rxe_requester(void *arg)
if (!rxe_get(qp))
return -EAGAIN;
-next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
goto exit;
@@ -620,10 +645,17 @@ next_wqe:
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
+ qp->req.wait_for_rnr_timer = 0;
goto exit;
}
- if (unlikely(qp->req.need_retry)) {
+ /* we come here if the retransmit timer has fired
+ * or if the rnr timer has fired. If the retransmit
+ * timer fires while we are processing an RNR NAK wait
+ * until the rnr timer has fired before starting the
+ * retry flow
+ */
+ if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
req_retry(qp);
qp->req.need_retry = 0;
}
@@ -632,12 +664,17 @@ next_wqe:
if (unlikely(!wqe))
goto exit;
+ if (rxe_wqe_is_fenced(qp, wqe)) {
+ qp->req.wait_fence = 1;
+ goto exit;
+ }
+
if (wqe->mask & WR_LOCAL_OP_MASK) {
- ret = rxe_do_local_ops(qp, wqe);
- if (unlikely(ret))
+ err = rxe_do_local_ops(qp, wqe);
+ if (unlikely(err))
goto err;
else
- goto next_wqe;
+ goto done;
}
if (unlikely(qp_type(qp) == IB_QPT_RC &&
@@ -685,9 +722,8 @@ next_wqe:
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- __rxe_do_task(&qp->comp.task);
- rxe_put(qp);
- return 0;
+ rxe_run_task(&qp->comp.task, 0);
+ goto done;
}
payload = mtu;
}
@@ -703,25 +739,29 @@ next_wqe:
if (unlikely(!av)) {
pr_err("qp#%d Failed no address vector\n", qp_num(qp));
wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto err_drop_ah;
+ goto err;
}
skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto err_drop_ah;
+ if (ah)
+ rxe_put(ah);
+ goto err;
}
- ret = finish_packet(qp, av, wqe, &pkt, skb, payload);
- if (unlikely(ret)) {
+ err = finish_packet(qp, av, wqe, &pkt, skb, payload);
+ if (unlikely(err)) {
pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
- if (ret == -EFAULT)
+ if (err == -EFAULT)
wqe->status = IB_WC_LOC_PROT_ERR;
else
wqe->status = IB_WC_LOC_QP_OP_ERR;
kfree_skb(skb);
- goto err_drop_ah;
+ if (ah)
+ rxe_put(ah);
+ goto err;
}
if (ah)
@@ -736,13 +776,14 @@ next_wqe:
save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
- ret = rxe_xmit_packet(qp, &pkt, skb);
- if (ret) {
+
+ err = rxe_xmit_packet(qp, &pkt, skb);
+ if (err) {
qp->need_req_skb = 1;
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
- if (ret == -EAGAIN) {
+ if (err == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
goto exit;
}
@@ -753,16 +794,20 @@ next_wqe:
update_state(qp, &pkt);
- goto next_wqe;
-
-err_drop_ah:
- if (ah)
- rxe_put(ah);
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * will continue looping and return to rxe_requester
+ */
+done:
+ ret = 0;
+ goto out;
err:
wqe->state = wqe_state_error;
- __rxe_do_task(&qp->comp.task);
-
+ rxe_run_task(&qp->comp.task, 0);
exit:
+ ret = -EAGAIN;
+out:
rxe_put(qp);
- return -EAGAIN;
+
+ return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index f4f6ee5d81fe..b36ec5c4d5e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -21,6 +21,7 @@ enum resp_states {
RESPST_CHK_RKEY,
RESPST_EXECUTE,
RESPST_READ_REPLY,
+ RESPST_ATOMIC_REPLY,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -55,6 +56,7 @@ static char *resp_state_name[] = {
[RESPST_CHK_RKEY] = "CHK_RKEY",
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
+ [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -448,7 +450,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+ pr_debug("%s: no MW matches rkey %#x\n",
+ __func__, rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -468,7 +471,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+ pr_debug("%s: no MR matches rkey %#x\n",
+ __func__, rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -549,49 +553,106 @@ out:
return rc;
}
+static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ int type)
+{
+ struct resp_res *res;
+ u32 pkts;
+
+ res = &qp->resp.resources[qp->resp.res_head];
+ rxe_advance_resp_resource(qp);
+ free_rd_atomic_resource(res);
+
+ res->type = type;
+ res->replay = 0;
+
+ switch (type) {
+ case RXE_READ_MASK:
+ res->read.va = qp->resp.va + qp->resp.offset;
+ res->read.va_org = qp->resp.va + qp->resp.offset;
+ res->read.resid = qp->resp.resid;
+ res->read.length = qp->resp.resid;
+ res->read.rkey = qp->resp.rkey;
+
+ pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
+ res->first_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
+
+ res->state = rdatm_res_state_new;
+ break;
+ case RXE_ATOMIC_MASK:
+ res->first_psn = pkt->psn;
+ res->last_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ break;
+ }
+
+ return res;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
-static enum resp_states process_atomic(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states atomic_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
u64 *vaddr;
enum resp_states ret;
struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+ u64 value;
- if (mr->state != RXE_MR_STATE_VALID) {
- ret = RESPST_ERR_RKEY_VIOLATION;
- goto out;
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
+ qp->resp.res = res;
}
- vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
+ if (!res->replay) {
+ if (mr->state != RXE_MR_STATE_VALID) {
+ ret = RESPST_ERR_RKEY_VIOLATION;
+ goto out;
+ }
- /* check vaddr is 8 bytes aligned. */
- if (!vaddr || (uintptr_t)vaddr & 7) {
- ret = RESPST_ERR_MISALIGNED_ATOMIC;
- goto out;
- }
+ vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset,
+ sizeof(u64));
- spin_lock_bh(&atomic_ops_lock);
+ /* check vaddr is 8 bytes aligned. */
+ if (!vaddr || (uintptr_t)vaddr & 7) {
+ ret = RESPST_ERR_MISALIGNED_ATOMIC;
+ goto out;
+ }
- qp->resp.atomic_orig = *vaddr;
+ spin_lock_bh(&atomic_ops_lock);
+ res->atomic.orig_val = value = *vaddr;
- if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
- if (*vaddr == atmeth_comp(pkt))
- *vaddr = atmeth_swap_add(pkt);
- } else {
- *vaddr += atmeth_swap_add(pkt);
- }
+ if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
+ if (value == atmeth_comp(pkt))
+ value = atmeth_swap_add(pkt);
+ } else {
+ value += atmeth_swap_add(pkt);
+ }
+
+ *vaddr = value;
+ spin_unlock_bh(&atomic_ops_lock);
+
+ qp->resp.msn++;
- spin_unlock_bh(&atomic_ops_lock);
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
- ret = RESPST_NONE;
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+ }
+
+ ret = RESPST_ACKNOWLEDGE;
out:
return ret;
}
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt,
struct rxe_pkt_info *ack,
int opcode,
int payload,
@@ -629,7 +690,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
}
if (ack->mask & RXE_ATMACK_MASK)
- atmack_set_orig(ack, qp->resp.atomic_orig);
+ atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
err = rxe_prepare(&qp->pri_av, ack, skb);
if (err) {
@@ -640,34 +701,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
return skb;
}
-static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
-{
- struct resp_res *res;
- u32 pkts;
-
- res = &qp->resp.resources[qp->resp.res_head];
- rxe_advance_resp_resource(qp);
- free_rd_atomic_resource(qp, res);
-
- res->type = RXE_READ_MASK;
- res->replay = 0;
- res->read.va = qp->resp.va + qp->resp.offset;
- res->read.va_org = qp->resp.va + qp->resp.offset;
- res->read.resid = qp->resp.resid;
- res->read.length = qp->resp.resid;
- res->read.rkey = qp->resp.rkey;
-
- pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
- res->first_psn = pkt->psn;
- res->cur_psn = pkt->psn;
- res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
-
- res->state = rdatm_res_state_new;
-
- return res;
-}
-
/**
* rxe_recheck_mr - revalidate MR from rkey and get a reference
* @qp: the qp
@@ -738,7 +771,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
struct rxe_mr *mr;
if (!res) {
- res = rxe_prepare_read_res(qp, req_pkt);
+ res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
qp->resp.res = res;
}
@@ -771,7 +804,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
payload = min_t(int, res->read.resid, mtu);
- skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
+ skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
if (!skb)
return RESPST_ERR_RNR;
@@ -858,9 +891,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.msn++;
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- err = process_atomic(qp, pkt);
- if (err)
- return err;
+ return RESPST_ATOMIC_REPLY;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -997,14 +1028,13 @@ finish:
return RESPST_CLEANUP;
}
-static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome, u32 psn)
+static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
+ skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
0, psn, syndrome);
if (!skb) {
err = -ENOMEM;
@@ -1019,40 +1049,29 @@ err1:
return err;
}
-static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome)
+static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
- int rc = 0;
+ int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct resp_res *res;
- skb = prepare_ack_packet(qp, pkt, &ack_pkt,
- IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
- syndrome);
+ skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
+ 0, psn, syndrome);
if (!skb) {
- rc = -ENOMEM;
+ err = -ENOMEM;
goto out;
}
- res = &qp->resp.resources[qp->resp.res_head];
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
-
- skb_get(skb);
- res->type = RXE_ATOMIC_MASK;
- res->atomic.skb = skb;
- res->first_psn = ack_pkt.psn;
- res->last_psn = ack_pkt.psn;
- res->cur_psn = ack_pkt.psn;
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
+ if (err)
+ pr_err_ratelimited("Failed sending atomic ack\n");
- rc = rxe_xmit_packet(qp, &ack_pkt, skb);
- if (rc) {
- pr_err_ratelimited("Failed sending ack\n");
- rxe_put(qp);
- }
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
out:
- return rc;
+ return err;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
@@ -1062,11 +1081,11 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
return RESPST_CLEANUP;
if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
- send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
+ send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
- send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
+ send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
return RESPST_CLEANUP;
}
@@ -1119,7 +1138,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (pkt->mask & RXE_SEND_MASK ||
pkt->mask & RXE_WRITE_MASK) {
/* SEND. Ack again and cleanup. C9-105. */
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@@ -1173,14 +1192,11 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* Find the operation in our list of responder resources. */
res = find_resource(qp, pkt->psn);
if (res) {
- skb_get(res->atomic.skb);
- /* Resend the result. */
- rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
- if (rc) {
- pr_err("Failed resending result. This flow is not handled - skb ignored\n");
- rc = RESPST_CLEANUP;
- goto out;
- }
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_ATOMIC_REPLY;
+ goto out;
}
/* Resource not found. Class D error. Drop the request. */
@@ -1260,17 +1276,15 @@ int rxe_responder(void *arg)
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
- int ret = 0;
+ int ret;
if (!rxe_get(qp))
return -EAGAIN;
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
- if (!qp->valid) {
- ret = -EINVAL;
- goto done;
- }
+ if (!qp->valid)
+ goto exit;
switch (qp->resp.state) {
case QP_STATE_RESET:
@@ -1316,6 +1330,9 @@ int rxe_responder(void *arg)
case RESPST_READ_REPLY:
state = read_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_REPLY:
+ state = atomic_reply(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1327,7 +1344,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_PSN_OUT_OF_SEQ:
/* RC only - Class B. Drop packet. */
- send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
+ send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
state = RESPST_CLEANUP;
break;
@@ -1349,7 +1366,7 @@ int rxe_responder(void *arg)
if (qp_type(qp) == IB_QPT_RC) {
rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
- send_ack(qp, pkt, AETH_RNR_NAK |
+ send_ack(qp, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
qp->attr.min_rnr_timer),
pkt->psn);
@@ -1438,7 +1455,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_warn("qp#%d moved to error state\n", qp_num(qp));
+ pr_debug("qp#%d moved to error state\n", qp_num(qp));
rxe_qp_error(qp);
goto exit;
@@ -1447,9 +1464,16 @@ int rxe_responder(void *arg)
}
}
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * will continue looping and return to rxe_responder
+ */
+done:
+ ret = 0;
+ goto out;
exit:
ret = -EAGAIN;
-done:
+out:
rxe_put(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 0c4db5bb17d7..2248cf33d776 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/hardirq.h>
-#include "rxe_task.h"
+#include "rxe.h"
int __rxe_do_task(struct rxe_task *task)
@@ -33,6 +33,7 @@ void rxe_do_task(struct tasklet_struct *t)
int cont;
int ret;
struct rxe_task *task = from_tasklet(task, t, tasklet);
+ unsigned int iterations = RXE_MAX_ITERATIONS;
spin_lock_bh(&task->state_lock);
switch (task->state) {
@@ -61,13 +62,20 @@ void rxe_do_task(struct tasklet_struct *t)
spin_lock_bh(&task->state_lock);
switch (task->state) {
case TASK_STATE_BUSY:
- if (ret)
+ if (ret) {
task->state = TASK_STATE_START;
- else
+ } else if (iterations--) {
cont = 1;
+ } else {
+ /* reschedule the tasklet and exit
+ * the loop to give up the cpu
+ */
+ tasklet_schedule(&task->tasklet);
+ task->state = TASK_STATE_START;
+ }
break;
- /* soneone tried to run the task since the last time we called
+ /* someone tried to run the task since the last time we called
* func, so we will call one more time regardless of the
* return value
*/
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9d995854a174..e264cf69bf55 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
struct rxe_ucontext *uc = to_ruc(ibuc);
- rxe_put(uc);
+ rxe_cleanup(uc);
}
static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
- rxe_put(pd);
+ rxe_cleanup(pd);
return 0;
}
@@ -176,7 +176,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
if (err)
return err;
- err = rxe_add_to_pool(&rxe->ah_pool, ah);
+ err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
return err;
@@ -188,7 +189,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
sizeof(uresp->ah_num));
if (err) {
- rxe_put(ah);
+ rxe_cleanup(ah);
return -EFAULT;
}
} else if (ah->is_user) {
@@ -197,6 +198,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
}
rxe_init_av(init_attr->ah_attr, &ah->av);
+ rxe_finalize(ah);
+
return 0;
}
@@ -228,7 +231,8 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
- rxe_put(ah);
+ rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
+
return 0;
}
@@ -308,12 +312,13 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
if (err)
- goto err_put;
+ goto err_cleanup;
return 0;
-err_put:
- rxe_put(srq);
+err_cleanup:
+ rxe_cleanup(srq);
+
return err;
}
@@ -362,7 +367,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
- rxe_put(srq);
+ rxe_cleanup(srq);
return 0;
}
@@ -429,10 +434,11 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
if (err)
goto qp_init;
+ rxe_finalize(qp);
return 0;
qp_init:
- rxe_put(qp);
+ rxe_cleanup(qp);
return err;
}
@@ -485,7 +491,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (ret)
return ret;
- rxe_put(qp);
+ rxe_cleanup(qp);
return 0;
}
@@ -803,7 +809,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
rxe_cq_disable(cq);
- rxe_put(cq);
+ rxe_cleanup(cq);
return 0;
}
@@ -898,6 +904,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
rxe_get(pd);
rxe_mr_init_dma(pd, access, mr);
+ rxe_finalize(mr);
return &mr->ibmr;
}
@@ -926,11 +933,13 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
if (err)
goto err3;
+ rxe_finalize(mr);
+
return &mr->ibmr;
err3:
rxe_put(pd);
- rxe_put(mr);
+ rxe_cleanup(mr);
err2:
return ERR_PTR(err);
}
@@ -958,35 +967,52 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
if (err)
goto err2;
+ rxe_finalize(mr);
+
return &mr->ibmr;
err2:
rxe_put(pd);
- rxe_put(mr);
+ rxe_cleanup(mr);
err1:
return ERR_PTR(err);
}
-/* build next_map_set from scatterlist
- * The IB_WR_REG_MR WR will swap map_sets
- */
+static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct rxe_mr *mr = to_rmr(ibmr);
+ struct rxe_map *map;
+ struct rxe_phys_buf *buf;
+
+ if (unlikely(mr->nbuf == mr->num_buf))
+ return -ENOMEM;
+
+ map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
+ buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
+
+ buf->addr = addr;
+ buf->size = ibmr->page_size;
+ mr->nbuf++;
+
+ return 0;
+}
+
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset)
{
struct rxe_mr *mr = to_rmr(ibmr);
- struct rxe_map_set *set = mr->next_map_set;
int n;
- set->nbuf = 0;
+ mr->nbuf = 0;
- n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page);
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
- set->va = ibmr->iova;
- set->iova = ibmr->iova;
- set->length = ibmr->length;
- set->page_shift = ilog2(ibmr->page_size);
- set->page_mask = ibmr->page_size - 1;
- set->offset = set->iova & set->page_mask;
+ mr->va = ibmr->iova;
+ mr->iova = ibmr->iova;
+ mr->length = ibmr->length;
+ mr->page_shift = ilog2(ibmr->page_size);
+ mr->page_mask = ibmr->page_size - 1;
+ mr->offset = mr->iova & mr->page_mask;
return n;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index ac464e68c923..a24fbe984066 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -9,7 +9,6 @@
#include <linux/interrupt.h>
#include <linux/workqueue.h>
-#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
#include "rxe_hw_counters.h"
@@ -124,6 +123,7 @@ struct rxe_req_info {
int need_rd_atomic;
int wait_psn;
int need_retry;
+ int wait_for_rnr_timer;
int noack_pkts;
struct rxe_task task;
};
@@ -155,7 +155,7 @@ struct resp_res {
union {
struct {
- struct sk_buff *skb;
+ u64 orig_val;
} atomic;
struct {
u64 va_org;
@@ -189,7 +189,6 @@ struct rxe_resp_info {
u32 resid;
u32 rkey;
u32 length;
- u64 atomic_orig;
/* SRQ only */
struct {
@@ -288,17 +287,6 @@ struct rxe_map {
struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
};
-struct rxe_map_set {
- struct rxe_map **map;
- u64 va;
- u64 iova;
- size_t length;
- u32 offset;
- u32 nbuf;
- int page_shift;
- int page_mask;
-};
-
static inline int rkey_is_mw(u32 rkey)
{
u32 index = rkey >> 8;
@@ -316,20 +304,26 @@ struct rxe_mr {
u32 rkey;
enum rxe_mr_state state;
enum ib_mr_type type;
+ u64 va;
+ u64 iova;
+ size_t length;
+ u32 offset;
int access;
+ int page_shift;
+ int page_mask;
int map_shift;
int map_mask;
u32 num_buf;
+ u32 nbuf;
u32 max_buf;
u32 num_map;
atomic_t num_mw;
- struct rxe_map_set *cur_map_set;
- struct rxe_map_set *next_map_set;
+ struct rxe_map **map;
};
enum rxe_mw_state {
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 17f34d584cd9..f88d2971c2c6 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep)
enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;
rv = siw_recv_mpa_rr(cep);
- if (rv != -EAGAIN)
- siw_cancel_mpatimer(cep);
if (rv)
goto out_err;
+ siw_cancel_mpatimer(cep);
+
rep = &cep->mpa.hdr;
if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
@@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep)
}
out_err:
- siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+ if (rv != -EAGAIN)
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
return rv;
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 09316072b789..8dedae7ae79e 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1167,7 +1167,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
err_out:
siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
- if (cq && cq->queue) {
+ if (cq->queue) {
struct siw_ucontext *ctx =
rdma_udata_to_drv_context(udata, struct siw_ucontext,
base_ucontext);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 2c3dca41d3bd..ab86165e45de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1109,7 +1109,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
* if he sets the device address back to be based on GID index 0,
* he no longer wishs to control it.
*
- * If the user doesn't control the the device address,
+ * If the user doesn't control the device address,
* IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
* the port GUID has changed and GID at index 0 has changed
* so we need to change priv->local_gid and priv->dev->dev_addr
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2a8961b685c2..a4904371e2db 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1664,8 +1664,10 @@ static void ipoib_napi_add(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC);
- netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE);
+ netif_napi_add_weight(dev, &priv->recv_napi, ipoib_rx_poll,
+ IPOIB_NUM_WC);
+ netif_napi_add_weight(dev, &priv->send_napi, ipoib_tx_poll,
+ MAX_SEND_CQE);
}
static void ipoib_napi_del(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index c08f2d9133b6..a00ca117303a 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -246,6 +246,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
device = ib_conn->device;
ib_dev = device->ib_device;
+ /* +1 for drain */
if (ib_conn->pi_support)
max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
else
@@ -267,7 +268,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
init_attr.qp_context = (void *)ib_conn;
init_attr.send_cq = ib_conn->cq;
init_attr.recv_cq = ib_conn->cq;
- init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
+ /* +1 for drain */
+ init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS + 1;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -485,7 +487,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_conn, err);
/* block until all flush errors are consumed */
- ib_drain_sq(ib_conn->qp);
+ ib_drain_qp(ib_conn->qp);
}
return 1;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
index 385a19846c24..1e6ffafa2db3 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
@@ -32,11 +32,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats)
{
- struct rtrs_clt_stats_pcpu *s;
-
- s = get_cpu_ptr(stats->pcpu_stats);
- s->rdma.failover_cnt++;
- put_cpu_ptr(stats->pcpu_stats);
+ this_cpu_inc(stats->pcpu_stats->rdma.failover_cnt);
}
int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf)
@@ -169,12 +165,8 @@ int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *s, bool enable)
static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats,
size_t size, int d)
{
- struct rtrs_clt_stats_pcpu *s;
-
- s = get_cpu_ptr(stats->pcpu_stats);
- s->rdma.dir[d].cnt++;
- s->rdma.dir[d].size_total += size;
- put_cpu_ptr(stats->pcpu_stats);
+ this_cpu_inc(stats->pcpu_stats->rdma.dir[d].cnt);
+ this_cpu_add(stats->pcpu_stats->rdma.dir[d].size_total, size);
}
void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 9809c3883979..baecde41d126 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -740,25 +740,25 @@ struct path_it {
struct rtrs_clt_path *(*next_path)(struct path_it *it);
};
-/**
- * list_next_or_null_rr_rcu - get next list element in round-robin fashion.
+/*
+ * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL
* @head: the head for the list.
- * @ptr: the list head to take the next element from.
- * @type: the type of the struct this is embedded in.
- * @memb: the name of the list_head within the struct.
+ * @clt_path: The element to take the next clt_path from.
*
- * Next element returned in round-robin fashion, i.e. head will be skipped,
+ * Next clt path returned in round-robin fashion, i.e. head will be skipped,
* but if list is observed as empty, NULL will be returned.
*
- * This primitive may safely run concurrently with the _rcu list-mutation
+ * This function may safely run concurrently with the _rcu list-mutation
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
-#define list_next_or_null_rr_rcu(head, ptr, type, memb) \
-({ \
- list_next_or_null_rcu(head, ptr, type, memb) ?: \
- list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \
- type, memb); \
-})
+static inline struct rtrs_clt_path *
+rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path)
+{
+ return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?:
+ list_next_or_null_rcu(head,
+ READ_ONCE((&clt_path->s.entry)->next),
+ typeof(*clt_path), s.entry);
+}
/**
* get_next_path_rr() - Returns path in round-robin fashion.
@@ -789,10 +789,8 @@ static struct rtrs_clt_path *get_next_path_rr(struct path_it *it)
path = list_first_or_null_rcu(&clt->paths_list,
typeof(*path), s.entry);
else
- path = list_next_or_null_rr_rcu(&clt->paths_list,
- &path->s.entry,
- typeof(*path),
- s.entry);
+ path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path);
+
rcu_assign_pointer(*ppcpu_path, path);
return path;
@@ -1403,8 +1401,7 @@ static int alloc_permits(struct rtrs_clt_sess *clt)
unsigned int chunk_bits;
int err, i;
- clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth),
- sizeof(long), GFP_KERNEL);
+ clt->permits_map = bitmap_zalloc(clt->queue_depth, GFP_KERNEL);
if (!clt->permits_map) {
err = -ENOMEM;
goto out_err;
@@ -1426,7 +1423,7 @@ static int alloc_permits(struct rtrs_clt_sess *clt)
return 0;
err_map:
- kfree(clt->permits_map);
+ bitmap_free(clt->permits_map);
clt->permits_map = NULL;
out_err:
return err;
@@ -1434,13 +1431,11 @@ out_err:
static void free_permits(struct rtrs_clt_sess *clt)
{
- if (clt->permits_map) {
- size_t sz = clt->queue_depth;
-
+ if (clt->permits_map)
wait_event(clt->permits_wait,
- find_first_bit(clt->permits_map, sz) >= sz);
- }
- kfree(clt->permits_map);
+ bitmap_empty(clt->permits_map, clt->queue_depth));
+
+ bitmap_free(clt->permits_map);
clt->permits_map = NULL;
kfree(clt->permits);
clt->permits = NULL;
@@ -2277,8 +2272,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path)
* removed. If @sess is the last element, then @next is NULL.
*/
rcu_read_lock();
- next = list_next_or_null_rr_rcu(&clt->paths_list, &clt_path->s.entry,
- typeof(*next), s.entry);
+ next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path);
rcu_read_unlock();
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 9a1e5c2ae55c..ac0df734eba8 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -23,6 +23,17 @@
#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
__stringify(RTRS_PROTO_VER_MINOR)
+/*
+ * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
+ * and the minimum chunk size is 4096 (2^12).
+ * So the maximum sess_queue_depth is 65536 (2^16) in theory.
+ * But mempool_create, create_qp and ib_post_send fail with
+ * "cannot allocate memory" error if sess_queue_depth is too big.
+ * Therefore the pratical max value of sess_queue_depth is
+ * somewhere between 1 and 65534 and it depends on the system.
+ */
+#define MAX_SESS_QUEUE_DEPTH 65535
+
enum rtrs_imm_const {
MAX_IMM_TYPE_BITS = 4,
MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
@@ -46,16 +57,6 @@ enum {
MAX_PATHS_NUM = 128,
- /*
- * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
- * and the minimum chunk size is 4096 (2^12).
- * So the maximum sess_queue_depth is 65536 (2^16) in theory.
- * But mempool_create, create_qp and ib_post_send fail with
- * "cannot allocate memory" error if sess_queue_depth is too big.
- * Therefore the pratical max value of sess_queue_depth is
- * somewhere between 1 and 65534 and it depends on the system.
- */
- MAX_SESS_QUEUE_DEPTH = 65535,
MIN_CHUNK_SIZE = 8192,
RTRS_HB_INTERVAL_MS = 5000,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
index 44b1c1652131..2aff1213a19d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
@@ -14,9 +14,14 @@
int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable)
{
if (enable) {
- struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats;
+ int cpu;
+ struct rtrs_srv_stats_rdma_stats *r;
+
+ for_each_possible_cpu(cpu) {
+ r = per_cpu_ptr(stats->rdma_stats, cpu);
+ memset(r, 0, sizeof(*r));
+ }
- memset(r, 0, sizeof(*r));
return 0;
}
@@ -25,11 +30,22 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable)
ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page)
{
- struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats;
+ int cpu;
+ struct rtrs_srv_stats_rdma_stats sum;
+ struct rtrs_srv_stats_rdma_stats *r;
+
+ memset(&sum, 0, sizeof(sum));
+
+ for_each_possible_cpu(cpu) {
+ r = per_cpu_ptr(stats->rdma_stats, cpu);
+
+ sum.dir[READ].cnt += r->dir[READ].cnt;
+ sum.dir[READ].size_total += r->dir[READ].size_total;
+ sum.dir[WRITE].cnt += r->dir[WRITE].cnt;
+ sum.dir[WRITE].size_total += r->dir[WRITE].size_total;
+ }
- return sysfs_emit(page, "%lld %lld %lld %lldn %u\n",
- (s64)atomic64_read(&r->dir[READ].cnt),
- (s64)atomic64_read(&r->dir[READ].size_total),
- (s64)atomic64_read(&r->dir[WRITE].cnt),
- (s64)atomic64_read(&r->dir[WRITE].size_total), 0);
+ return sysfs_emit(page, "%llu %llu %llu %llu\n",
+ sum.dir[READ].cnt, sum.dir[READ].size_total,
+ sum.dir[WRITE].cnt, sum.dir[WRITE].size_total);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index b94ae12c2795..2a3c9ac64a42 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -220,6 +220,8 @@ static void rtrs_srv_path_stats_release(struct kobject *kobj)
stats = container_of(kobj, struct rtrs_srv_stats, kobj_stats);
+ free_percpu(stats->rdma_stats);
+
kfree(stats);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 24024bce2566..34c03bde5064 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -11,7 +11,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
#include <linux/module.h>
-#include <linux/mempool.h>
#include "rtrs-srv.h"
#include "rtrs-log.h"
@@ -26,11 +25,7 @@ MODULE_LICENSE("GPL");
#define DEFAULT_SESS_QUEUE_DEPTH 512
#define MAX_HDR_SIZE PAGE_SIZE
-/* We guarantee to serve 10 paths at least */
-#define CHUNK_POOL_SZ 10
-
static struct rtrs_rdma_dev_pd dev_pd;
-static mempool_t *chunk_pool;
struct class *rtrs_dev_class;
static struct rtrs_srv_ib_ctx ib_ctx;
@@ -1358,7 +1353,7 @@ static void free_srv(struct rtrs_srv_sess *srv)
WARN_ON(refcount_read(&srv->refcount));
for (i = 0; i < srv->queue_depth; i++)
- mempool_free(srv->chunks[i], chunk_pool);
+ __free_pages(srv->chunks[i], get_order(max_chunk_size));
kfree(srv->chunks);
mutex_destroy(&srv->paths_mutex);
mutex_destroy(&srv->paths_ev_mutex);
@@ -1411,7 +1406,8 @@ static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx,
goto err_free_srv;
for (i = 0; i < srv->queue_depth; i++) {
- srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL);
+ srv->chunks[i] = alloc_pages(GFP_KERNEL,
+ get_order(max_chunk_size));
if (!srv->chunks[i])
goto err_free_chunks;
}
@@ -1424,7 +1420,7 @@ static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx,
err_free_chunks:
while (i--)
- mempool_free(srv->chunks[i], chunk_pool);
+ __free_pages(srv->chunks[i], get_order(max_chunk_size));
kfree(srv->chunks);
err_free_srv:
@@ -1513,6 +1509,7 @@ static void free_path(struct rtrs_srv_path *srv_path)
kobject_del(&srv_path->kobj);
kobject_put(&srv_path->kobj);
} else {
+ free_percpu(srv_path->stats->rdma_stats);
kfree(srv_path->stats);
kfree(srv_path);
}
@@ -1755,13 +1752,17 @@ static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv,
if (!srv_path->stats)
goto err_free_sess;
+ srv_path->stats->rdma_stats = alloc_percpu(struct rtrs_srv_stats_rdma_stats);
+ if (!srv_path->stats->rdma_stats)
+ goto err_free_stats;
+
srv_path->stats->srv_path = srv_path;
srv_path->dma_addr = kcalloc(srv->queue_depth,
sizeof(*srv_path->dma_addr),
GFP_KERNEL);
if (!srv_path->dma_addr)
- goto err_free_stats;
+ goto err_free_percpu;
srv_path->s.con = kcalloc(con_num, sizeof(*srv_path->s.con),
GFP_KERNEL);
@@ -1813,6 +1814,8 @@ err_free_con:
kfree(srv_path->s.con);
err_free_dma_addr:
kfree(srv_path->dma_addr);
+err_free_percpu:
+ free_percpu(srv_path->stats->rdma_stats);
err_free_stats:
kfree(srv_path->stats);
err_free_sess:
@@ -2266,14 +2269,10 @@ static int __init rtrs_server_init(void)
err);
return err;
}
- chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ,
- get_order(max_chunk_size));
- if (!chunk_pool)
- return -ENOMEM;
rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server");
if (IS_ERR(rtrs_dev_class)) {
err = PTR_ERR(rtrs_dev_class);
- goto out_chunk_pool;
+ goto out_err;
}
rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0);
if (!rtrs_wq) {
@@ -2285,9 +2284,7 @@ static int __init rtrs_server_init(void)
out_dev_class:
class_destroy(rtrs_dev_class);
-out_chunk_pool:
- mempool_destroy(chunk_pool);
-
+out_err:
return err;
}
@@ -2295,7 +2292,6 @@ static void __exit rtrs_server_exit(void)
{
destroy_workqueue(rtrs_wq);
class_destroy(rtrs_dev_class);
- mempool_destroy(chunk_pool);
rtrs_rdma_dev_pd_deinit(&dev_pd);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index 6292e87f6afd..186a63c217df 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -12,6 +12,7 @@
#include <linux/device.h>
#include <linux/refcount.h>
+#include <linux/percpu.h>
#include "rtrs-pri.h"
/*
@@ -29,15 +30,15 @@ enum rtrs_srv_state {
*/
struct rtrs_srv_stats_rdma_stats {
struct {
- atomic64_t cnt;
- atomic64_t size_total;
+ u64 cnt;
+ u64 size_total;
} dir[2];
};
struct rtrs_srv_stats {
- struct kobject kobj_stats;
- struct rtrs_srv_stats_rdma_stats rdma_stats;
- struct rtrs_srv_path *srv_path;
+ struct kobject kobj_stats;
+ struct rtrs_srv_stats_rdma_stats __percpu *rdma_stats;
+ struct rtrs_srv_path *srv_path;
};
struct rtrs_srv_con {
@@ -130,8 +131,8 @@ void close_path(struct rtrs_srv_path *srv_path);
static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s,
size_t size, int d)
{
- atomic64_inc(&s->rdma_stats.dir[d].cnt);
- atomic64_add(size, &s->rdma_stats.dir[d].size_total);
+ this_cpu_inc(s->rdma_stats->dir[d].cnt);
+ this_cpu_add(s->rdma_stats->dir[d].size_total, size);
}
/* functions which are implemented in rtrs-srv-stats.c */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 2ccf7bef9b05..e735e19461ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -50,10 +50,12 @@ static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
- ft->max_fte = size ? roundup_pow_of_two(size) : 1;
+ int max_fte = ft_attr->max_fte;
+
+ ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1;
return 0;
}
@@ -258,7 +260,7 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
@@ -267,17 +269,19 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ unsigned int size;
int err;
- if (size != POOL_NEXT_SIZE)
- size = roundup_pow_of_two(size);
- size = mlx5_ft_pool_get_avail_sz(dev, ft->type, size);
+ if (ft_attr->max_fte != POOL_NEXT_SIZE)
+ size = roundup_pow_of_two(ft_attr->max_fte);
+ size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte);
if (!size)
return -ENOSPC;
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
+ MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid);
MLX5_SET(create_flow_table_in, in, table_type, ft->type);
MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level);
MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0);
@@ -479,6 +483,30 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
return 0;
}
+
+static void
+mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context)
+{
+ void *exe_aso_ctrl;
+ void *execute_aso;
+
+ execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context,
+ execute_aso[0]);
+ MLX5_SET(execute_aso, execute_aso, valid, 1);
+ MLX5_SET(execute_aso, execute_aso, aso_object_id,
+ fte->action.exe_aso.object_id);
+
+ exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id,
+ fte->action.exe_aso.return_reg_id);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type,
+ fte->action.exe_aso.type);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color,
+ fte->action.exe_aso.flow_meter.init_color);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id,
+ fte->action.exe_aso.flow_meter.meter_idx);
+}
+
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
@@ -663,6 +691,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
list_size);
}
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
+ mlx5_cmd_set_fte_flow_meter(fte, in_flow_context);
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ }
+
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
err_out:
kvfree(in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 274004e80f03..8ef4254b9ea1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -38,7 +38,7 @@
struct mlx5_flow_cmds {
int (*create_flow_table)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft);
int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fdcf7f529330..35d89edb1bcd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1155,7 +1155,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
find_next_chained_ft(fs_prio);
ft->def_miss_action = ns->def_miss_action;
ft->ns = ns;
- err = root->cmds->create_flow_table(root, ft, ft_attr->max_fte, next_ft);
+ err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
if (err)
goto free_ft;
@@ -1195,6 +1195,12 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
}
EXPORT_SYMBOL(mlx5_create_flow_table);
+u32 mlx5_flow_table_id(struct mlx5_flow_table *ft)
+{
+ return ft->id;
+}
+EXPORT_SYMBOL(mlx5_flow_table_id);
+
struct mlx5_flow_table *
mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr, u16 vport)
@@ -2866,6 +2872,14 @@ static int create_fdb_bypass(struct mlx5_flow_steering *steering)
return 0;
}
+static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
+}
+
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *maj_prio;
@@ -2916,10 +2930,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
return 0;
out_err:
- cleanup_root_ns(steering->fdb_root_ns);
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- steering->fdb_root_ns = NULL;
+ cleanup_fdb_root_ns(steering);
return err;
}
@@ -3079,10 +3090,7 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
struct mlx5_flow_steering *steering = dev->priv.steering;
cleanup_root_ns(steering->root_ns);
- cleanup_root_ns(steering->fdb_root_ns);
- steering->fdb_root_ns = NULL;
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
+ cleanup_fdb_root_ns(steering);
cleanup_root_ns(steering->port_sel_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
index 3d5e57ff558c..7e02cbe8c3b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -12,13 +12,16 @@ struct mlx5_dm {
spinlock_t lock;
unsigned long *steering_sw_icm_alloc_blocks;
unsigned long *header_modify_sw_icm_alloc_blocks;
+ unsigned long *header_modify_pattern_sw_icm_alloc_blocks;
};
struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
{
+ u64 header_modify_pattern_icm_blocks = 0;
u64 header_modify_icm_blocks = 0;
u64 steering_icm_blocks = 0;
struct mlx5_dm *dm;
+ bool support_v2;
if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
return NULL;
@@ -53,8 +56,27 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
goto err_modify_hdr;
}
+ support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) &&
+ MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address);
+
+ if (support_v2) {
+ header_modify_pattern_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_pattern_sw_icm_alloc_blocks =
+ kcalloc(BITS_TO_LONGS(header_modify_pattern_icm_blocks),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!dm->header_modify_pattern_sw_icm_alloc_blocks)
+ goto err_pattern;
+ }
+
return dm;
+err_pattern:
+ kfree(dm->header_modify_sw_icm_alloc_blocks);
+
err_modify_hdr:
kfree(dm->steering_sw_icm_alloc_blocks);
@@ -86,6 +108,14 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
kfree(dm->header_modify_sw_icm_alloc_blocks);
}
+ if (dm->header_modify_pattern_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ kfree(dm->header_modify_pattern_sw_icm_alloc_blocks);
+ }
+
kfree(dm);
}
@@ -130,6 +160,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
log_header_modify_sw_icm_size);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
@@ -203,6 +240,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c9b4e50a593e..2078d9f03a5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -314,13 +314,6 @@ struct mlx5_reg_host_endianness {
u8 rsvd[15];
};
-#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
-
-enum {
- MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
- MLX5_DEV_CAP_FLAG_DCT,
-};
-
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
switch (size) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 223c8741b7ae..16d65fe4f654 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -439,6 +439,7 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
MLX5_SET(create_flow_table_in, in, table_type, attr->table_type);
+ MLX5_SET(create_flow_table_in, in, uid, attr->uid);
ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index e5f6412baea9..31d443dd8386 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -214,7 +214,7 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
tbl->table_type);
}
-static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
+static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid)
{
bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
@@ -236,6 +236,7 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
ft_attr.sw_owner = true;
ft_attr.decap_en = en_decap;
ft_attr.reformat_en = en_encap;
+ ft_attr.uid = uid;
ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr,
NULL, &tbl->table_id);
@@ -243,7 +244,8 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
return ret;
}
-struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags)
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level,
+ u32 flags, u16 uid)
{
struct mlx5dr_table *tbl;
int ret;
@@ -263,7 +265,7 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u
if (ret)
goto free_tbl;
- ret = dr_table_create_sw_owned_tbl(tbl);
+ ret = dr_table_create_sw_owned_tbl(tbl, uid);
if (ret)
goto uninit_tbl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 98320e3945ad..50b0dd4fb4a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -1200,6 +1200,7 @@ struct mlx5dr_cmd_query_flow_table_details {
struct mlx5dr_cmd_create_flow_table_attr {
u32 table_type;
+ u16 uid;
u64 icm_addr_rx;
u64 icm_addr_tx;
u8 level;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 6a9abba92df6..c30ed8e18458 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -62,7 +62,7 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
- unsigned int size,
+ struct mlx5_flow_table_attr *ft_attr,
struct mlx5_flow_table *next_ft)
{
struct mlx5dr_table *tbl;
@@ -71,7 +71,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
if (mlx5_dr_is_fw_table(ft->flags))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
- size,
+ ft_attr,
next_ft);
flags = ft->flags;
/* turn off encap/decap if not supported for sw-str by fw */
@@ -79,7 +79,8 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags);
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags,
+ ft_attr->uid);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 7626c85643b1..3bb14860b36d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -51,7 +51,8 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn);
struct mlx5dr_table *
-mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags);
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
+ u16 uid);
struct mlx5dr_table *
mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 604b85dd770a..b5f58fd37a0f 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -387,21 +387,6 @@ enum {
};
enum {
- MLX5_DEV_CAP_FLAG_XRC = 1LL << 3,
- MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8,
- MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9,
- MLX5_DEV_CAP_FLAG_APM = 1LL << 17,
- MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
- MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23,
- MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24,
- MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29,
- MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30,
- MLX5_DEV_CAP_FLAG_DCT = 1LL << 37,
- MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40,
- MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46,
-};
-
-enum {
MLX5_ROCE_VERSION_1 = 0,
MLX5_ROCE_VERSION_2 = 2,
};
@@ -455,6 +440,7 @@ enum {
MLX5_OPCODE_UMR = 0x25,
+ MLX5_OPCODE_ACCESS_ASO = 0x2d,
};
enum {
@@ -496,10 +482,6 @@ enum {
};
enum {
- MLX5_CAP_OFF_CMDIF_CSUM = 46,
-};
-
-enum {
/*
* Max wqe size for rdma read is 512 bytes, so this
* limits our max_sge_rd as the wqe needs to fit:
@@ -840,7 +822,10 @@ struct mlx5_cqe64 {
__be32 timestamp_l;
__be32 sop_drop_qpn;
__be16 wqe_counter;
- u8 signature;
+ union {
+ u8 signature;
+ u8 validity_iteration_count;
+ };
u8 op_own;
};
@@ -872,6 +857,11 @@ enum {
MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3,
};
+enum {
+ MLX5_CQE_COMPRESS_LAYOUT_BASIC = 0,
+ MLX5_CQE_COMPRESS_LAYOUT_ENHANCED = 1,
+};
+
#define MLX5_MINI_CQE_ARRAY_SIZE 8
static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
@@ -884,6 +874,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
return cqe->op_own >> 4;
}
+static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe)
+{
+ /* num_of_mini_cqes is zero based */
+ return get_cqe_opcode(cqe) + 1;
+}
+
static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
{
return (cqe->lro.tcppsh_abort_dupack >> 6) & 1;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5040cd774c5a..220597c2f436 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -676,6 +676,7 @@ struct mlx5e_resources {
enum mlx5_sw_icm_type {
MLX5_SW_ICM_TYPE_STEERING,
MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN,
};
#define MLX5_MAX_RESERVED_GIDS 8
@@ -727,10 +728,10 @@ enum {
};
enum {
- MR_CACHE_LAST_STD_ENTRY = 20,
+ MKEY_CACHE_LAST_STD_ENTRY = 20,
MLX5_IMR_MTT_CACHE_ENTRY,
MLX5_IMR_KSM_CACHE_ENTRY,
- MAX_MR_CACHE_ENTRIES
+ MAX_MKEY_CACHE_ENTRIES
};
struct mlx5_profile {
@@ -739,7 +740,7 @@ struct mlx5_profile {
struct {
int size;
int limit;
- } mr_cache[MAX_MR_CACHE_ENTRIES];
+ } mr_cache[MAX_MKEY_CACHE_ENTRIES];
};
struct mlx5_hca_cap {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 8135713b0d2d..8e73c377da2c 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -178,6 +178,7 @@ struct mlx5_flow_table_attr {
int max_fte;
u32 level;
u32 flags;
+ u16 uid;
struct mlx5_flow_table *next_ft;
struct {
@@ -212,6 +213,19 @@ struct mlx5_flow_group *
mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
+struct mlx5_exe_aso {
+ u32 object_id;
+ u8 type;
+ u8 return_reg_id;
+ union {
+ u32 ctrl_data;
+ struct {
+ u8 meter_idx;
+ u8 init_color;
+ } flow_meter;
+ };
+};
+
struct mlx5_fs_vlan {
u16 ethtype;
u16 vid;
@@ -237,6 +251,7 @@ struct mlx5_flow_act {
struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
struct ib_counters *counters;
struct mlx5_flow_group *fg;
+ struct mlx5_exe_aso exe_aso;
};
#define MLX5_DECLARE_FLOW_ACT(name) \
@@ -301,4 +316,5 @@ struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
struct mlx5_pkt_reformat *reformat);
+u32 mlx5_flow_table_id(struct mlx5_flow_table *ft);
#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fd7d083a34d3..9321d774e2d8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -442,7 +442,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 max_modify_header_actions[0x8];
u8 max_ft_level[0x8];
- u8 reserved_at_40[0x20];
+ u8 reserved_at_40[0x6];
+ u8 execute_aso[0x1];
+ u8 reserved_at_47[0x19];
u8 reserved_at_60[0x2];
u8 reformat_insert[0x1];
@@ -940,7 +942,17 @@ struct mlx5_ifc_qos_cap_bits {
u8 max_tsar_bw_share[0x20];
- u8 reserved_at_100[0x700];
+ u8 reserved_at_100[0x20];
+
+ u8 reserved_at_120[0x3];
+ u8 log_meter_aso_granularity[0x5];
+ u8 reserved_at_128[0x3];
+ u8 log_meter_aso_max_alloc[0x5];
+ u8 reserved_at_130[0x3];
+ u8 log_max_num_meter_aso[0x5];
+ u8 reserved_at_138[0x8];
+
+ u8 reserved_at_140[0x6c0];
};
struct mlx5_ifc_debug_cap_bits {
@@ -1086,11 +1098,14 @@ struct mlx5_ifc_device_mem_cap_bits {
u8 log_sw_icm_alloc_granularity[0x6];
u8 log_steering_sw_icm_size[0x8];
- u8 reserved_at_120[0x20];
+ u8 reserved_at_120[0x18];
+ u8 log_header_modify_pattern_sw_icm_size[0x8];
u8 header_modify_sw_icm_start_address[0x40];
- u8 reserved_at_180[0x80];
+ u8 reserved_at_180[0x40];
+
+ u8 header_modify_pattern_sw_icm_start_address[0x40];
u8 memic_operations[0x20];
@@ -1356,7 +1371,9 @@ enum {
};
struct mlx5_ifc_cmd_hca_cap_bits {
- u8 reserved_at_0[0x1f];
+ u8 reserved_at_0[0x10];
+ u8 shared_object_to_user_object_allowed[0x1];
+ u8 reserved_at_13[0xe];
u8 vhca_resource_manager[0x1];
u8 hca_cap_2[0x1];
@@ -1426,7 +1443,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_120[0xa];
u8 log_max_ra_req_dc[0x6];
- u8 reserved_at_130[0xa];
+ u8 reserved_at_130[0x9];
+ u8 vnic_env_cq_overrun[0x1];
u8 log_max_ra_res_dc[0x6];
u8 reserved_at_140[0x5];
@@ -1621,7 +1639,11 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 nic_receive_steering_discard[0x1];
u8 receive_discard_vport_down[0x1];
u8 transmit_discard_vport_down[0x1];
- u8 reserved_at_343[0x5];
+ u8 eq_overrun_count[0x1];
+ u8 reserved_at_344[0x1];
+ u8 invalid_command_count[0x1];
+ u8 quota_exceeded_count[0x1];
+ u8 reserved_at_347[0x1];
u8 log_max_flow_counter_bulk[0x8];
u8 max_flow_counter_15_0[0x10];
@@ -1719,7 +1741,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_dci_errored_streams[0x5];
u8 reserved_at_598[0x8];
- u8 reserved_at_5a0[0x13];
+ u8 reserved_at_5a0[0x10];
+ u8 enhanced_cqe_compression[0x1];
+ u8 reserved_at_5b1[0x2];
u8 log_max_dek[0x5];
u8 reserved_at_5b8[0x4];
u8 mini_cqe_resp_stride_index[0x1];
@@ -3277,6 +3301,7 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000,
MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000,
+ MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO = 0x4000,
};
enum {
@@ -3292,6 +3317,38 @@ struct mlx5_ifc_vlan_bits {
u8 vid[0xc];
};
+enum {
+ MLX5_FLOW_METER_COLOR_RED = 0x0,
+ MLX5_FLOW_METER_COLOR_YELLOW = 0x1,
+ MLX5_FLOW_METER_COLOR_GREEN = 0x2,
+ MLX5_FLOW_METER_COLOR_UNDEFINED = 0x3,
+};
+
+enum {
+ MLX5_EXE_ASO_FLOW_METER = 0x2,
+};
+
+struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits {
+ u8 return_reg_id[0x4];
+ u8 aso_type[0x4];
+ u8 reserved_at_8[0x14];
+ u8 action[0x1];
+ u8 init_color[0x2];
+ u8 meter_id[0x1];
+};
+
+union mlx5_ifc_exe_aso_ctrl {
+ struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits exe_aso_ctrl_flow_meter;
+};
+
+struct mlx5_ifc_execute_aso_bits {
+ u8 valid[0x1];
+ u8 reserved_at_1[0x7];
+ u8 aso_object_id[0x18];
+
+ union mlx5_ifc_exe_aso_ctrl exe_aso_ctrl;
+};
+
struct mlx5_ifc_flow_context_bits {
struct mlx5_ifc_vlan_bits push_vlan;
@@ -3323,7 +3380,9 @@ struct mlx5_ifc_flow_context_bits {
struct mlx5_ifc_fte_match_param_bits match_value;
- u8 reserved_at_1200[0x600];
+ struct mlx5_ifc_execute_aso_bits execute_aso[4];
+
+ u8 reserved_at_1300[0x500];
union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[];
};
@@ -3391,11 +3450,21 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits {
u8 transmit_discard_vport_down[0x40];
- u8 reserved_at_140[0xa0];
+ u8 async_eq_overrun[0x20];
+
+ u8 comp_eq_overrun[0x20];
+
+ u8 reserved_at_180[0x20];
+
+ u8 invalid_command[0x20];
+
+ u8 quota_exceeded_command[0x20];
u8 internal_rq_out_of_buffer[0x20];
- u8 reserved_at_200[0xe00];
+ u8 cq_overrun[0x20];
+
+ u8 reserved_at_220[0xde0];
};
struct mlx5_ifc_traffic_counter_bits {
@@ -4074,7 +4143,8 @@ struct mlx5_ifc_cqc_bits {
u8 cqe_comp_en[0x1];
u8 mini_cqe_res_format[0x2];
u8 st[0x4];
- u8 reserved_at_18[0x8];
+ u8 reserved_at_18[0x6];
+ u8 cqe_compression_layout[0x2];
u8 reserved_at_20[0x20];
@@ -5970,7 +6040,9 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
u8 obj_id[0x20];
- u8 reserved_at_60[0x20];
+ u8 reserved_at_60[0x3];
+ u8 log_obj_range[0x5];
+ u8 reserved_at_68[0x18];
};
struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
@@ -8437,7 +8509,7 @@ struct mlx5_ifc_create_flow_table_out_bits {
struct mlx5_ifc_create_flow_table_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -11370,12 +11442,14 @@ enum {
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20),
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24),
};
enum {
MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc,
MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13,
MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24,
};
enum {
@@ -11448,6 +11522,61 @@ struct mlx5_ifc_create_encryption_key_in_bits {
struct mlx5_ifc_encryption_key_obj_bits encryption_key_object;
};
+enum {
+ MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH = 0x0,
+ MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2 = 0x1,
+ MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2_IPG = 0x2,
+ MLX5_FLOW_METER_MODE_NUM_PACKETS = 0x3,
+};
+
+struct mlx5_ifc_flow_meter_parameters_bits {
+ u8 valid[0x1];
+ u8 bucket_overflow[0x1];
+ u8 start_color[0x2];
+ u8 both_buckets_on_green[0x1];
+ u8 reserved_at_5[0x1];
+ u8 meter_mode[0x2];
+ u8 reserved_at_8[0x18];
+
+ u8 reserved_at_20[0x20];
+
+ u8 reserved_at_40[0x3];
+ u8 cbs_exponent[0x5];
+ u8 cbs_mantissa[0x8];
+ u8 reserved_at_50[0x3];
+ u8 cir_exponent[0x5];
+ u8 cir_mantissa[0x8];
+
+ u8 reserved_at_60[0x20];
+
+ u8 reserved_at_80[0x3];
+ u8 ebs_exponent[0x5];
+ u8 ebs_mantissa[0x8];
+ u8 reserved_at_90[0x3];
+ u8 eir_exponent[0x5];
+ u8 eir_mantissa[0x8];
+
+ u8 reserved_at_a0[0x60];
+};
+
+struct mlx5_ifc_flow_meter_aso_obj_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x40];
+
+ u8 reserved_at_80[0x8];
+ u8 meter_aso_access_pd[0x18];
+
+ u8 reserved_at_a0[0x160];
+
+ struct mlx5_ifc_flow_meter_parameters_bits flow_meter_parameters[2];
+};
+
+struct mlx5_ifc_create_flow_meter_aso_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+ struct mlx5_ifc_flow_meter_aso_obj_bits flow_meter_aso_obj;
+};
+
struct mlx5_ifc_sampler_obj_bits {
u8 modify_field_select[0x40];
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9c6317cf80d5..7c2f76f34f6f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4603,7 +4603,7 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
/**
* ib_lid_cpu16 - Return lid in 16bit CPU encoding.
- * In the current implementation the only way to get
+ * In the current implementation the only way to
* get the 32bit lid is from other sources for OPA.
* For IB, lids will always be 16bits so cast the
* value accordingly.
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index d989f030fae0..5b18e2e36ee6 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -108,6 +108,7 @@ struct rdma_cm_id {
enum rdma_ucm_port_space ps;
enum ib_qp_type qp_type;
u32 port_num;
+ struct work_struct net_work;
};
struct rdma_cm_id *
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index e539c84d63f1..3bee490eb585 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -228,6 +228,7 @@ enum mlx5_ib_objects {
MLX5_IB_OBJECT_VAR,
MLX5_IB_OBJECT_PP,
MLX5_IB_OBJECT_UAR,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
};
enum mlx5_ib_flow_matcher_create_attrs {
@@ -248,6 +249,22 @@ enum mlx5_ib_flow_matcher_methods {
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
};
+enum mlx5_ib_flow_steering_anchor_create_attrs {
+ MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
+ MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
+ MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+};
+
+enum mlx5_ib_flow_steering_anchor_destroy_attrs {
+ MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_steering_anchor_methods {
+ MLX5_IB_METHOD_STEERING_ANCHOR_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
+};
+
enum mlx5_ib_device_query_context_attrs {
MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT),
};
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index a21ca8ece8db..7af9e09ea556 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -63,6 +63,7 @@ enum mlx5_ib_uapi_dm_type {
MLX5_IB_UAPI_DM_TYPE_MEMIC,
MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
+ MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM,
};
enum mlx5_ib_uapi_devx_create_event_channel_flags {