aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/cma.c13
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c17
-rw-r--r--drivers/infiniband/core/verbs.c62
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx4/main.c48
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h42
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c1031
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c20
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h4
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c421
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c9
-rw-r--r--drivers/infiniband/hw/mlx5/main.c325
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h79
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c18
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c122
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c12
24 files changed, 2093 insertions, 150 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0eb393237ba2..a8c2f0ccd225 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3998,7 +3998,8 @@ static void iboe_mcast_work_handler(struct work_struct *work)
kfree(mw);
}
-static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type)
{
struct sockaddr_in *sin = (struct sockaddr_in *)addr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
@@ -4008,8 +4009,8 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- mgid->raw[0] = 0xff;
- mgid->raw[1] = 0x0e;
+ mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
+ mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@@ -4050,7 +4051,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
goto out1;
}
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -4066,8 +4069,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
- gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2c98533a0203..60535c754db3 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1383,8 +1383,9 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
- if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
- sizeof(cmd->reserved1)) && cmd->reserved1) {
+ if (cmd_sz > sizeof(*cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(*cmd),
+ cmd_sz - sizeof(*cmd))) {
ret = -EOPNOTSUPP;
goto err_put;
}
@@ -1482,11 +1483,21 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
IB_QP_CREATE_SCATTER_FCS |
- IB_QP_CREATE_CVLAN_STRIPPING)) {
+ IB_QP_CREATE_CVLAN_STRIPPING |
+ IB_QP_CREATE_SOURCE_QPN)) {
ret = -EINVAL;
goto err_put;
}
+ if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (!capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ goto err_put;
+ }
+
+ attr.source_qpn = cmd->source_qpn;
+ }
+
buf = (void *)cmd + sizeof(*cmd);
if (cmd_sz > sizeof(*cmd))
if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index fb98ed67d5bc..e8006677b01c 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1244,6 +1244,18 @@ int ib_resolve_eth_dmac(struct ib_device *device,
if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
ah_attr->roce.dmac);
+ return 0;
+ }
+ if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ __be32 addr = 0;
+
+ memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
+ ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
+ } else {
+ ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
+ (char *)ah_attr->roce.dmac);
+ }
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1569,15 +1581,53 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
/* Multicast groups */
+static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
+{
+ struct ib_qp_init_attr init_attr = {};
+ struct ib_qp_attr attr = {};
+ int num_eth_ports = 0;
+ int port;
+
+ /* If QP state >= init, it is assigned to a port and we can check this
+ * port only.
+ */
+ if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
+ if (attr.qp_state >= IB_QPS_INIT) {
+ if (qp->device->get_link_layer(qp->device, attr.port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return true;
+ goto lid_check;
+ }
+ }
+
+ /* Can't get a quick answer, iterate over all ports */
+ for (port = 0; port < qp->device->phys_port_cnt; port++)
+ if (qp->device->get_link_layer(qp->device, port) !=
+ IB_LINK_LAYER_INFINIBAND)
+ num_eth_ports++;
+
+ /* If we have at lease one Ethernet port, RoCE annex declares that
+ * multicast LID should be ignored. We can't tell at this step if the
+ * QP belongs to an IB or Ethernet port.
+ */
+ if (num_eth_ports)
+ return true;
+
+ /* If all the ports are IB, we can check according to IB spec. */
+lid_check:
+ return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE));
+}
+
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
- lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- lid == be16_to_cpu(IB_LID_PERMISSIVE))
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1593,9 +1643,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
- lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- lid == be16_to_cpu(IB_LID_PERMISSIVE))
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index ff931c580557..95382faa7ad1 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -218,6 +218,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_mtt;
uar = &to_mucontext(context)->uar;
+ cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
@@ -233,6 +234,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_db;
uar = &dev->priv_uar;
+ cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
}
if (dev->eq_table)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d1b43cbbfea7..1dee0a51ef67 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -81,6 +81,8 @@ static const char mlx4_ib_version[] =
DRV_VERSION "\n";
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
+ u8 port_num);
static struct workqueue_struct *wq;
@@ -552,6 +554,16 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
+ if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
+ (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
+ mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) {
+ props->rss_caps.max_rwq_indirection_tables = props->max_qp;
+ props->rss_caps.max_rwq_indirection_table_size =
+ dev->dev->caps.max_rss_tbl_sz;
+ props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
+ props->max_wq_type_rq = props->max_qp;
+ }
+
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
@@ -563,6 +575,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
}
}
+ if (uhw->outlen >= resp.response_length +
+ sizeof(resp.max_inl_recv_sz)) {
+ resp.response_length += sizeof(resp.max_inl_recv_sz);
+ resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg);
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -1069,6 +1088,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
+ INIT_LIST_HEAD(&context->wqn_ranges_list);
+ mutex_init(&context->wqn_ranges_mutex);
+
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
@@ -2713,6 +2735,26 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
+ ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
+ IB_LINK_LAYER_ETHERNET) ||
+ (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
+ IB_LINK_LAYER_ETHERNET))) {
+ ibdev->ib_dev.create_wq = mlx4_ib_create_wq;
+ ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq;
+ ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq;
+ ibdev->ib_dev.create_rwq_ind_table =
+ mlx4_ib_create_rwq_ind_table;
+ ibdev->ib_dev.destroy_rwq_ind_table =
+ mlx4_ib_destroy_rwq_ind_table;
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ }
+
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
@@ -2772,7 +2814,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
allocated = 0;
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
- err = mlx4_counter_alloc(ibdev->dev, &counter_index);
+ err = mlx4_counter_alloc(ibdev->dev, &counter_index,
+ MLX4_RES_USAGE_DRIVER);
/* if failed to allocate a new counter, use default */
if (err)
counter_index =
@@ -2827,7 +2870,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN,
- &ibdev->steer_qpn_base, 0);
+ &ibdev->steer_qpn_base, 0,
+ MLX4_RES_USAGE_DRIVER);
if (err)
goto err_counter;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 9db82e67e959..1fa19820355a 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -46,6 +46,7 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/qp.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@@ -88,6 +89,8 @@ struct mlx4_ib_ucontext {
struct list_head db_page_list;
struct mutex db_page_mutex;
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
+ struct list_head wqn_ranges_list;
+ struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};
struct mlx4_ib_pd {
@@ -289,8 +292,25 @@ struct mlx4_roce_smac_vlan_info {
int update_vid;
};
+struct mlx4_wqn_range {
+ int base_wqn;
+ int size;
+ int refcount;
+ bool dirty;
+ struct list_head list;
+};
+
+struct mlx4_ib_rss {
+ unsigned int base_qpn_tbl_sz;
+ u8 flags;
+ u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+};
+
struct mlx4_ib_qp {
- struct ib_qp ibqp;
+ union {
+ struct ib_qp ibqp;
+ struct ib_wq ibwq;
+ };
struct mlx4_qp mqp;
struct mlx4_buf buf;
@@ -318,6 +338,7 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
+ u32 inl_recv_sz;
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
@@ -328,6 +349,10 @@ struct mlx4_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct counter_index *counter_index;
+ struct mlx4_wqn_range *wqn_range;
+ /* Number of RSS QP parents that uses this WQ */
+ u32 rss_usecnt;
+ struct mlx4_ib_rss *rss_ctx;
};
struct mlx4_ib_srq {
@@ -623,6 +648,8 @@ struct mlx4_uverbs_ex_query_device_resp {
__u32 comp_mask;
__u32 response_length;
__u64 hca_core_clock_offset;
+ __u32 max_inl_recv_sz;
+ __u32 reserved;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -890,4 +917,17 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
+struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_wq(struct ib_wq *wq);
+int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+
+struct ib_rwq_ind_table
+*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 75c0e6c5dd56..e42acfb20588 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -53,6 +53,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state);
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
@@ -116,6 +117,11 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
};
+enum mlx4_ib_source_type {
+ MLX4_IB_QP_SRC = 0,
+ MLX4_IB_RWQ_SRC = 1,
+};
+
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
@@ -330,6 +336,12 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
}
}
+static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type)
+{
+ pr_warn_ratelimited("Unexpected event type %d on WQ 0x%06x. Events are not supported for WQs\n",
+ type, qp->qpn);
+}
+
static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
/*
@@ -377,7 +389,8 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_rq, struct mlx4_ib_qp *qp)
+ int is_user, int has_rq, struct mlx4_ib_qp *qp,
+ u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
@@ -385,18 +398,24 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
if (!has_rq) {
- if (cap->max_recv_wr)
+ if (cap->max_recv_wr || inl_recv_sz)
return -EINVAL;
qp->rq.wqe_cnt = qp->rq.max_gs = 0;
} else {
+ u32 max_inl_recv_sz = dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg);
+ u32 wqe_size;
+
/* HW requires >= 1 RQ entry with >= 1 gather entry */
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge ||
+ inl_recv_sz > max_inl_recv_sz))
return -EINVAL;
qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
- qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
+ wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg);
+ qp->rq.wqe_shift = ilog2(max_t(u32, wqe_size, inl_recv_sz));
}
/* leave userspace return values as they were, so as not to break ABI */
@@ -632,7 +651,297 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
qp->counter_index = NULL;
}
+static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_create_qp_rss *ucmd)
+{
+ rss_ctx->base_qpn_tbl_sz = init_attr->rwq_ind_tbl->ind_tbl[0]->wq_num |
+ (init_attr->rwq_ind_tbl->log_ind_tbl_size << 24);
+
+ if ((ucmd->rx_hash_function == MLX4_IB_RX_HASH_FUNC_TOEPLITZ) &&
+ (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) {
+ memcpy(rss_ctx->rss_key, ucmd->rx_hash_key,
+ MLX4_EN_RSS_KEY_SIZE);
+ } else {
+ pr_debug("RX Hash function is not supported\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
+ rss_ctx->flags = MLX4_RSS_IPV4;
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
+ pr_debug("RX Hash fields_mask is not supported - both IPv4 SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) {
+ rss_ctx->flags |= MLX4_RSS_IPV6;
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) {
+ pr_debug("RX Hash fields_mask is not supported - both IPv6 SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
+ pr_debug("RX Hash fields_mask for UDP is not supported\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if (rss_ctx->flags & MLX4_RSS_IPV4) {
+ rss_ctx->flags |= MLX4_RSS_UDP_IPV4;
+ } else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+ rss_ctx->flags |= MLX4_RSS_UDP_IPV6;
+ } else {
+ pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n");
+ return (-EOPNOTSUPP);
+ }
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ pr_debug("RX Hash fields_mask is not supported - both UDP SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
+ if (rss_ctx->flags & MLX4_RSS_IPV4) {
+ rss_ctx->flags |= MLX4_RSS_TCP_IPV4;
+ } else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+ rss_ctx->flags |= MLX4_RSS_TCP_IPV6;
+ } else {
+ pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n");
+ return (-EOPNOTSUPP);
+ }
+
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
+ pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ return 0;
+}
+
+static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_create_qp_rss *ucmd,
+ struct mlx4_ib_qp *qp)
+{
+ int qpn;
+ int err;
+
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0, qp->mqp.usage);
+ if (err)
+ return err;
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ mutex_init(&qp->mutex);
+
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+
+ qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE;
+ qp->state = IB_QPS_RESET;
+
+ /* Set dummy send resources to be compatible with HV and PRM */
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ qp->buf_size = qp->sq.wqe_cnt << MLX4_IB_MIN_SQ_STRIDE;
+ qp->mtt = (to_mqp(
+ (struct ib_qp *)init_attr->rwq_ind_tbl->ind_tbl[0]))->mtt;
+
+ qp->rss_ctx = kzalloc(sizeof(*qp->rss_ctx), GFP_KERNEL);
+ if (!qp->rss_ctx) {
+ err = -ENOMEM;
+ goto err_qp_alloc;
+ }
+
+ err = set_qp_rss(dev, qp->rss_ctx, init_attr, ucmd);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ kfree(qp->rss_ctx);
+
+err_qp_alloc:
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+ mlx4_qp_free(dev->dev, &qp->mqp);
+
+err_qpn:
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ return err;
+}
+
+static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *qp;
+ struct mlx4_ib_create_qp_rss ucmd = {};
+ size_t required_cmd_sz;
+ int err;
+
+ if (!udata) {
+ pr_debug("RSS QP with NULL udata\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (udata->outlen)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
+ sizeof(ucmd.reserved1);
+ if (udata->inlen < required_cmd_sz) {
+ pr_debug("invalid inlen\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ pr_debug("copy failed\n");
+ return ERR_PTR(-EFAULT);
+ }
+
+ if (ucmd.comp_mask || ucmd.reserved1)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ pr_debug("inlen is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ pr_debug("RSS QP with unsupported QP type %d\n",
+ init_attr->qp_type);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->create_flags) {
+ pr_debug("RSS QP doesn't support create flags\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->send_cq || init_attr->cap.max_send_wr) {
+ pr_debug("RSS QP with unsupported send attributes\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+
+ err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibqp.qp_num = qp->mqp.qpn;
+
+ return &qp->ibqp;
+}
+
+/*
+ * This function allocates a WQN from a range which is consecutive and aligned
+ * to its size. In case the range is full, then it creates a new range and
+ * allocates WQN from it. The new range will be used for following allocations.
+ */
+static int mlx4_ib_alloc_wqn(struct mlx4_ib_ucontext *context,
+ struct mlx4_ib_qp *qp, int range_size, int *wqn)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device);
+ struct mlx4_wqn_range *range;
+ int err = 0;
+
+ mutex_lock(&context->wqn_ranges_mutex);
+
+ range = list_first_entry_or_null(&context->wqn_ranges_list,
+ struct mlx4_wqn_range, list);
+
+ if (!range || (range->refcount == range->size) || range->dirty) {
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx4_qp_reserve_range(dev->dev, range_size,
+ range_size, &range->base_wqn, 0,
+ qp->mqp.usage);
+ if (err) {
+ kfree(range);
+ goto out;
+ }
+
+ range->size = range_size;
+ list_add(&range->list, &context->wqn_ranges_list);
+ } else if (range_size != 1) {
+ /*
+ * Requesting a new range (>1) when last range is still open, is
+ * not valid.
+ */
+ err = -EINVAL;
+ goto out;
+ }
+
+ qp->wqn_range = range;
+
+ *wqn = range->base_wqn + range->refcount;
+
+ range->refcount++;
+
+out:
+ mutex_unlock(&context->wqn_ranges_mutex);
+
+ return err;
+}
+
+static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
+ struct mlx4_ib_qp *qp, bool dirty_release)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device);
+ struct mlx4_wqn_range *range;
+
+ mutex_lock(&context->wqn_ranges_mutex);
+
+ range = qp->wqn_range;
+
+ range->refcount--;
+ if (!range->refcount) {
+ mlx4_qp_release_range(dev->dev, range->base_wqn,
+ range->size);
+ list_del(&range->list);
+ kfree(range);
+ } else if (dirty_release) {
+ /*
+ * A range which one of its WQNs is destroyed, won't be able to be
+ * reused for further WQN allocations.
+ * The next created WQ will allocate a new range.
+ */
+ range->dirty = 1;
+ }
+
+ mutex_unlock(&context->wqn_ranges_mutex);
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ enum mlx4_ib_source_type src,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn,
struct mlx4_ib_qp **caller_qp)
@@ -645,6 +954,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
unsigned long flags;
+ int range_size = 0;
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
@@ -719,26 +1029,71 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
- if (err)
- goto err;
if (pd->uobject) {
- struct mlx4_ib_create_qp ucmd;
+ union {
+ struct mlx4_ib_create_qp qp;
+ struct mlx4_ib_create_wq wq;
+ } ucmd;
+ size_t copy_len;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ copy_len = (src == MLX4_IB_QP_SRC) ?
+ sizeof(struct mlx4_ib_create_qp) :
+ min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+
+ if (ib_copy_from_udata(&ucmd, udata, copy_len)) {
err = -EFAULT;
goto err;
}
- qp->sq_no_prefetch = ucmd.sq_no_prefetch;
+ if (src == MLX4_IB_RWQ_SRC) {
+ if (ucmd.wq.comp_mask || ucmd.wq.reserved1 ||
+ ucmd.wq.reserved[0] || ucmd.wq.reserved[1] ||
+ ucmd.wq.reserved[2]) {
+ pr_debug("user command isn't supported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (ucmd.wq.log_range_size >
+ ilog2(dev->dev->caps.max_rss_tbl_sz)) {
+ pr_debug("WQN range size must be equal or smaller than %d\n",
+ dev->dev->caps.max_rss_tbl_sz);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+ range_size = 1 << ucmd.wq.log_range_size;
+ } else {
+ qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
+ }
- err = set_user_sq_size(dev, qp, &ucmd);
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ qp_has_rq(init_attr), qp, qp->inl_recv_sz);
if (err)
goto err;
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
+ if (src == MLX4_IB_QP_SRC) {
+ qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch;
+
+ err = set_user_sq_size(dev, qp,
+ (struct mlx4_ib_create_qp *)
+ &ucmd);
+ if (err)
+ goto err;
+ } else {
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ /* Allocated buffer expects to have at least that SQ
+ * size.
+ */
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+ }
+
+ qp->umem = ib_umem_get(pd->uobject->context,
+ (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
+ ucmd.wq.buf_addr, qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
@@ -755,11 +1110,18 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
- ucmd.db_addr, &qp->db);
+ (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr :
+ ucmd.wq.db_addr, &qp->db);
if (err)
goto err_mtt;
}
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ qp_has_rq(init_attr), qp, 0);
+ if (err)
+ goto err;
+
qp->sq_no_prefetch = 0;
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
@@ -826,6 +1188,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_wrid;
}
+ qp->mqp.usage = MLX4_RES_USAGE_DRIVER;
}
if (sqpn) {
@@ -836,6 +1199,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_wrid;
}
}
+ } else if (src == MLX4_IB_RWQ_SRC) {
+ err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp,
+ range_size, &qpn);
+ if (err)
+ goto err_wrid;
} else {
/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
* otherwise, the WQE BlueFlame setup flow wrongly causes
@@ -845,13 +1213,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
(init_attr->cap.max_send_wr ?
MLX4_RESERVE_ETH_BF_QP : 0) |
(init_attr->cap.max_recv_wr ?
- MLX4_RESERVE_A0_QP : 0));
+ MLX4_RESERVE_A0_QP : 0),
+ qp->mqp.usage);
else
if (qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1,
- &qpn, 0);
+ &qpn, 0, qp->mqp.usage);
if (err)
goto err_proxy;
}
@@ -873,7 +1242,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
*/
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx4_ib_qp_event;
+ qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event :
+ mlx4_ib_wq_event;
+
if (!*caller_qp)
*caller_qp = qp;
@@ -900,6 +1271,9 @@ err_qpn:
if (!sqpn) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else if (src == MLX4_IB_RWQ_SRC)
+ mlx4_ib_release_wqn(to_mucontext(pd->uobject->context),
+ qp, 0);
else
mlx4_qp_release_range(dev->dev, qpn, 1);
}
@@ -998,7 +1372,7 @@ static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
return to_mpd(qp->ibqp.pd);
}
-static void get_cqs(struct mlx4_ib_qp *qp,
+static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src,
struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
{
switch (qp->ibqp.qp_type) {
@@ -1011,14 +1385,46 @@ static void get_cqs(struct mlx4_ib_qp *qp,
*recv_cq = *send_cq;
break;
default:
- *send_cq = to_mcq(qp->ibqp.send_cq);
- *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ *recv_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.recv_cq) :
+ to_mcq(qp->ibwq.cq);
+ *send_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.send_cq) :
+ *recv_cq;
break;
}
}
+static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ if (qp->state != IB_QPS_RESET) {
+ int i;
+
+ for (i = 0; i < (1 << qp->ibqp.rwq_ind_tbl->log_ind_tbl_size);
+ i++) {
+ struct ib_wq *ibwq = qp->ibqp.rwq_ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+
+ if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
+ MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
+ pr_warn("modify QP %06x to RESET failed.\n",
+ qp->mqp.qpn);
+ }
+
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+ mlx4_qp_free(dev->dev, &qp->mqp);
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ del_gid_entries(qp);
+ kfree(qp->rss_ctx);
+}
+
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
- int is_user)
+ enum mlx4_ib_source_type src, int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
unsigned long flags;
@@ -1051,7 +1457,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
}
}
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp, src, &send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(send_cq, recv_cq);
@@ -1077,6 +1483,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+ else if (src == MLX4_IB_RWQ_SRC)
+ mlx4_ib_release_wqn(to_mucontext(
+ qp->ibwq.uobject->context), qp, 1);
else
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
}
@@ -1084,9 +1493,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
- if (qp->rq.wqe_cnt)
- mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
- &qp->db);
+ if (qp->rq.wqe_cnt) {
+ struct mlx4_ib_ucontext *mcontext = !src ?
+ to_mucontext(qp->ibqp.uobject->context) :
+ to_mucontext(qp->ibwq.uobject->context);
+ mlx4_ib_db_unmap_user(mcontext, &qp->db);
+ }
ib_umem_release(qp->umem);
} else {
kvfree(qp->sq.wrid);
@@ -1128,6 +1540,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
+ if (init_attr->rwq_ind_tbl)
+ return _mlx4_ib_create_qp_rss(pd, init_attr, udata);
+
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
@@ -1182,8 +1597,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
/* fall through */
case IB_QPT_UD:
{
- err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp);
+ err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
+ init_attr, udata, 0, &qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -1203,7 +1618,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
if (udata)
return ERR_PTR(-EINVAL);
if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
- int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
+ 1, 1, &sqpn, 0,
+ MLX4_RES_USAGE_DRIVER);
if (res)
return ERR_PTR(res);
@@ -1211,8 +1628,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- sqpn, &qp);
+ err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
+ init_attr, udata, sqpn, &qp);
if (err)
return ERR_PTR(err);
@@ -1267,7 +1684,6 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
- struct mlx4_ib_pd *pd;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
@@ -1282,8 +1698,14 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
if (mqp->counter_index)
mlx4_ib_free_qp_counter(dev, mqp);
- pd = get_pd(mqp);
- destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
+ if (qp->rwq_ind_tbl) {
+ destroy_qp_rss(dev, mqp);
+ } else {
+ struct mlx4_ib_pd *pd;
+
+ pd = get_pd(mqp);
+ destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject);
+ }
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -1566,7 +1988,7 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
return 0;
- err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+ err = mlx4_counter_alloc(dev->dev, &tmp_idx, MLX4_RES_USAGE_DRIVER);
if (err)
return err;
@@ -1606,12 +2028,119 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
}
}
-static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+/*
+ * Go over all RSS QP's childes (WQs) and apply their HW state according to
+ * their logic state if the RSS QP is the first RSS QP associated for the WQ.
+ */
+static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ /* Mlx4_ib restrictions:
+ * WQ's is associated to a port according to the RSS QP it is
+ * associates to.
+ * In case the WQ is associated to a different port by another
+ * RSS QP, return a failure.
+ */
+ if ((wq->rss_usecnt > 0) && (wq->port != port_num)) {
+ err = -EINVAL;
+ mutex_unlock(&wq->mutex);
+ break;
+ }
+ wq->port = port_num;
+ if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) {
+ err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY);
+ if (err) {
+ mutex_unlock(&wq->mutex);
+ break;
+ }
+ }
+ wq->rss_usecnt++;
+
+ mutex_unlock(&wq->mutex);
+ }
+
+ if (i && err) {
+ int j;
+
+ for (j = (i - 1); j >= 0; j--) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[j];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ if ((wq->rss_usecnt == 1) &&
+ (ibwq->state == IB_WQS_RDY))
+ if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
+ pr_warn("failed to reverse WQN=0x%06x\n",
+ ibwq->wq_num);
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+ }
+
+ return err;
+}
+
+static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl)
+{
+ int i;
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY))
+ if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
+ pr_warn("failed to reverse WQN=%x\n",
+ ibwq->wq_num);
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+}
+
+static void fill_qp_rss_context(struct mlx4_qp_context *context,
+ struct mlx4_ib_qp *qp)
+{
+ struct mlx4_rss_context *rss_context;
+
+ rss_context = (void *)context + offsetof(struct mlx4_qp_context,
+ pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
+
+ rss_context->base_qpn = cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz);
+ rss_context->default_qpn =
+ cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz & 0xffffff);
+ if (qp->rss_ctx->flags & (MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6))
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ rss_context->flags = qp->rss_ctx->flags;
+ /* Currently support just toeplitz */
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+
+ memcpy(rss_context->rss_key, qp->rss_ctx->rss_key,
+ MLX4_EN_RSS_KEY_SIZE);
+}
+
+static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
- struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct ib_uobject *ibuobject;
+ struct ib_srq *ibsrq;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ enum ib_qp_type qp_type;
+ struct mlx4_ib_dev *dev;
+ struct mlx4_ib_qp *qp;
struct mlx4_ib_pd *pd;
struct mlx4_ib_cq *send_cq, *recv_cq;
struct mlx4_qp_context *context;
@@ -1621,6 +2150,30 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int err = -EINVAL;
int counter_index;
+ if (src_type == MLX4_IB_RWQ_SRC) {
+ struct ib_wq *ibwq;
+
+ ibwq = (struct ib_wq *)src;
+ ibuobject = ibwq->uobject;
+ ibsrq = NULL;
+ rwq_ind_tbl = NULL;
+ qp_type = IB_QPT_RAW_PACKET;
+ qp = to_mqp((struct ib_qp *)ibwq);
+ dev = to_mdev(ibwq->device);
+ pd = to_mpd(ibwq->pd);
+ } else {
+ struct ib_qp *ibqp;
+
+ ibqp = (struct ib_qp *)src;
+ ibuobject = ibqp->uobject;
+ ibsrq = ibqp->srq;
+ rwq_ind_tbl = ibqp->rwq_ind_tbl;
+ qp_type = ibqp->qp_type;
+ qp = to_mqp(ibqp);
+ dev = to_mdev(ibqp->device);
+ pd = get_pd(qp);
+ }
+
/* APM is not supported under RoCE */
if (attr_mask & IB_QP_ALT_PATH &&
rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
@@ -1634,6 +2187,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
+ if (rwq_ind_tbl) {
+ fill_qp_rss_context(context, qp);
+ context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET);
+ }
+
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
else {
@@ -1651,11 +2209,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
+ if (qp->inl_recv_sz)
+ context->param3 |= cpu_to_be32(1 << 25);
+
+ if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
- else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ else if (qp_type == IB_QPT_RAW_PACKET)
context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
- else if (ibqp->qp_type == IB_QPT_UD) {
+ else if (qp_type == IB_QPT_UD) {
if (qp->flags & MLX4_IB_QP_LSO)
context->mtu_msgmax = (IB_MTU_4096 << 5) |
ilog2(dev->dev->caps.max_gso_sz);
@@ -1671,9 +2232,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
ilog2(dev->dev->caps.max_msg_sz);
}
- if (qp->rq.wqe_cnt)
- context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
- context->rq_size_stride |= qp->rq.wqe_shift - 4;
+ if (!rwq_ind_tbl) { /* PRM RSS receive side should be left zeros */
+ if (qp->rq.wqe_cnt)
+ context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
+ context->rq_size_stride |= qp->rq.wqe_shift - 4;
+ }
if (qp->sq.wqe_cnt)
context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
@@ -1685,14 +2248,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
context->xrcd = cpu_to_be32((u32) qp->xrcdn);
- if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ if (qp_type == IB_QPT_RAW_PACKET)
context->param3 |= cpu_to_be32(1 << 30);
}
- if (qp->ibqp.uobject)
+ if (ibuobject)
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev,
- to_mucontext(ibqp->uobject->context)->uar.index));
+ to_mucontext(ibuobject->context)
+ ->uar.index));
else
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
@@ -1736,7 +2300,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
steer_qp = 1;
}
- if (ibqp->qp_type == IB_QPT_GSI) {
+ if (qp_type == IB_QPT_GSI) {
enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
@@ -1753,7 +2317,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
- u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+ u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
union ib_gid gid;
struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
@@ -1768,7 +2332,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int index =
rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
- status = ib_get_cached_gid(ibqp->device, port_num,
+ status = ib_get_cached_gid(&dev->ib_dev, port_num,
index, &gid, &gid_attr);
if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
status = -ENOENT;
@@ -1825,15 +2389,20 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
- pd = get_pd(qp);
- get_cqs(qp, &send_cq, &recv_cq);
- context->pd = cpu_to_be32(pd->pdn);
+ context->pd = cpu_to_be32(pd->pdn);
+
+ if (!rwq_ind_tbl) {
+ get_cqs(qp, src_type, &send_cq, &recv_cq);
+ } else { /* Set dummy CQs to be compatible with HV and PRM */
+ send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq);
+ recv_cq = send_cq;
+ }
context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
/* Set "fast registration enabled" for all kernel QPs */
- if (!qp->ibqp.uobject)
+ if (!ibuobject)
context->params1 |= cpu_to_be32(1 << 11);
if (attr_mask & IB_QP_RNR_RETRY) {
@@ -1868,7 +2437,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
}
- if (ibqp->srq)
+ if (ibsrq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
@@ -1899,17 +2468,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
- if (ibqp->srq)
- context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
+ if (ibsrq)
+ context->srqn = cpu_to_be32(1 << 24 |
+ to_msrq(ibsrq)->msrq.srqn);
- if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (qp->rq.wqe_cnt &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
- (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_UD ||
- ibqp->qp_type == IB_QPT_RAW_PACKET)) {
+ (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI ||
+ qp_type == IB_QPT_UD || qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
qp->mlx4_ib_qp_type &
@@ -1942,7 +2513,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp_type == IB_QPT_RAW_PACKET) {
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
@@ -1952,7 +2523,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+ if (qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
int is_eth = rdma_port_get_link_layer(
&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET;
@@ -1962,14 +2533,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
-
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
else
sqd_event = 0;
- if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (!ibuobject &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT)
context->rlkey_roce_mode |= (1 << 4);
/*
@@ -1978,7 +2550,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* headroom is stamped so that the hardware doesn't start
* processing stale work requests.
*/
- if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (!ibuobject &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT) {
struct mlx4_wqe_ctrl_seg *ctrl;
int i;
@@ -2035,9 +2609,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET) {
- if (!ibqp->uobject) {
+ if (!ibuobject) {
mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ ibsrq ? to_msrq(ibsrq) : NULL);
if (send_cq != recv_cq)
mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
@@ -2148,6 +2722,11 @@ out:
return err;
}
+enum {
+ MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK = (IB_QP_STATE |
+ IB_QP_PORT),
+};
+
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -2178,6 +2757,27 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (ibqp->rwq_ind_tbl) {
+ if (!(((cur_state == IB_QPS_RESET) &&
+ (new_state == IB_QPS_INIT)) ||
+ ((cur_state == IB_QPS_INIT) &&
+ (new_state == IB_QPS_RTR)))) {
+ pr_debug("qpn 0x%x: RSS QP unsupported transition %d to %d\n",
+ ibqp->qp_num, cur_state, new_state);
+
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (attr_mask & ~MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK) {
+ pr_debug("qpn 0x%x: RSS QP unsupported attribute mask 0x%x for transition %d to %d\n",
+ ibqp->qp_num, attr_mask, cur_state, new_state);
+
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
if ((ibqp->qp_type == IB_QPT_RC) ||
@@ -2242,7 +2842,17 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) {
+ err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num);
+ if (err)
+ goto out;
+ }
+
+ err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask,
+ cur_state, new_state);
+
+ if (ibqp->rwq_ind_tbl && err)
+ bring_down_rss_rwqs(ibqp->rwq_ind_tbl);
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
attr->port_num = 1;
@@ -3432,6 +4042,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
int mlx4_state;
int err = 0;
+ if (ibqp->rwq_ind_tbl)
+ return -EOPNOTSUPP;
+
mutex_lock(&qp->mutex);
if (qp->state == IB_QPS_RESET) {
@@ -3527,3 +4140,285 @@ out:
return err;
}
+struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev;
+ struct ib_qp_init_attr ib_qp_init_attr;
+ struct mlx4_ib_qp *qp;
+ struct mlx4_ib_create_wq ucmd;
+ int err, required_cmd_sz;
+
+ if (!(udata && pd->uobject))
+ return ERR_PTR(-EINVAL);
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz) {
+ pr_debug("invalid inlen\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ pr_debug("inlen is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (udata->outlen)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = to_mdev(pd->device);
+
+ if (init_attr->wq_type != IB_WQT_RQ) {
+ pr_debug("unsupported wq type %d\n", init_attr->wq_type);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->create_flags) {
+ pr_debug("unsupported create_flags %u\n",
+ init_attr->create_flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+
+ memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr));
+ ib_qp_init_attr.qp_context = init_attr->wq_context;
+ ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET;
+ ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr;
+ ib_qp_init_attr.cap.max_recv_sge = init_attr->max_sge;
+ ib_qp_init_attr.recv_cq = init_attr->cq;
+ ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */
+
+ err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
+ udata, 0, &qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibwq.event_handler = init_attr->event_handler;
+ qp->ibwq.wq_num = qp->mqp.qpn;
+ qp->ibwq.state = IB_WQS_RESET;
+
+ return &qp->ibwq;
+}
+
+static int ib_wq2qp_state(enum ib_wq_state state)
+{
+ switch (state) {
+ case IB_WQS_RESET:
+ return IB_QPS_RESET;
+ case IB_WQS_RDY:
+ return IB_QPS_RTR;
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
+{
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+ enum ib_qp_state qp_cur_state;
+ enum ib_qp_state qp_new_state;
+ int attr_mask;
+ int err;
+
+ /* ib_qp.state represents the WQ HW state while ib_wq.state represents
+ * the WQ logic state.
+ */
+ qp_cur_state = qp->state;
+ qp_new_state = ib_wq2qp_state(new_state);
+
+ if (ib_wq2qp_state(new_state) == qp_cur_state)
+ return 0;
+
+ if (new_state == IB_WQS_RDY) {
+ struct ib_qp_attr attr = {};
+
+ attr.port_num = qp->port;
+ attr_mask = IB_QP_PORT;
+
+ err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr,
+ attr_mask, IB_QPS_RESET, IB_QPS_INIT);
+ if (err) {
+ pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n",
+ ibwq->wq_num);
+ return err;
+ }
+
+ qp_cur_state = IB_QPS_INIT;
+ }
+
+ attr_mask = 0;
+ err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask,
+ qp_cur_state, qp_new_state);
+
+ if (err && (qp_cur_state == IB_QPS_INIT)) {
+ qp_new_state = IB_QPS_RESET;
+ if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL,
+ attr_mask, IB_QPS_INIT, IB_QPS_RESET)) {
+ pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n",
+ ibwq->wq_num);
+ qp_new_state = IB_QPS_INIT;
+ }
+ }
+
+ qp->state = qp_new_state;
+
+ return err;
+}
+
+int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+ struct mlx4_ib_modify_wq ucmd = {};
+ size_t required_cmd_sz;
+ enum ib_wq_state cur_state, new_state;
+ int err = 0;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
+ return -EFAULT;
+
+ if (ucmd.comp_mask || ucmd.reserved)
+ return -EOPNOTSUPP;
+
+ if (wq_attr_mask & IB_WQ_FLAGS)
+ return -EOPNOTSUPP;
+
+ cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state :
+ ibwq->state;
+ new_state = wq_attr_mask & IB_WQ_STATE ? wq_attr->wq_state : cur_state;
+
+ if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR ||
+ new_state < IB_WQS_RESET || new_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR))
+ return -EINVAL;
+
+ if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET))
+ return -EINVAL;
+
+ /* Need to protect against the parent RSS which also may modify WQ
+ * state.
+ */
+ mutex_lock(&qp->mutex);
+
+ /* Can update HW state only if a RSS QP has already associated to this
+ * WQ, so we can apply its port on the WQ.
+ */
+ if (qp->rss_usecnt)
+ err = _mlx4_ib_modify_wq(ibwq, new_state);
+
+ if (!err)
+ ibwq->state = new_state;
+
+ mutex_unlock(&qp->mutex);
+
+ return err;
+}
+
+int mlx4_ib_destroy_wq(struct ib_wq *ibwq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+
+ if (qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
+
+ destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1);
+
+ kfree(qp);
+
+ return 0;
+}
+
+struct ib_rwq_ind_table
+*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_rwq_ind_table *rwq_ind_table;
+ struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
+ unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+ unsigned int base_wqn;
+ size_t min_resp_len;
+ int i;
+ int err;
+
+ if (udata->inlen > 0 &&
+ !ib_is_udata_cleared(udata, 0,
+ udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ if (ind_tbl_size >
+ device->attrs.rss_caps.max_rwq_indirection_table_size) {
+ pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
+ ind_tbl_size,
+ device->attrs.rss_caps.max_rwq_indirection_table_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ base_wqn = init_attr->ind_tbl[0]->wq_num;
+
+ if (base_wqn % ind_tbl_size) {
+ pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
+ base_wqn);
+ return ERR_PTR(-EINVAL);
+ }
+
+ for (i = 1; i < ind_tbl_size; i++) {
+ if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
+ pr_debug("indirection table's WQNs aren't consecutive\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL);
+ if (!rwq_ind_table)
+ return ERR_PTR(-ENOMEM);
+
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ goto err;
+ }
+
+ return rwq_ind_table;
+
+err:
+ kfree(rwq_ind_table);
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ kfree(ib_rwq_ind_tbl);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 90ad2adc752f..bc6299697dda 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 18d5e1db93ed..470995fa38d2 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
MLX5_SET(query_cong_statistics_in, in, clear, reset);
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
+
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { };
+
+ MLX5_SET(query_cong_params_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_PARAMS);
+ MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
+int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
+ void *in, int in_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
+
+ return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index fa09228193a6..af4c24596274 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -39,4 +39,8 @@
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
bool reset, void *out, int out_size);
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out, int out_size);
+int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
+ void *in, int in_size);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
new file mode 100644
index 000000000000..2d32b519bb61
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+
+#include "mlx5_ib.h"
+#include "cmd.h"
+
+enum mlx5_ib_cong_node_type {
+ MLX5_IB_RROCE_ECN_RP = 1,
+ MLX5_IB_RROCE_ECN_NP = 2,
+};
+
+static const char * const mlx5_ib_dbg_cc_name[] = {
+ "rp_clamp_tgt_rate",
+ "rp_clamp_tgt_rate_ati",
+ "rp_time_reset",
+ "rp_byte_reset",
+ "rp_threshold",
+ "rp_ai_rate",
+ "rp_hai_rate",
+ "rp_min_dec_fac",
+ "rp_min_rate",
+ "rp_rate_to_set_on_first_cnp",
+ "rp_dce_tcp_g",
+ "rp_dce_tcp_rtt",
+ "rp_rate_reduce_monitor_period",
+ "rp_initial_alpha_value",
+ "rp_gd",
+ "np_cnp_dscp",
+ "np_cnp_prio_mode",
+ "np_cnp_prio",
+};
+
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
+#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
+#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
+#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
+#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
+#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
+#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
+#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
+#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
+#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
+#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
+#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
+#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
+#define MLX5_IB_RP_GD_ATTR BIT(16)
+
+#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
+#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
+
+static enum mlx5_ib_cong_node_type
+mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
+{
+ if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
+ param_offset <= MLX5_IB_DBG_CC_RP_GD)
+ return MLX5_IB_RROCE_ECN_RP;
+ else
+ return MLX5_IB_RROCE_ECN_NP;
+}
+
+static u32 mlx5_get_cc_param_val(void *field, int offset)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate);
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc);
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset);
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset);
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold);
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate);
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate);
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac);
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate);
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt);
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period);
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value);
+ case MLX5_IB_DBG_CC_RP_GD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd);
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_dscp);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_prio_mode);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_802p_prio);
+ default:
+ return 0;
+ }
+}
+
+static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
+ u32 var, u32 *attr_mask)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_GD:
+ *attr_mask |= MLX5_IB_RP_GD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
+ break;
+ }
+}
+
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
+ void *out;
+ void *field;
+ int err;
+ enum mlx5_ib_cong_node_type node;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ node = mlx5_ib_param_to_node(offset);
+
+ err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
+ if (err)
+ goto free;
+
+ field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
+ *var = mlx5_get_cc_param_val(field, offset);
+
+free:
+ kvfree(out);
+ return err;
+}
+
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
+ void *in;
+ void *field;
+ enum mlx5_ib_cong_node_type node;
+ u32 attr_mask = 0;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_cong_params_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_CONG_PARAMS);
+
+ node = mlx5_ib_param_to_node(offset);
+ MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
+ mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
+ MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
+ attr_mask);
+
+ err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
+ kvfree(in);
+ return err;
+}
+
+static ssize_t set_param(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ char lbuf[11] = { };
+ u32 var;
+ int ret;
+
+ if (count > sizeof(lbuf))
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, count))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = '\0';
+
+ if (kstrtou32(lbuf, 0, &var))
+ return -EINVAL;
+
+ ret = mlx5_ib_set_cc_params(param->dev, offset, var);
+ return ret ? ret : count;
+}
+
+static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ u32 var = 0;
+ int ret;
+ char lbuf[11];
+
+ if (*pos)
+ return 0;
+
+ ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
+ if (ret)
+ return ret;
+
+ ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(buf, lbuf, ret))
+ return -EFAULT;
+
+ *pos += ret;
+ return ret;
+}
+
+static const struct file_operations dbg_cc_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = set_param,
+ .read = get_param,
+};
+
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root ||
+ !dev->dbg_cc_params ||
+ !dev->dbg_cc_params->root)
+ return;
+
+ debugfs_remove_recursive(dev->dbg_cc_params->root);
+ kfree(dev->dbg_cc_params);
+ dev->dbg_cc_params = NULL;
+}
+
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ goto out;
+
+ if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
+ !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
+ goto out;
+
+ dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
+ if (!dbg_cc_params)
+ goto out;
+
+ dev->dbg_cc_params = dbg_cc_params;
+
+ dbg_cc_params->root = debugfs_create_dir("cc_params",
+ dev->mdev->priv.dbg_root);
+ if (!dbg_cc_params->root)
+ goto err;
+
+ for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ dbg_cc_params->params[i].offset = i;
+ dbg_cc_params->params[i].dev = dev;
+ dbg_cc_params->params[i].dentry =
+ debugfs_create_file(mlx5_ib_dbg_cc_name[i],
+ 0600, dbg_cc_params->root,
+ &dbg_cc_params->params[i],
+ &dbg_cc_fops);
+ if (!dbg_cc_params->params[i].dentry)
+ goto err;
+ }
+out: return 0;
+
+err:
+ mlx5_ib_warn(dev, "cong debugfs failure\n");
+ mlx5_ib_cleanup_cong_debugfs(dev);
+ /*
+ * We don't want to fail driver if debugfs failed to initialize,
+ * so we are not forwarding error to the user.
+ */
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index c1b9de800fe5..649a3364f838 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
}
in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].policy = in->policy;
out:
kfree(in);
@@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].node_guid = guid;
kfree(in);
return err;
}
@@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].port_guid = guid;
kfree(in);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index a7f2e60085c4..9dd9759459fb 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -58,6 +59,7 @@
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -97,6 +99,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
+static int get_port_state(struct ib_device *ibdev,
+ u8 port_num,
+ enum ib_port_state *state)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ if (!ret)
+ *state = attr.state;
+ return ret;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -114,6 +130,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
write_unlock(&ibdev->roce.netdev_lock);
break;
+ case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
@@ -127,10 +144,23 @@ static int mlx5_netdev_event(struct notifier_block *this,
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
+ enum ib_port_state port_state;
+
+ if (get_port_state(&ibdev->ib_dev, 1, &port_state))
+ return NOTIFY_DONE;
+
+ if (ibdev->roce.last_port_state == port_state)
+ return NOTIFY_DONE;
+ ibdev->roce.last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
- ibev.event = (event == NETDEV_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ if (port_state == IB_PORT_DOWN)
+ ibev.event = IB_EVENT_PORT_ERR;
+ else if (port_state == IB_PORT_ACTIVE)
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ else
+ return NOTIFY_DONE;
+
ibev.element.port_num = 1;
ib_dispatch_event(&ibev);
}
@@ -668,6 +698,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
+ MLX5_CAP_GEN(dev->mdev, general_notification_event))
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
@@ -1187,6 +1225,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
return 0;
}
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+{
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return err;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td++;
+
+ if (dev->user_td == 2)
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+
+ mutex_unlock(&dev->lb_mutex);
+ return err;
+}
+
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+{
+ mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td--;
+
+ if (dev->user_td < 2)
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+
+ mutex_unlock(&dev->lb_mutex);
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -1295,8 +1372,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_core_alloc_transport_domain(dev->mdev,
- &context->tdn);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
goto out_page;
}
@@ -1362,7 +1438,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
@@ -1390,7 +1466,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
@@ -2030,21 +2106,32 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
*/
static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
{
- struct ib_flow_spec_eth *eth_spec;
+ union ib_flow_spec *flow_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->size < sizeof(struct ib_flow_attr) +
- sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
- eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
- if (eth_spec->type != IB_FLOW_SPEC_ETH ||
- eth_spec->size != sizeof(*eth_spec))
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
return false;
+ }
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
}
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
@@ -2522,8 +2609,14 @@ unlock:
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
+ mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
@@ -2685,6 +2778,26 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
+static void delay_drop_handler(struct work_struct *work)
+{
+ int err;
+ struct mlx5_ib_delay_drop *delay_drop =
+ container_of(work, struct mlx5_ib_delay_drop,
+ delay_drop_work);
+
+ atomic_inc(&delay_drop->events_cnt);
+
+ mutex_lock(&delay_drop->lock);
+ err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
+ delay_drop->timeout);
+ if (err) {
+ mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
+ delay_drop->timeout);
+ delay_drop->activate = false;
+ }
+ mutex_unlock(&delay_drop->lock);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@@ -2737,8 +2850,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
+ case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ goto out;
default:
- return;
+ goto out;
}
ibev.device = &ibdev->ib_dev;
@@ -2746,7 +2862,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
- return;
+ goto out;
}
if (ibdev->ib_active)
@@ -2754,6 +2870,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
+
+out:
+ return;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -3313,6 +3432,17 @@ static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(np_cnp_sent),
};
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
@@ -3337,6 +3467,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -3386,6 +3520,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
}
}
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
@@ -3556,6 +3697,126 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
return netdev;
}
+static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!dev->delay_drop.dbg)
+ return;
+ debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
+ kfree(dev->delay_drop.dbg);
+ dev->delay_drop.dbg = NULL;
+}
+
+static void cancel_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ delay_drop_debugfs_cleanup(dev);
+}
+
+static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ char lbuf[20];
+ int len;
+
+ len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
+ return simple_read_from_buffer(buf, count, pos, lbuf, len);
+}
+
+static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ u32 timeout;
+ u32 var;
+
+ if (kstrtouint_from_user(buf, count, 0, &var))
+ return -EFAULT;
+
+ timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
+ 1000);
+ if (timeout != var)
+ mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
+ timeout);
+
+ delay_drop->timeout = timeout;
+
+ return count;
+}
+
+static const struct file_operations fops_delay_drop_timeout = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = delay_drop_timeout_write,
+ .read = delay_drop_timeout_read,
+};
+
+static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_delay_drop *dbg;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
+ if (!dbg)
+ return -ENOMEM;
+
+ dbg->dir_debugfs =
+ debugfs_create_dir("delay_drop",
+ dev->mdev->priv.dbg_root);
+ if (!dbg->dir_debugfs)
+ return -ENOMEM;
+
+ dbg->events_cnt_debugfs =
+ debugfs_create_atomic_t("num_timeout_events", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.events_cnt);
+ if (!dbg->events_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->rqs_cnt_debugfs =
+ debugfs_create_atomic_t("num_rqs", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.rqs_cnt);
+ if (!dbg->rqs_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->timeout_debugfs =
+ debugfs_create_file("timeout", 0600,
+ dbg->dir_debugfs,
+ &dev->delay_drop,
+ &fops_delay_drop_timeout);
+ if (!dbg->timeout_debugfs)
+ goto out_debugfs;
+
+ return 0;
+
+out_debugfs:
+ delay_drop_debugfs_cleanup(dev);
+ return -ENOMEM;
+}
+
+static void init_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (delay_drop_debugfs_init(dev))
+ mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@@ -3723,18 +3984,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
@@ -3754,6 +4017,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
+ dev->roce.last_port_state = IB_PORT_DOWN;
}
err = create_dev_resources(&dev->devr);
@@ -3770,9 +4034,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_odp;
}
+ err = mlx5_ib_init_cong_debugfs(dev);
+ if (err)
+ goto err_cnt;
+
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_cnt;
+ goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@@ -3790,18 +4058,25 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_dev;
+ init_delay_drop(dev);
+
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
- goto err_umrc;
+ goto err_delay_drop;
}
+ if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ MLX5_CAP_GEN(mdev, disable_local_lb))
+ mutex_init(&dev->lb_mutex);
+
dev->ib_active = true;
return dev;
-err_umrc:
+err_delay_drop:
+ cancel_delay_drop(dev);
destroy_umrc_res(dev);
err_dev:
@@ -3817,6 +4092,8 @@ err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
+ mlx5_ib_cleanup_cong_debugfs(dev);
+err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -3846,11 +4123,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ cancel_delay_drop(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bdcf25410c99..7ac991070020 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -247,6 +247,10 @@ struct mlx5_ib_wq {
void *qend;
};
+enum mlx5_ib_wq_flags {
+ MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1,
+};
+
struct mlx5_ib_rwq {
struct ib_wq ibwq;
struct mlx5_core_qp core_qp;
@@ -264,6 +268,7 @@ struct mlx5_ib_rwq {
u32 wqe_count;
u32 wqe_shift;
int wq_sig;
+ u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
enum {
@@ -378,6 +383,7 @@ struct mlx5_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
u32 rate_limit;
+ u32 underlay_qpn;
};
struct mlx5_ib_cq_buf {
@@ -399,6 +405,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
+ MLX5_IB_QP_UNDERLAY = 1 << 10,
};
struct mlx5_umr_wr {
@@ -616,6 +623,63 @@ struct mlx5_roce {
struct net_device *netdev;
struct notifier_block nb;
atomic_t next_port;
+ enum ib_port_state last_port_state;
+};
+
+struct mlx5_ib_dbg_param {
+ int offset;
+ struct mlx5_ib_dev *dev;
+ struct dentry *dentry;
+};
+
+enum mlx5_ib_dbg_cc_types {
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE,
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI,
+ MLX5_IB_DBG_CC_RP_TIME_RESET,
+ MLX5_IB_DBG_CC_RP_BYTE_RESET,
+ MLX5_IB_DBG_CC_RP_THRESHOLD,
+ MLX5_IB_DBG_CC_RP_AI_RATE,
+ MLX5_IB_DBG_CC_RP_HAI_RATE,
+ MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
+ MLX5_IB_DBG_CC_RP_MIN_RATE,
+ MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_G,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_RTT,
+ MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
+ MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
+ MLX5_IB_DBG_CC_RP_GD,
+ MLX5_IB_DBG_CC_NP_CNP_DSCP,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO,
+ MLX5_IB_DBG_CC_MAX,
+};
+
+struct mlx5_ib_dbg_cc_params {
+ struct dentry *root;
+ struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX];
+};
+
+enum {
+ MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
+};
+
+struct mlx5_ib_dbg_delay_drop {
+ struct dentry *dir_debugfs;
+ struct dentry *rqs_cnt_debugfs;
+ struct dentry *events_cnt_debugfs;
+ struct dentry *timeout_debugfs;
+};
+
+struct mlx5_ib_delay_drop {
+ struct mlx5_ib_dev *dev;
+ struct work_struct delay_drop_work;
+ /* serialize setting of delay drop */
+ struct mutex lock;
+ u32 timeout;
+ bool activate;
+ atomic_t events_cnt;
+ atomic_t rqs_cnt;
+ struct mlx5_ib_dbg_delay_drop *dbg;
};
struct mlx5_ib_dev {
@@ -652,9 +716,15 @@ struct mlx5_ib_dev {
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
- struct mlx5_sq_bfreg bfreg;
- struct mlx5_sq_bfreg fp_bfreg;
- u8 umr_fence;
+ struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg fp_bfreg;
+ struct mlx5_ib_delay_drop delay_drop;
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+
+ /* protect the user_td */
+ struct mutex lb_mutex;
+ u32 user_td;
+ u8 umr_fence;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -904,6 +974,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev);
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev);
+
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2c40a2e989d2..a0eb2f96179a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -48,6 +48,7 @@ enum {
#define MLX5_UMR_ALIGN 2048
static int clean_mr(struct mlx5_ib_mr *mr);
+static int max_umr_order(struct mlx5_ib_dev *dev);
static int use_umr(struct mlx5_ib_dev *dev, int order);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
@@ -491,16 +492,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_ib_mr *mr = NULL;
struct mlx5_cache_ent *ent;
+ int last_umr_cache_entry;
int c;
int i;
c = order2idx(dev, order);
- if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
+ last_umr_cache_entry = order2idx(dev, max_umr_order(dev));
+ if (c < 0 || c > last_umr_cache_entry) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
+ for (i = c; i <= last_umr_cache_entry; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -816,11 +819,16 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
return (npages + 1) / 2;
}
-static int use_umr(struct mlx5_ib_dev *dev, int order)
+static int max_umr_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return order <= MAX_UMR_CACHE_ENTRY + 2;
- return order <= MLX5_MAX_UMR_SHIFT;
+ return MAX_UMR_CACHE_ENTRY + 2;
+ return MLX5_MAX_UMR_SHIFT;
+}
+
+static int use_umr(struct mlx5_ib_dev *dev, int order)
+{
+ return order <= max_umr_order(dev);
}
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index ae0746754008..3d701c7a4c91 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (qp->ibqp.qp_type != IB_QPT_RC) {
av = *wqe;
- if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+ if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
else
*wqe += sizeof(struct mlx5_base_av);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0889ff367c86..5c7ce9bd466e 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
/* not supported currently */
@@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@@ -675,10 +677,14 @@ err_umem:
return err;
}
-static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_rwq *rwq)
{
struct mlx5_ib_ucontext *context;
+ if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
+ atomic_dec(&dev->delay_drop.rqs_cnt);
+
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &rwq->db);
if (rwq->umem)
@@ -1021,12 +1027,16 @@ static int is_connected(enum ib_qp_type qp_type)
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
struct mlx5_ib_sq *sq, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(tisc, tisc, transport_domain, tdn);
+ if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
+
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
}
@@ -1229,7 +1239,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 tdn = mucontext->tdn;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
if (err)
return err;
@@ -1502,10 +1512,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 *in;
int err;
- base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
-
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -1587,10 +1593,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+
+ if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (init_attr->qp_type != IB_QPT_UD ||
+ (MLX5_CAP_GEN(dev->mdev, port_type) !=
+ MLX5_CAP_PORT_TYPE_IB) ||
+ !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) {
+ mlx5_ib_dbg(dev, "Source QP option isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ qp->flags |= MLX5_IB_QP_UNDERLAY;
+ qp->underlay_qpn = init_attr->source_qpn;
+ }
} else {
qp->wq_sig = !!wq_signature;
}
+ base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
qp, (pd && pd->uobject) ? &ucmd : NULL);
@@ -1741,7 +1765,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX5_IB_QP_LSO;
}
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, pd);
@@ -1893,7 +1918,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
- struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct mlx5_ib_qp_base *base;
unsigned long flags;
int err;
@@ -1902,12 +1927,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
return;
}
- base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
+ !(qp->flags & MLX5_IB_QP_UNDERLAY)) {
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
@@ -1946,7 +1973,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
destroy_raw_packet_qp(dev, qp);
} else {
err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
@@ -2702,7 +2730,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
- } else if (ibqp->qp_type == IB_QPT_UD ||
+ } else if ((ibqp->qp_type == IB_QPT_UD &&
+ !(qp->flags & MLX5_IB_QP_UNDERLAY)) ||
ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
@@ -2799,6 +2828,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
+
+ /* Underlay port should be used - index 0 function per port */
+ if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ port_num = 0;
+
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
@@ -2824,7 +2858,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
@@ -2913,7 +2948,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
- if (qp_type != MLX5_IB_QPT_REG_UMR &&
+ if (qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
+ mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
+ attr_mask);
+ goto out;
+ }
+ } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
@@ -4477,9 +4518,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
+ /* Not all of output fields are applicable, make sure to zero them */
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+ memset(qp_attr, 0, sizeof(*qp_attr));
+
mutex_lock(&qp->mutex);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@@ -4597,6 +4643,27 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
}
}
+static int set_delay_drop(struct mlx5_ib_dev *dev)
+{
+ int err = 0;
+
+ mutex_lock(&dev->delay_drop.lock);
+ if (dev->delay_drop.activate)
+ goto out;
+
+ err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout);
+ if (err)
+ goto out;
+
+ dev->delay_drop.activate = true;
+out:
+ mutex_unlock(&dev->delay_drop.lock);
+
+ if (!err)
+ atomic_inc(&dev->delay_drop.rqs_cnt);
+ return err;
+}
+
static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
@@ -4651,9 +4718,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
}
MLX5_SET(rqc, rqc, scatter_fcs, 1);
}
+ if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ if (!(dev->ib_dev.attrs.raw_packet_caps &
+ IB_RAW_PACKET_CAP_DELAY_DROP)) {
+ mlx5_ib_dbg(dev, "Delay drop is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, delay_drop_en, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+ if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ err = set_delay_drop(dev);
+ if (err) {
+ mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
+ err);
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ } else {
+ rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
+ }
+ }
out:
kvfree(in);
return err;
@@ -4787,7 +4873,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
err_copy:
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
err_user_rq:
- destroy_user_rq(pd, rwq);
+ destroy_user_rq(dev, pd, rwq);
err:
kfree(rwq);
return ERR_PTR(err);
@@ -4799,7 +4885,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq)
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
- destroy_user_rq(wq->pd, rwq);
+ destroy_user_rq(dev, wq->pd, rwq);
kfree(rwq);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 1ac5b8551a4d..6447d736d5a4 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -97,7 +97,7 @@ int rxe_rcv(struct sk_buff *skb);
void rxe_dev_put(struct rxe_dev *rxe);
struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char* name);
+struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index d6299edf9a5b..6b65c0132289 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -250,7 +250,7 @@ void rxe_resp_queue_pkt(struct rxe_dev *rxe,
void rxe_comp_queue_pkt(struct rxe_dev *rxe,
struct rxe_qp *qp, struct sk_buff *skb);
-static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
+static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
{
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index bd812e00988e..d22431e3a908 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -76,7 +76,7 @@ static void rxe_vma_close(struct vm_area_struct *vma)
kref_put(&ip->ref, rxe_mmap_release);
}
-static struct vm_operations_struct rxe_vm_ops = {
+static const struct vm_operations_struct rxe_vm_ops = {
.open = rxe_vma_open,
.close = rxe_vma_close,
};
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 75d11ee635ec..c1b5f38f31a5 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -188,7 +188,7 @@ int rxe_pool_init(
struct rxe_dev *rxe,
struct rxe_pool *pool,
enum rxe_elem_type type,
- unsigned max_elem)
+ unsigned int max_elem)
{
int err = 0;
size_t size = rxe_type_info[type].size;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 7ee465d1a1e1..db7161456f45 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -43,7 +43,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
static inline void retry_first_write_send(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
- unsigned mask, int npsn)
+ unsigned int mask, int npsn)
{
int i;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index d2a14a1bdc7f..ea3810b29273 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -78,7 +78,7 @@ void rxe_do_task(unsigned long data)
default:
spin_unlock_irqrestore(&task->state_lock, flags);
- pr_warn("bad state = %d in rxe_do_task\n", task->state);
+ pr_warn("%s failed with bad state %d\n", __func__, task->state);
return;
}
@@ -105,7 +105,7 @@ void rxe_do_task(unsigned long data)
break;
default:
- pr_warn("bad state = %d in rxe_do_task\n",
+ pr_warn("%s failed with bad state %d\n", __func__,
task->state);
}
spin_unlock_irqrestore(&task->state_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index af90a7d42b96..aba3fa9ae599 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1210,8 +1210,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}
-static ssize_t rxe_show_parent(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t parent_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct rxe_dev *rxe = container_of(device, struct rxe_dev,
ib_dev.dev);
@@ -1219,7 +1219,7 @@ static ssize_t rxe_show_parent(struct device *device,
return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}
-static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
+static DEVICE_ATTR_RO(parent);
static struct device_attribute *rxe_dev_attributes[] = {
&dev_attr_parent,
@@ -1336,15 +1336,15 @@ int rxe_register_device(struct rxe_dev *rxe)
err = ib_register_device(dev, NULL);
if (err) {
- pr_warn("rxe_register_device failed, err = %d\n", err);
+ pr_warn("%s failed with error %d\n", __func__, err);
goto err1;
}
for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
if (err) {
- pr_warn("device_create_file failed, i = %d, err = %d\n",
- i, err);
+ pr_warn("%s failed with error %d for attr number %d\n",
+ __func__, err, i);
goto err2;
}
}