aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile6
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h1
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c709
-rw-r--r--drivers/infiniband/hw/mlx5/counters.h17
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c6
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c104
-rw-r--r--drivers/infiniband/hw/mlx5/devx.h45
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c765
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c2516
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h29
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3254
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h109
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c2
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c28
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c71
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h1
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.c121
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.h13
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c45
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c70
22 files changed, 4005 insertions, 3923 deletions
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 8cca61c671f8..b4c009bb0db6 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := ah.o \
cmd.o \
cong.o \
+ counters.o \
cq.o \
doorbell.o \
gsi.o \
@@ -22,5 +23,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
- flow.o \
- qos.o
+ fs.o \
+ qos.o \
+ std_types.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index cc24c711e92a..ebb2f108b64f 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -148,18 +148,6 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
spin_unlock(&dm->lock);
}
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
-{
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
-
- MLX5_SET(ppcnt_reg, in, local_port, 1);
-
- MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
- return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
- 0, 0);
-}
-
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index f4d8558db434..1d192a8ca87d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,7 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out);
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
new file mode 100644
index 000000000000..145f3cb40ccb
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include "mlx5_ib.h"
+#include <linux/mlx5/eswitch.h>
+#include "counters.h"
+#include "ib_rep.h"
+#include "qp.h"
+
+struct mlx5_ib_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+ INIT_Q_COUNTER(roce_adp_retrans),
+ INIT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_Q_COUNTER(roce_slow_restart),
+ INIT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+#define INIT_EXT_PPCNT_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+ INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+ struct ib_counters_read_attr *read_attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct mlx5_read_counters_attr mread_attr = {};
+ struct mlx5_ib_flow_counters_desc *desc;
+ int ret, i;
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ if (mcounters->cntrs_max_index > read_attr->ncounters) {
+ ret = -EINVAL;
+ goto err_bound;
+ }
+
+ mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+ GFP_KERNEL);
+ if (!mread_attr.out) {
+ ret = -ENOMEM;
+ goto err_bound;
+ }
+
+ mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+ mread_attr.flags = read_attr->flags;
+ ret = mcounters->read_counters(counters->device, &mread_attr);
+ if (ret)
+ goto err_read;
+
+ /* do the pass over the counters data array to assign according to the
+ * descriptions and indexing pairs
+ */
+ desc = mcounters->counters_data;
+ for (i = 0; i < mcounters->ncounters; i++)
+ read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+ kfree(mread_attr.out);
+err_bound:
+ mutex_unlock(&mcounters->mcntrs_mutex);
+ return ret;
+}
+
+static void mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mlx5_ib_counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+}
+
+static int mlx5_ib_create_counters(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mutex_init(&mcounters->mcntrs_mutex);
+ return 0;
+}
+
+
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+ return MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+ MLX5_ESWITCH_OFFLOADS;
+}
+
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+ &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() Returns counters set id to use for given
+ * device port combination in switchdev and non switchdev mode of the
+ * parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return cnts->set_id;
+}
+
+static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
+
+ if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
+ return NULL;
+
+ cnts = get_counters(dev, port_num - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats,
+ u16 set_id)
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ __be32 val;
+ int ret, i;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+{
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int ret, i;
+ void *out;
+
+ out = kvzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
+ 0, 0);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ cnts->offsets[i + offset]));
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
+ struct mlx5_core_dev *mdev;
+ int ret, num_counters;
+ u8 mdev_port_num;
+
+ if (!stats)
+ return -EINVAL;
+
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ /* q_counters are per IB device, query the master mdev */
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
+ if (ret)
+ return ret;
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
+ &mdev_port_num);
+ if (!mdev) {
+ /* If port is not affiliated yet, its in down state
+ * which doesn't have any counters yet, so it would be
+ * zero. So no need to read from the HCA.
+ */
+ goto done;
+ }
+ ret = mlx5_lag_query_cong_counters(dev->mdev,
+ stats->value +
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ if (ret)
+ return ret;
+ }
+
+done:
+ return num_counters;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return mlx5_ib_query_q_counters(dev->mdev, cnts,
+ counter->stats, counter->id);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ if (!counter->id)
+ return 0;
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ int err;
+
+ if (!counter->id) {
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+
+ MLX5_SET(alloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err)
+ return err;
+ counter->id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5_ib_counter_dealloc(counter);
+ counter->id = 0;
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+{
+ return mlx5_ib_qp_set_counter(qp, NULL);
+}
+
+
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+ for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ names[j] = roce_accl_cnts[i].name;
+ offsets[j] = roce_accl_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ names[j] = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+ names[j] = ext_ppcnt_cnts[i].name;
+ offsets[j] = ext_ppcnt_cnts[i].offset;
+ }
+ }
+}
+
+
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+ num_counters += ARRAY_SIZE(roce_accl_cnts);
+
+ cnts->num_q_counters = num_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+ num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+ }
+ cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ if (!cnts->names)
+ return -ENOMEM;
+
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
+ goto err_names;
+
+ return 0;
+
+err_names:
+ kfree(cnts->names);
+ cnts->names = NULL;
+ return -ENOMEM;
+}
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+ int num_cnt_ports;
+ int i;
+
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ if (dev->port[i].cnts.set_id) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ dev->port[i].cnts.set_id);
+ mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+ }
+ kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.offsets);
+ }
+}
+
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+ int num_cnt_ports;
+ int err = 0;
+ int i;
+ bool is_shared;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
+ if (err)
+ goto err_alloc;
+
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
+ dev->port[i].cnts.offsets);
+
+ MLX5_SET(alloc_q_counter_in, in, uid,
+ is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, err);
+ goto err_alloc;
+ }
+
+ dev->port[i].cnts.set_id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+ return 0;
+
+err_alloc:
+ mlx5_ib_dealloc_counters(dev);
+ return err;
+}
+
+static int read_flow_counters(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr)
+{
+ struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_fc_query(dev->mdev, fc,
+ &read_attr->out[IB_COUNTER_PACKETS],
+ &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(
+ struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
+ struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ u32 cntrs_max_index = 0;
+ int i;
+
+ if (counters_type != MLX5_IB_COUNTERS_FLOW)
+ return -EINVAL;
+
+ /* init the fields for the object */
+ mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
+ mcounters->ncounters = ncounters;
+ /* each counter entry have both description and index pair */
+ for (i = 0; i < ncounters; i++) {
+ if (desc_data[i].description > IB_COUNTER_BYTES)
+ return -EINVAL;
+
+ if (cntrs_max_index <= desc_data[i].index)
+ cntrs_max_index = desc_data[i].index + 1;
+ }
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ mcounters->counters_data = desc_data;
+ mcounters->cntrs_max_index = cntrs_max_index;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+
+ return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+ struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+ struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+ bool hw_hndl = false;
+ int ret = 0;
+
+ if (ucmd && ucmd->ncounters_data != 0) {
+ cntrs_data = ucmd->data;
+ if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+ return -EINVAL;
+
+ desc_data = kcalloc(cntrs_data->ncounters,
+ sizeof(*desc_data),
+ GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ if (copy_from_user(desc_data,
+ u64_to_user_ptr(cntrs_data->counters_data),
+ sizeof(*desc_data) * cntrs_data->ncounters)) {
+ ret = -EFAULT;
+ goto free;
+ }
+ }
+
+ if (!mcounters->hw_cntrs_hndl) {
+ mcounters->hw_cntrs_hndl = mlx5_fc_create(
+ to_mdev(ibcounters->device)->mdev, false);
+ if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+ ret = PTR_ERR(mcounters->hw_cntrs_hndl);
+ goto free;
+ }
+ hw_hndl = true;
+ }
+
+ if (desc_data) {
+ /* counters already bound to at least one flow */
+ if (mcounters->cntrs_max_index) {
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ ret = counters_set_description(ibcounters,
+ MLX5_IB_COUNTERS_FLOW,
+ desc_data,
+ cntrs_data->ncounters);
+ if (ret)
+ goto free_hndl;
+
+ } else if (!mcounters->cntrs_max_index) {
+ /* counters not bound yet, must have udata passed */
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ return 0;
+
+free_hndl:
+ if (hw_hndl) {
+ mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ mcounters->hw_cntrs_hndl = NULL;
+ }
+free:
+ kfree(desc_data);
+ return ret;
+}
+
+void mlx5_ib_counters_clear_description(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters;
+
+ if (!counters || atomic_read(&counters->usecnt) != 1)
+ return;
+
+ mcounters = to_mcounters(counters);
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ kfree(mcounters->counters_data);
+ mcounters->counters_data = NULL;
+ mcounters->cntrs_max_index = 0;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
+static const struct ib_device_ops hw_stats_ops = {
+ .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+};
+
+static const struct ib_device_ops counters_ops = {
+ .create_counters = mlx5_ib_create_counters,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .read_counters = mlx5_ib_read_counters,
+
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+};
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &counters_ops);
+
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return 0;
+
+ ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
+ return mlx5_ib_alloc_counters(dev);
+}
+
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return;
+
+ mlx5_ib_dealloc_counters(dev);
+}
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
new file mode 100644
index 000000000000..1aa30c2f3f4d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_COUNTERS_H
+#define _MLX5_IB_COUNTERS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_clear_description(struct ib_counters *counters);
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
+#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 0c18cb6a2f14..0133ebb8d740 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -925,8 +925,8 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_cq *cq = to_mcq(ibcq);
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
- int uninitialized_var(index);
- int uninitialized_var(inlen);
+ int index;
+ int inlen;
u32 *cqb = NULL;
void *cqc;
int cqe_size;
@@ -1246,7 +1246,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
__be64 *pas;
int page_shift;
int inlen;
- int uninitialized_var(cqe_size);
+ int cqe_size;
unsigned long flags;
if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9454a66c12cc..9e3d8b826498 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -14,6 +14,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "devx.h"
#include "qp.h"
#include <linux/xarray.h>
@@ -89,22 +90,6 @@ struct devx_async_event_file {
u8 is_destroyed:1;
};
-#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
-struct devx_obj {
- struct mlx5_ib_dev *ib_dev;
- u64 obj_id;
- u32 dinlen; /* destroy inbox length */
- u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
- u32 flags;
- union {
- struct mlx5_ib_devx_mr devx_mr;
- struct mlx5_core_dct core_dct;
- struct mlx5_core_cq core_cq;
- u32 flow_counter_bulk_size;
- };
- struct list_head event_sub; /* holds devx_event_subscription entries */
-};
-
struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
@@ -171,48 +156,6 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
-{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- switch (opcode) {
- case MLX5_CMD_OP_DESTROY_TIR:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
- obj_id);
- return true;
-
- case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
- table_id);
- return true;
- default:
- return false;
- }
-}
-
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
-{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
-
- if (offset && offset >= devx_obj->flow_counter_bulk_size)
- return false;
-
- *counter_id = MLX5_GET(dealloc_flow_counter_in,
- devx_obj->dinbox,
- flow_counter_id);
- *counter_id += offset;
- return true;
- }
-
- return false;
-}
-
static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
switch (event_num) {
@@ -2419,17 +2362,24 @@ static int devx_event_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ int uid;
- xa_init(&table->event_xa);
- mutex_init(&table->event_xa_lock);
- MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
- mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ uid = mlx5_ib_devx_create(dev, false);
+ if (uid > 0) {
+ dev->devx_whitelist_uid = uid;
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ }
+
+ return 0;
}
-void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
struct devx_event_subscription *sub, *tmp;
@@ -2437,17 +2387,21 @@ void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
void *entry;
unsigned long id;
- mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
- mutex_lock(&dev->devx_event_table.event_xa_lock);
- xa_for_each(&table->event_xa, id, entry) {
- event = entry;
- list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
- xa_list)
- devx_cleanup_subscription(dev, sub);
- kfree(entry);
+ if (dev->devx_whitelist_uid) {
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(
+ sub, tmp, &event->unaffiliated_list, xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
}
- mutex_unlock(&dev->devx_event_table.event_xa_lock);
- xa_destroy(&table->event_xa);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
@@ -2536,7 +2490,7 @@ static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
{
struct devx_async_event_file *ev_file = filp->private_data;
struct devx_event_subscription *event_sub;
- struct devx_async_event_data *uninitialized_var(event);
+ struct devx_async_event_data *event;
int ret = 0;
size_t eventsz;
bool omit_data;
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
new file mode 100644
index 000000000000..1f69866aed16
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DEVX_H
+#define _MLX5_IB_DEVX_H
+
+#include "mlx5_ib.h"
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_ib_dev *ib_dev;
+ u64 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ union {
+ struct mlx5_ib_devx_mr devx_mr;
+ struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
+ };
+ struct list_head event_sub; /* holds devx_event_subscription entries */
+};
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+{
+ return -EOPNOTSUPP;
+}
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+}
+#endif
+#endif /* _MLX5_IB_DEVX_H */
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
deleted file mode 100644
index 216a1108ad34..000000000000
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ /dev/null
@@ -1,765 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/*
- * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
- */
-
-#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/uverbs_types.h>
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/uverbs_std_types.h>
-#include <rdma/mlx5_user_ioctl_cmds.h>
-#include <rdma/mlx5_user_ioctl_verbs.h>
-#include <rdma/ib_umem.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs.h>
-#include "mlx5_ib.h"
-
-#define UVERBS_MODULE_NAME mlx5_ib
-#include <rdma/uverbs_named_ioctl.h>
-
-static int
-mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
- enum mlx5_flow_namespace_type *namespace)
-{
- switch (table_type) {
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
- *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
- *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
- *namespace = MLX5_FLOW_NAMESPACE_FDB;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
- *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
- *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
- [MLX5_IB_FLOW_TYPE_NORMAL] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- .u.ptr = {
- .len = sizeof(u16), /* data is priority */
- .min_len = sizeof(u16),
- }
- },
- [MLX5_IB_FLOW_TYPE_SNIFFER] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
-};
-
-static int get_dests(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
- int *dest_type, struct ib_qp **qp, u32 *flags)
-{
- bool dest_devx, dest_qp;
- void *devx_obj;
- int err;
-
- dest_devx = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
- dest_qp = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
-
- *flags = 0;
- err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
- MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
- MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
- if (err)
- return err;
-
- /* Both flags are not allowed */
- if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
- *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
- return -EINVAL;
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- if (dest_devx && (dest_qp || *flags))
- return -EINVAL;
- else if (dest_qp && *flags)
- return -EINVAL;
- }
-
- /* Allow only DEVX object, drop as dest for FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
- (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
- return -EINVAL;
-
- /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
- ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
- return -EINVAL;
-
- *qp = NULL;
- if (dest_devx) {
- devx_obj =
- uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
-
- /* Verify that the given DEVX object is a flow
- * steering destination.
- */
- if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type))
- return -EINVAL;
- /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
- *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
- return -EINVAL;
- } else if (dest_qp) {
- struct mlx5_ib_qp *mqp;
-
- *qp = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
- if (IS_ERR(*qp))
- return PTR_ERR(*qp);
-
- if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
-
- mqp = to_mqp(*qp);
- if (mqp->is_rss)
- *dest_id = mqp->rss_qp.tirn;
- else
- *dest_id = mqp->raw_packet_qp.rq.tirn;
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- }
-
- if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- return -EINVAL;
-
- return 0;
-}
-
-#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
-static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_flow_context flow_context = {.flow_tag =
- MLX5_FS_DEFAULT_FLOW_TAG};
- u32 *offset_attr, offset = 0, counter_id = 0;
- int dest_id, dest_type, inlen, len, ret, i;
- struct mlx5_ib_flow_handler *flow_handler;
- struct mlx5_ib_flow_matcher *fs_matcher;
- struct ib_uobject **arr_flow_actions;
- struct ib_uflow_resources *uflow_res;
- struct mlx5_flow_act flow_act = {};
- struct ib_qp *qp = NULL;
- void *devx_obj, *cmd_in;
- struct ib_uobject *uobj;
- struct mlx5_ib_dev *dev;
- u32 flags;
-
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
- fs_matcher = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
- uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
- dev = mlx5_udata_to_mdev(&attrs->driver_udata);
-
- if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
- return -EINVAL;
-
- if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
-
- if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
- if (len) {
- devx_obj = arr_flow_actions[0]->object;
-
- if (uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
-
- int num_offsets = uverbs_attr_ptr_get_array_size(
- attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
- sizeof(u32));
-
- if (num_offsets != 1)
- return -EINVAL;
-
- offset_attr = uverbs_attr_get_alloced_ptr(
- attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
- offset = *offset_attr;
- }
-
- if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset,
- &counter_id))
- return -EINVAL;
-
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- }
-
- cmd_in = uverbs_attr_get_alloced_ptr(
- attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
- inlen = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
-
- uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
- if (!uflow_res)
- return -ENOMEM;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
- for (i = 0; i < len; i++) {
- struct mlx5_ib_flow_action *maction =
- to_mflow_act(arr_flow_actions[i]->object);
-
- ret = parse_flow_flow_action(maction, false, &flow_act);
- if (ret)
- goto err_out;
- flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
- arr_flow_actions[i]->object);
- }
-
- ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
- MLX5_IB_ATTR_CREATE_FLOW_TAG);
- if (!ret) {
- if (flow_context.flow_tag >= BIT(24)) {
- ret = -EINVAL;
- goto err_out;
- }
- flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
- }
-
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
- &flow_context,
- &flow_act,
- counter_id,
- cmd_in, inlen,
- dest_id, dest_type);
- if (IS_ERR(flow_handler)) {
- ret = PTR_ERR(flow_handler);
- goto err_out;
- }
-
- ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
-
- return 0;
-err_out:
- ib_uverbs_flow_resources_free(uflow_res);
- return ret;
-}
-
-static int flow_matcher_cleanup(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_matcher *obj = uobject->object;
- int ret;
-
- ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
- if (ret)
- return ret;
-
- kfree(obj);
- return 0;
-}
-
-static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *obj)
-{
- enum mlx5_ib_uapi_flow_table_type ft_type =
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
- u32 flags;
- int err;
-
- /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
- * users should switch to it. We leave this to not break userspace
- */
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
- uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
- return -EINVAL;
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
- err = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
- if (err)
- return err;
-
- err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
- if (err)
- return err;
-
- return 0;
- }
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
- err = uverbs_get_flags32(&flags, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- IB_FLOW_ATTR_FLAGS_EGRESS);
- if (err)
- return err;
-
- if (flags) {
- mlx5_ib_ft_type_to_namespace(
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
- &obj->ns_type);
- return 0;
- }
- }
-
- obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
- struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- struct mlx5_ib_flow_matcher *obj;
- int err;
-
- obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- obj->mask_len = uverbs_attr_get_len(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- err = uverbs_copy_from(&obj->matcher_mask,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- if (err)
- goto end;
-
- obj->flow_type = uverbs_attr_get_enum_id(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
-
- if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
- err = uverbs_copy_from(&obj->priority,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
- if (err)
- goto end;
- }
-
- err = uverbs_copy_from(&obj->match_criteria_enable,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
- if (err)
- goto end;
-
- err = mlx5_ib_matcher_ns(attrs, obj);
- if (err)
- goto end;
-
- uobj->object = obj;
- obj->mdev = dev->mdev;
- atomic_set(&obj->usecnt, 0);
- return 0;
-
-end:
- kfree(obj);
- return err;
-}
-
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- switch (maction->flow_action_raw.sub_type) {
- case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
- mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.modify_hdr);
- break;
- case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
- mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.pkt_reformat);
- break;
- case MLX5_IB_FLOW_ACTION_DECAP:
- break;
- default:
- break;
- }
-}
-
-static struct ib_flow_action *
-mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
- enum mlx5_ib_uapi_flow_table_type ft_type,
- u8 num_actions, void *in)
-{
- enum mlx5_flow_namespace_type namespace;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ERR_PTR(-EINVAL);
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return ERR_PTR(-ENOMEM);
-
- maction->flow_action_raw.modify_hdr =
- mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
-
- if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
- ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
- kfree(maction);
- return ERR_PTR(ret);
- }
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
- maction->flow_action_raw.dev = dev;
-
- return &maction->ib_action;
-}
-
-static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
-{
- return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- max_modify_header_actions);
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct ib_flow_action *action;
- int num_actions;
- void *in;
- int ret;
-
- if (!mlx5_ib_modify_header_supported(mdev))
- return -EOPNOTSUPP;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
-
- num_actions = uverbs_attr_ptr_get_array_size(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
- if (num_actions < 0)
- return num_actions;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
- if (ret)
- return ret;
- action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
-
- return 0;
-}
-
-static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
- u8 packet_reformat_type,
- u8 ft_type)
-{
- switch (packet_reformat_type) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE(ibdev->mdev,
- encap_general_header);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
- reformat_l2_to_l3_tunnel);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
- reformat_l3_tunnel_to_l2);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
- break;
- default:
- break;
- }
-
- return false;
-}
-
-static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
-{
- switch (dv_prt) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int mlx5_ib_flow_action_create_packet_reformat_ctx(
- struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_action *maction,
- u8 ft_type, u8 dv_prt,
- void *in, size_t len)
-{
- enum mlx5_flow_namespace_type namespace;
- u8 prm_prt;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ret;
-
- ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
- if (ret)
- return ret;
-
- maction->flow_action_raw.pkt_reformat =
- mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
- in, namespace);
- if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
- ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
- return ret;
- }
-
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
- maction->flow_action_raw.dev = dev;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
- if (ret)
- return ret;
-
- ret = uverbs_get_const(&dv_prt, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
- if (ret)
- return ret;
-
- if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
- return -EOPNOTSUPP;
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return -ENOMEM;
-
- if (dv_prt ==
- MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_DECAP;
- maction->flow_action_raw.dev = mdev;
- } else {
- void *in;
- int len;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
- if (IS_ERR(in)) {
- ret = PTR_ERR(in);
- goto free_maction;
- }
-
- len = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
-
- ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
- maction, ft_type, dv_prt, in, len);
- if (ret)
- goto free_maction;
- }
-
- uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
- return 0;
-
-free_maction:
- kfree(maction);
- return ret;
-}
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_CREATE_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_READ,
- UA_MANDATORY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
- UVERBS_OBJECT_QP,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_READ, 1,
- MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
- UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ, 1, 1,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
- UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
- UA_OPTIONAL,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
- enum mlx5_ib_create_flow_flags,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_DESTROY_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(mlx5_ib_fs,
- UVERBS_OBJECT_FLOW,
- &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
- &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
- set_add_copy_action_in_auto)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
- UVERBS_ATTR_MIN_SIZE(1),
- UA_ALLOC_AND_COPY,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
- enum mlx5_ib_uapi_flow_action_packet_reformat_type,
- UA_MANDATORY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(
- mlx5_ib_flow_actions,
- UVERBS_OBJECT_FLOW_ACTION,
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
- mlx5_ib_flow_type,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
- UVERBS_ATTR_TYPE(u8),
- UA_MANDATORY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- enum ib_flow_flags,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-
-const struct uapi_definition mlx5_ib_flow_defs[] = {
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
- MLX5_IB_OBJECT_FLOW_MATCHER),
- UAPI_DEF_CHAIN_OBJ_TREE(
- UVERBS_OBJECT_FLOW,
- &mlx5_ib_fs),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_actions),
- {},
-};
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
new file mode 100644
index 000000000000..e9cfb9a2ef41
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -0,0 +1,2516 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+#include "counters.h"
+#include "devx.h"
+#include "fs.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+enum {
+ MATCH_CRITERIA_ENABLE_OUTER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC_BIT,
+ MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
+#define HEADER_IS_ZERO(match_criteria, headers) \
+ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
+
+static u8 get_match_criteria_enable(u32 *match_criteria)
+{
+ u8 match_criteria_enable;
+
+ match_criteria_enable =
+ (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+ MATCH_CRITERIA_ENABLE_OUTER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+ MATCH_CRITERIA_ENABLE_MISC_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+ MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+ return match_criteria_enable;
+}
+
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
+}
+
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
+static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
+}
+
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+#define LAST_ETH_FIELD vlan_tag
+#define LAST_IB_FIELD sl
+#define LAST_IPV4_FIELD tos
+#define LAST_IPV6_FIELD traffic_class
+#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
+
+/* Field is the last supported field */
+#define FIELDS_NOT_SUPPORTED(filter, field)\
+ memchr_inv((void *)&filter.field +\
+ sizeof(filter.field), 0,\
+ sizeof(filter) -\
+ offsetof(typeof(filter), field) -\
+ sizeof(filter.field))
+
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
+{
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_ESP:
+ if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+ return -EINVAL;
+ /* Currently only AES_GCM keymat is supported by the driver */
+ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
+ action->action |= is_egress ?
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+ return 0;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
+ return 0;
+ }
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action, u32 prev_type)
+{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters);
+ void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters_2);
+ void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters_2);
+ void *headers_c;
+ void *headers_v;
+ int match_ipv;
+ int ret;
+
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ }
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
+ case IB_FLOW_SPEC_ETH:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+ return -EOPNOTSUPP;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv6.mask.traffic_class,
+ ib_spec->ipv6.val.traffic_class);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
+
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ if (ib_spec->esp.mask.seq)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
+ ntohl(ib_spec->esp.mask.spi));
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ ntohl(ib_spec->esp.val.spi));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (ib_spec->gre.mask.c_ks_res0_ver)
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ IPPROTO_GRE);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+ ntohs(ib_spec->gre.mask.protocol));
+ MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+ ntohs(ib_spec->gre.val.protocol));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.mask.key,
+ sizeof(ib_spec->gre.mask.key));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.val.key,
+ sizeof(ib_spec->gre.val.key));
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ switch (prev_type) {
+ case IB_FLOW_SPEC_UDP:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ default:
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ inner_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ inner_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ } else {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ }
+ }
+ break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
+ if (ret)
+ return ret;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now support only one counters spec per flow */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* If a flow could catch both multicast and unicast packets,
+ * it won't fall into the multicast flow steering table and this rule
+ * could steal other multicast packets.
+ */
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
+{
+ union ib_flow_spec *flow_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
+ return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
+}
+
+enum valid_spec {
+ VALID_SPEC_INVALID,
+ VALID_SPEC_VALID,
+ VALID_SPEC_NA,
+};
+
+static enum valid_spec
+is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ const u32 *match_c = spec->match_criteria;
+ bool is_crypto =
+ (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
+ bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
+ bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /*
+ * Currently only crypto is supported in egress, when regular egress
+ * rules would be supported, always return VALID_SPEC_NA.
+ */
+ if (!is_crypto)
+ return VALID_SPEC_NA;
+
+ return is_crypto && is_ipsec &&
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
+ VALID_SPEC_VALID : VALID_SPEC_INVALID;
+}
+
+static bool is_valid_spec(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ /* We curretly only support ipsec egress flow */
+ return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
+}
+
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
+ unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
+ ib_spec->eth.mask.ether_type) {
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+ struct mlx5_ib_dev *dev = handler->dev;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rules(iter->rule);
+ put_flow_table(dev, iter->prio, true);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rules(handler->rule);
+ put_flow_table(dev, handler->prio, true);
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ mutex_unlock(&dev->flow_db->lock);
+ if (handler->flow_matcher)
+ atomic_dec(&handler->flow_matcher->usecnt);
+ kfree(handler);
+
+ return 0;
+}
+
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
+enum flow_table_type {
+ MLX5_IB_FT_RX,
+ MLX5_IB_FT_TX
+};
+
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+ struct mlx5_ib_flow_prio *prio,
+ int priority,
+ int num_entries, int num_groups,
+ u32 flags)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = num_entries;
+ ft_attr.flags = flags;
+ ft_attr.autogroup.max_num_groups = num_groups;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return ERR_CAST(ft);
+
+ prio->flow_table = ft;
+ prio->refcount = 0;
+ return prio;
+}
+
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr,
+ enum flow_table_type ft_type)
+{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int max_table_size;
+ int num_entries;
+ int num_groups;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ enum mlx5_flow_namespace_type fn_type;
+
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority,
+ &num_entries,
+ &num_groups);
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ allow_sniffer_and_nic_rx_shared_tir))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+
+ prio = &dev->flow_db->sniffer[ft_type];
+ priority = 0;
+ num_entries = 1;
+ num_groups = 1;
+ }
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ max_table_size = min_t(int, num_entries, max_table_size);
+
+ ft = prio->flow_table;
+ if (!ft)
+ return _get_prio(ns, prio, priority, max_table_size, num_groups,
+ flags);
+
+ return prio;
+}
+
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
+ const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
+ unsigned int spec_index;
+ u32 prev_type = 0;
+ int err = 0;
+ int dest_num = 0;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+
+ if (!is_valid_attr(dev->mdev, flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ if (dev->is_rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(dev->mdev, spec,
+ ib_flow, flow_attr, &flow_act,
+ prev_type);
+ if (err < 0)
+ goto free;
+
+ prev_type = ((union ib_flow_spec *)ib_flow)->type;
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
+ if (dev->is_rep) {
+ struct mlx5_eswitch_rep *rep;
+
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
+ }
+
+ spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (is_egress &&
+ !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
+ err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ mcounters = to_mcounters(flow_act.counters);
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter_id =
+ mlx5_fc_id(mcounters->hw_cntrs_hndl);
+ dest_num++;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ if (!dest_num)
+ rule_dst = NULL;
+ } else {
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ flow_act.action |=
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else if (dest_num)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ spec->flow_context.flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act,
+ rule_dst, dest_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err && handler) {
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ kfree(handler);
+ }
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
+}
+
+enum {
+ LEFTOVERS_MC,
+ LEFTOVERS_UC,
+};
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_attr flow_attr;
+ struct ib_flow_spec_eth eth_flow;
+ } leftovers_specs[] = {
+ [LEFTOVERS_MC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {0x1} }
+ }
+ },
+ [LEFTOVERS_UC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {} }
+ }
+ }
+ };
+
+ handler = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_MC].flow_attr,
+ dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_UC].flow_attr,
+ dst);
+ if (IS_ERR(handler_ucast)) {
+ mlx5_del_flow_rules(handler->rule);
+ ft_prio->refcount--;
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_rx,
+ struct mlx5_ib_flow_prio *ft_tx,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_rx;
+ struct mlx5_ib_flow_handler *handler_tx;
+ int err;
+ static const struct ib_flow_attr flow_attr = {
+ .num_of_specs = 0,
+ .size = sizeof(flow_attr)
+ };
+
+ handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
+ if (IS_ERR(handler_rx)) {
+ err = PTR_ERR(handler_rx);
+ goto err;
+ }
+
+ handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
+ if (IS_ERR(handler_tx)) {
+ err = PTR_ERR(handler_tx);
+ goto err_tx;
+ }
+
+ list_add(&handler_tx->list, &handler_rx->list);
+
+ return handler_rx;
+
+err_tx:
+ mlx5_del_flow_rules(handler_rx->rule);
+ ft_rx->refcount--;
+ kfree(handler_rx);
+err:
+ return ERR_PTR(err);
+}
+
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
+ int err;
+ int underlay_qpn;
+
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+ sizeof(ucmd_hdr.reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently supports only one counters data */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err)
+ goto free_ucmd;
+ }
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ if (domain != IB_FLOW_DOMAIN_USER ||
+ flow_attr->port > dev->num_ports ||
+ (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
+ IB_FLOW_ATTR_FLAGS_EGRESS))) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+ if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
+ if (IS_ERR(ft_prio_tx)) {
+ err = PTR_ERR(ft_prio_tx);
+ ft_prio_tx = NULL;
+ goto destroy_ft;
+ }
+ }
+
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->is_rss)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ mqp->underlay_qpn :
+ 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
+ underlay_qpn, ucmd);
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
+ } else {
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+ kfree(ucmd);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+ if (ft_prio_tx)
+ put_flow_table(dev, ft_prio_tx, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+free_ucmd:
+ kfree(ucmd);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ bool mcast)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio = NULL;
+ int max_table_size = 0;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ if (mcast)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ max_table_size = BIT(
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ priority = FDB_BYPASS_PATH;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ }
+
+ max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ prio = &dev->flow_db->prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ prio = &dev->flow_db->egress_prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ prio = &dev->flow_db->fdb;
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ prio = &dev->flow_db->rdma_rx[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+ prio = &dev->flow_db->rdma_tx[priority];
+
+ if (!prio)
+ return ERR_PTR(-EINVAL);
+
+ if (prio->flow_table)
+ return prio;
+
+ return _get_prio(ns, prio, priority, max_table_size,
+ MLX5_FS_MAX_TYPES, flags);
+}
+
+static struct mlx5_ib_flow_handler *
+_create_raw_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct mlx5_flow_destination *dst,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ void *cmd_in, int inlen,
+ int dst_num)
+{
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ memcpy(spec->match_value, cmd_in, inlen);
+ memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
+ fs_matcher->mask_len);
+ spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
+
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ flow_act, dst, dst_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+ ft_prio->flow_table = ft;
+
+free:
+ if (err)
+ kfree(handler);
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
+ void *match_v)
+{
+ void *match_c;
+ void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
+ void *dmac, *dmac_mask;
+ void *ipv4, *ipv4_mask;
+
+ if (!(fs_matcher->match_criteria_enable &
+ (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
+ return false;
+
+ match_c = fs_matcher->matcher_mask.match_params;
+ match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+
+ dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dmac_47_16);
+ dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dmac_47_16);
+
+ if (is_multicast_ether_addr(dmac) &&
+ is_multicast_ether_addr(dmac_mask))
+ return true;
+
+ ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
+ ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
+ return true;
+
+ return false;
+}
+
+static struct mlx5_ib_flow_handler *raw_fs_rule_add(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
+ u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
+{
+ struct mlx5_flow_destination *dst;
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
+ bool mcast;
+ int err;
+
+ if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOMEM);
+
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, fs_matcher, mcast);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+ dst[dst_num].type = dest_type;
+ dst[dst_num++].tir_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num++].ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ }
+
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
+ cmd_in, inlen, dst_num);
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ atomic_inc(&fs_matcher->usecnt);
+ handler->flow_matcher = fs_matcher;
+
+ kfree(dst);
+
+ return handler;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+
+ return ERR_PTR(err);
+}
+
+static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
+{
+ u32 flags = 0;
+
+ if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
+ flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+
+ return flags;
+}
+
+#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \
+ MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+static struct ib_flow_action *
+mlx5_ib_create_flow_action_esp(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(device);
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
+ struct mlx5_ib_flow_action *action;
+ u64 action_flags;
+ u64 flags;
+ int err = 0;
+
+ err = uverbs_get_flags64(
+ &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
+ if (err)
+ return ERR_PTR(err);
+
+ flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
+
+ /* We current only support a subset of the standard features. Only a
+ * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
+ * (with overlap). Full offload mode isn't supported.
+ */
+ if (!attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->keymat->protocol !=
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ aes_gcm = &attr->keymat->keymat.aes_gcm;
+
+ if (aes_gcm->icv_len != 16 ||
+ aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ action = kmalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return ERR_PTR(-ENOMEM);
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+ memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
+ sizeof(accel_attrs.keymat.aes_gcm.aes_key));
+ accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
+ memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
+ sizeof(accel_attrs.keymat.aes_gcm.salt));
+ memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
+ sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
+ accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
+ accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
+ accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
+ accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
+
+ action->esp_aes_gcm.ctx =
+ mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
+ if (IS_ERR(action->esp_aes_gcm.ctx)) {
+ err = PTR_ERR(action->esp_aes_gcm.ctx);
+ goto err_parse;
+ }
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+
+ return &action->ib_action;
+
+err_parse:
+ kfree(action);
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs;
+ int err = 0;
+
+ if (attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
+ return -EOPNOTSUPP;
+
+ /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
+ * be modified.
+ */
+ if (!(maction->esp_aes_gcm.ib_flags &
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
+ attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
+ return -EINVAL;
+
+ memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
+ sizeof(accel_attrs));
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ else
+ accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
+ &accel_attrs);
+ if (err)
+ return err;
+
+ maction->esp_aes_gcm.ib_flags &=
+ ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+ maction->esp_aes_gcm.ib_flags |=
+ attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+
+ return 0;
+}
+
+static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.modify_hdr);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.pkt_reformat);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_ESP:
+ /*
+ * We only support aes_gcm by now, so we implicitly know this is
+ * the underline crypto.
+ */
+ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
+ break;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ destroy_flow_action_raw(maction);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
+ *namespace = MLX5_FLOW_NAMESPACE_FDB;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+ [MLX5_IB_FLOW_TYPE_NORMAL] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ .u.ptr = {
+ .len = sizeof(u16), /* data is priority */
+ .min_len = sizeof(u16),
+ }
+ },
+ [MLX5_IB_FLOW_TYPE_SNIFFER] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_DESTROY_TIR:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+ obj_id);
+ return true;
+
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+ table_id);
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int get_dests(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
+ int *dest_type, struct ib_qp **qp, u32 *flags)
+{
+ bool dest_devx, dest_qp;
+ void *devx_obj;
+ int err;
+
+ dest_devx = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+ dest_qp = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+ *flags = 0;
+ err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
+ if (err)
+ return err;
+
+ /* Both flags are not allowed */
+ if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
+ *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ if (dest_devx && (dest_qp || *flags))
+ return -EINVAL;
+ else if (dest_qp && *flags)
+ return -EINVAL;
+ }
+
+ /* Allow only DEVX object, drop as dest for FDB */
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
+ (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
+ return -EINVAL;
+
+ /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+ return -EINVAL;
+
+ *qp = NULL;
+ if (dest_devx) {
+ devx_obj =
+ uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+
+ /* Verify that the given DEVX object is a flow
+ * steering destination.
+ */
+ if (!is_flow_dest(devx_obj, dest_id, dest_type))
+ return -EINVAL;
+ /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ return -EINVAL;
+ } else if (dest_qp) {
+ struct mlx5_ib_qp *mqp;
+
+ *qp = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+ if (IS_ERR(*qp))
+ return PTR_ERR(*qp);
+
+ if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ mqp = to_mqp(*qp);
+ if (mqp->is_rss)
+ *dest_id = mqp->rss_qp.tirn;
+ else
+ *dest_id = mqp->raw_packet_qp.rq.tirn;
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ }
+
+ if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ *counter_id += offset;
+ return true;
+ }
+
+ return false;
+}
+
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_flow_context flow_context = {.flow_tag =
+ MLX5_FS_DEFAULT_FLOW_TAG};
+ u32 *offset_attr, offset = 0, counter_id = 0;
+ int dest_id, dest_type = -1, inlen, len, ret, i;
+ struct mlx5_ib_flow_handler *flow_handler;
+ struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
+ struct ib_qp *qp = NULL;
+ void *devx_obj, *cmd_in;
+ struct ib_uobject *uobj;
+ struct mlx5_ib_dev *dev;
+ u32 flags;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
+ return -EINVAL;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+
+ int num_offsets = uverbs_attr_ptr_get_array_size(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ sizeof(u32));
+
+ if (num_offsets != 1)
+ return -EINVAL;
+
+ offset_attr = uverbs_attr_get_alloced_ptr(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+ offset = *offset_attr;
+ }
+
+ if (!is_flow_counter(devx_obj, offset, &counter_id))
+ return -EINVAL;
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res)
+ return -ENOMEM;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_context.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow_handler =
+ raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
+ counter_id, cmd_in, inlen, dest_id, dest_type);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
+
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
+
+ return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+ return ret;
+}
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_matcher *obj = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ kfree(obj);
+ return 0;
+}
+
+static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *obj)
+{
+ enum mlx5_ib_uapi_flow_table_type ft_type =
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
+ u32 flags;
+ int err;
+
+ /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
+ * users should switch to it. We leave this to not break userspace
+ */
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
+ uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
+ return -EINVAL;
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
+ err = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
+ if (err)
+ return err;
+
+ return 0;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ return err;
+
+ if (flags) {
+ mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
+ &obj->ns_type);
+ return 0;
+ }
+ }
+
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ struct mlx5_ib_flow_matcher *obj;
+ int err;
+
+ obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->mask_len = uverbs_attr_get_len(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ err = uverbs_copy_from(&obj->matcher_mask,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ if (err)
+ goto end;
+
+ obj->flow_type = uverbs_attr_get_enum_id(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+ if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+ err = uverbs_copy_from(&obj->priority,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+ if (err)
+ goto end;
+ }
+
+ err = uverbs_copy_from(&obj->match_criteria_enable,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+ if (err)
+ goto end;
+
+ err = mlx5_ib_matcher_ns(attrs, obj);
+ if (err)
+ goto end;
+
+ uobj->object = obj;
+ obj->mdev = dev->mdev;
+ atomic_set(&obj->usecnt, 0);
+ return 0;
+
+end:
+ kfree(obj);
+ return err;
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
+
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ int num_actions;
+ void *in;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
+ return ret;
+ }
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_CREATE_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ enum mlx5_ib_create_flow_flags,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DESTROY_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+ UVERBS_OBJECT_FLOW,
+ &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_add_copy_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ mlx5_ib_flow_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ {},
+};
+
+static const struct ib_device_ops flow_ops = {
+ .create_flow = mlx5_ib_create_flow,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+};
+
+static const struct ib_device_ops flow_ipsec_ops = {
+ .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
+ .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
+};
+
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ ib_set_device_ops(&dev->ib_dev, &flow_ops);
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
new file mode 100644
index 000000000000..ad320adaf321
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_FS_H
+#define _MLX5_IB_FS_H
+
+#include "mlx5_ib.h"
+
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+ return 0;
+}
+#endif
+static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6f99ed03d88e..fbc45a5e76c5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -59,10 +32,13 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "devx.h"
+#include "fs.h"
#include "srq.h"
#include "qp.h"
#include "wr.h"
-#include <linux/mlx5/fs_helpers.h>
+#include "restrack.h"
+#include "counters.h"
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
*native_port_num = 1;
port = &ibdev->port[ib_port_num - 1];
- if (!port)
- return NULL;
-
spin_lock(&port->mp.mpi_lock);
mpi = ibdev->port[ib_port_num - 1].mp.mpi;
if (mpi && !mpi->unaffiliate) {
@@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
mlx5_ib_disable_lb(dev, true, false);
}
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+ struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
+ int err;
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ err = mlx5_cmd_dump_fill_mkey(dev->mdev,
+ &resp->dump_fill_mkey);
+ if (err)
+ return err;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+ if (dev->wc_support)
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_bf_reg_size);
+ resp->cache_line_size = cache_line_size();
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp->cqe_version = context->cqe_version;
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev,
+ num_of_uars_per_page) : 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+ resp->num_ports = dev->num_ports;
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
+
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+ resp->eth_min_inline++;
+ }
+
+ if (dev->mdev->clock_info)
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
+ /*
+ * We don't want to expose information from the PCI bar that is located
+ * after 4096 bytes, so if the arch only supports larger pages, let's
+ * pretend we don't support reading the HCA's core clock. This is also
+ * forced by mmap function.
+ */
+ if (PAGE_SIZE <= 4096) {
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp->hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg,
+ internal_timer_h) % PAGE_SIZE;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+ return 0;
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
- u32 dump_fill_mkey;
bool lib_uar_4k;
bool lib_uar_dyn;
@@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return -EINVAL;
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (dev->wc_support)
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = cache_line_size();
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
- resp.cqe_version = min_t(__u8,
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
- req.max_cqe_version);
- resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
- resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
- }
-
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
@@ -1887,87 +1913,24 @@ uar_done:
if (err)
goto out_devx;
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
- if (err)
- goto out_mdev;
- }
-
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
- resp.num_ports = dev->num_ports;
-
- if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
- resp.response_length += sizeof(resp.cqe_version);
-
- if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
- MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
- resp.response_length += sizeof(resp.cmds_supp_uhw);
- }
-
- if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) {
- if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
- mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
- resp.eth_min_inline++;
- }
- resp.response_length += sizeof(resp.eth_min_inline);
- }
-
- if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) {
- if (mdev->clock_info)
- resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
- resp.response_length += sizeof(resp.clock_info_versions);
- }
-
- /*
- * We don't want to expose information from the PCI bar that is located
- * after 4096 bytes, so if the arch only supports larger pages, let's
- * pretend we don't support reading the HCA's core clock. This is also
- * forced by mmap function.
- */
- if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
- if (PAGE_SIZE <= 4096) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
- }
- resp.response_length += sizeof(resp.hca_core_clock_offset);
- }
-
- if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
- resp.response_length += sizeof(resp.log_uar_size);
-
- if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
- resp.response_length += sizeof(resp.num_uars_per_page);
-
- if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
- resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
- resp.response_length += sizeof(resp.num_dyn_bfregs);
- }
-
- if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) {
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- resp.dump_fill_mkey = dump_fill_mkey;
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
- }
- resp.response_length += sizeof(resp.dump_fill_mkey);
- }
+ context->cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
- if (MLX5_CAP_GEN(dev->mdev, ece_support))
- resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+ err = set_ucontext_resp(uctx, &resp);
+ if (err)
+ goto out_mdev;
+ resp.response_length = min(udata->outlen, sizeof(resp));
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
- context->cqe_version = resp.cqe_version;
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
@@ -2000,6 +1963,29 @@ out_ctx:
return err;
}
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+ int ret;
+
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
+ if (ret)
+ return ret;
+
+ uctx_resp.response_length =
+ min_t(size_t,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+ sizeof(uctx_resp));
+
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ &uctx_resp,
+ sizeof(uctx_resp));
+ return ret;
+}
+
static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
-enum {
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
- MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT,
- MATCH_CRITERIA_ENABLE_MISC2_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers) \
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
- u8 match_criteria_enable;
-
- match_criteria_enable =
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
- MATCH_CRITERIA_ENABLE_MISC_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
- MATCH_CRITERIA_ENABLE_INNER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
- MATCH_CRITERIA_ENABLE_MISC2_BIT;
-
- return match_criteria_enable;
-}
-
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- u8 entry_mask;
- u8 entry_val;
- int err = 0;
-
- if (!mask)
- goto out;
-
- entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
- ip_protocol);
- entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
- ip_protocol);
- if (!entry_mask) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
- goto out;
- }
- /* Don't override existing ip protocol */
- if (mask != entry_mask || val != entry_val)
- err = -EINVAL;
-out:
- return err;
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
- bool inner)
-{
- if (inner) {
- MLX5_SET(fte_match_set_misc,
- misc_c, inner_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, inner_ipv6_flow_label, val);
- } else {
- MLX5_SET(fte_match_set_misc,
- misc_c, outer_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, outer_ipv6_flow_label, val);
- }
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
-{
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-#define LAST_FLOW_TAG_FIELD tag_id
-#define LAST_DROP_FIELD size
-#define LAST_COUNTERS_FIELD counters
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
-
-int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
- bool is_egress,
- struct mlx5_flow_act *action)
-{
-
- switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
- case IB_FLOW_ACTION_UNSPECIFIED:
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_hdr =
- maction->flow_action_raw.modify_hdr;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_DECAP) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
- if (action->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- return -EINVAL;
- action->action |=
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->pkt_reformat =
- maction->flow_action_raw.pkt_reformat;
- return 0;
- }
- /* fall through */
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int parse_flow_attr(struct mlx5_core_dev *mdev,
- struct mlx5_flow_spec *spec,
- const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action, u32 prev_type)
-{
- struct mlx5_flow_context *flow_context = &spec->flow_context;
- u32 *match_c = spec->match_criteria;
- u32 *match_v = spec->match_value;
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters);
- void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters_2);
- void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters_2);
- void *headers_c;
- void *headers_v;
- int match_ipv;
- int ret;
-
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- inner_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version);
- } else {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- }
-
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
- case IB_FLOW_SPEC_ETH:
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -EOPNOTSUPP;
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dmac_47_16),
- ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16),
- ib_spec->eth.val.dst_mac);
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- smac_47_16),
- ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- smac_47_16),
- ib_spec->eth.val.src_mac);
-
- if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- cvlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- cvlan_tag, 1);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_cfi,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_cfi,
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_prio,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_prio,
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
- }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ntohs(ib_spec->eth.val.ether_type));
- break;
- case IB_FLOW_SPEC_IPV4:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV4_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.src_ip,
- sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.src_ip,
- sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.dst_ip,
- sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.dst_ip,
- sizeof(ib_spec->ipv4.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto,
- ib_spec->ipv4.val.proto))
- return -EINVAL;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV6_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.src_ip,
- sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.src_ip,
- sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.dst_ip,
- sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.dst_ip,
- sizeof(ib_spec->ipv6.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv6.mask.traffic_class,
- ib_spec->ipv6.val.traffic_class);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr))
- return -EINVAL;
-
- set_flow_label(misc_params_c, misc_params_v,
- ntohl(ib_spec->ipv6.mask.flow_label),
- ntohl(ib_spec->ipv6.val.flow_label),
- ib_spec->type & IB_FLOW_SPEC_INNER);
- break;
- case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
- case IB_FLOW_SPEC_TCP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_UDP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_GRE:
- if (ib_spec->gre.mask.c_ks_res0_ver)
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_GRE);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
- ntohs(ib_spec->gre.mask.protocol));
- MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
- ntohs(ib_spec->gre.val.protocol));
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key.nvgre.hi),
- &ib_spec->gre.mask.key,
- sizeof(ib_spec->gre.mask.key));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key.nvgre.hi),
- &ib_spec->gre.val.key,
- sizeof(ib_spec->gre.val.key));
- break;
- case IB_FLOW_SPEC_MPLS:
- switch (prev_type) {
- case IB_FLOW_SPEC_UDP:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- case IB_FLOW_SPEC_GRE:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- default:
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- inner_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- inner_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- } else {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- }
- }
- break;
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
- LAST_TUNNEL_FIELD))
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
- ntohl(ib_spec->tunnel.mask.tunnel_id));
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
- ntohl(ib_spec->tunnel.val.tunnel_id));
- break;
- case IB_FLOW_SPEC_ACTION_TAG:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
- LAST_FLOW_TAG_FIELD))
- return -EOPNOTSUPP;
- if (ib_spec->flow_tag.tag_id >= BIT(24))
- return -EINVAL;
-
- flow_context->flow_tag = ib_spec->flow_tag.tag_id;
- flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
- break;
- case IB_FLOW_SPEC_ACTION_DROP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
- LAST_DROP_FIELD))
- return -EOPNOTSUPP;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- break;
- case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
- flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
- if (ret)
- return ret;
- break;
- case IB_FLOW_SPEC_ACTION_COUNT:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
- LAST_COUNTERS_FIELD))
- return -EOPNOTSUPP;
-
- /* for now support only one counters spec per flow */
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- return -EINVAL;
-
- action->counters = ib_spec->flow_count.counters;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
-{
- union ib_flow_spec *flow_spec;
-
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->num_of_specs < 1)
- return false;
-
- flow_spec = (union ib_flow_spec *)(ib_attr + 1);
- if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
- struct ib_flow_spec_ipv4 *ipv4_spec;
-
- ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
- if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
- return true;
-
- return false;
- }
-
- if (flow_spec->type == IB_FLOW_SPEC_ETH) {
- struct ib_flow_spec_eth *eth_spec;
-
- eth_spec = (struct ib_flow_spec_eth *)flow_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
- }
-
- return false;
-}
-
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
- * Currently only crypto is supported in egress, when regular egress
- * rules would be supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop &&
- !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- /* We curretly only support ipsec egress flow */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr,
- bool check_inner)
-{
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- int match_ipv = check_inner ?
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version) :
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
- bool ipv4_spec_valid, ipv6_spec_valid;
- unsigned int ip_spec_type = 0;
- bool has_ethertype = false;
- unsigned int spec_index;
- bool mask_valid = true;
- u16 eth_type = 0;
- bool type_valid;
-
- /* Validate that ethertype is correct */
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
- ib_spec->eth.mask.ether_type) {
- mask_valid = (ib_spec->eth.mask.ether_type ==
- htons(0xffff));
- has_ethertype = true;
- eth_type = ntohs(ib_spec->eth.val.ether_type);
- } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
- (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
- ip_spec_type = ib_spec->type;
- }
- ib_spec = (void *)ib_spec + ib_spec->size;
- }
-
- type_valid = (!has_ethertype) || (!ip_spec_type);
- if (!type_valid && mask_valid) {
- ipv4_spec_valid = (eth_type == ETH_P_IP) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
- ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-
- type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
- (((eth_type == ETH_P_MPLS_UC) ||
- (eth_type == ETH_P_MPLS_MC)) && match_ipv);
- }
-
- return type_valid;
-}
-
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr)
-{
- return is_valid_ethertype(mdev, flow_attr, false) &&
- is_valid_ethertype(mdev, flow_attr, true);
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
- prio->refcount -= !!ft_added;
- if (!prio->refcount) {
- mlx5_destroy_flow_table(prio->flow_table);
- prio->flow_table = NULL;
- }
-}
-
-static void counters_clear_description(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- mutex_lock(&mcounters->mcntrs_mutex);
- kfree(mcounters->counters_data);
- mcounters->counters_data = NULL;
- mcounters->cntrs_max_index = 0;
- mutex_unlock(&mcounters->mcntrs_mutex);
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
- struct mlx5_ib_flow_handler,
- ibflow);
- struct mlx5_ib_flow_handler *iter, *tmp;
- struct mlx5_ib_dev *dev = handler->dev;
-
- mutex_lock(&dev->flow_db->lock);
-
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
- mlx5_del_flow_rules(iter->rule);
- put_flow_table(dev, iter->prio, true);
- list_del(&iter->list);
- kfree(iter);
- }
-
- mlx5_del_flow_rules(handler->rule);
- put_flow_table(dev, handler->prio, true);
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
-
- mutex_unlock(&dev->flow_db->lock);
- if (handler->flow_matcher)
- atomic_dec(&handler->flow_matcher->usecnt);
- kfree(handler);
-
- return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
- priority *= 2;
- if (!dont_trap)
- priority++;
- return priority;
-}
-
-enum flow_table_type {
- MLX5_IB_FT_RX,
- MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES 6
-#define MLX5_FS_MAX_ENTRIES BIT(16)
-
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
- struct mlx5_ib_flow_prio *prio,
- int priority,
- int num_entries, int num_groups,
- u32 flags)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_table *ft;
-
- ft_attr.prio = priority;
- ft_attr.max_fte = num_entries;
- ft_attr.flags = flags;
- ft_attr.autogroup.max_num_groups = num_groups;
- ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
- if (IS_ERR(ft))
- return ERR_CAST(ft);
-
- prio->flow_table = ft;
- prio->refcount = 0;
- return prio;
-}
-
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
- struct ib_flow_attr *flow_attr,
- enum flow_table_type ft_type)
-{
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
- struct mlx5_flow_table *ft;
- int max_table_size;
- int num_entries;
- int num_groups;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(flow_attr->priority,
- dont_trap);
- if (ft_type == MLX5_IB_FT_RX) {
- fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
- prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
- fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
- prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- }
- ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
- num_entries = MLX5_FS_MAX_ENTRIES;
- num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
- prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
- allow_sniffer_and_nic_rx_shared_tir))
- return ERR_PTR(-ENOTSUPP);
-
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
- prio = &dev->flow_db->sniffer[ft_type];
- priority = 0;
- num_entries = 1;
- num_groups = 1;
- }
-
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- max_table_size = min_t(int, num_entries, max_table_size);
-
- ft = prio->flow_table;
- if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
-
- return prio;
-}
-
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- u32 underlay_qpn)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- if (underlay_qpn &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- ft_field_support.bth_dst_qp)) {
- MLX5_SET(fte_match_set_misc,
- misc_params_v, bth_dst_qp, underlay_qpn);
- MLX5_SET(fte_match_set_misc,
- misc_params_c, bth_dst_qp, 0xffffff);
- }
-}
-
-static int read_flow_counters(struct ib_device *ibdev,
- struct mlx5_read_counters_attr *read_attr)
-{
- struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_fc_query(dev->mdev, fc,
- &read_attr->out[IB_COUNTER_PACKETS],
- &read_attr->out[IB_COUNTER_BYTES]);
-}
-
-/* flow counters currently expose two counters packets and bytes */
-#define FLOW_COUNTERS_NUM 2
-static int counters_set_description(struct ib_counters *counters,
- enum mlx5_ib_counters_type counters_type,
- struct mlx5_ib_flow_counters_desc *desc_data,
- u32 ncounters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- u32 cntrs_max_index = 0;
- int i;
-
- if (counters_type != MLX5_IB_COUNTERS_FLOW)
- return -EINVAL;
-
- /* init the fields for the object */
- mcounters->type = counters_type;
- mcounters->read_counters = read_flow_counters;
- mcounters->counters_num = FLOW_COUNTERS_NUM;
- mcounters->ncounters = ncounters;
- /* each counter entry have both description and index pair */
- for (i = 0; i < ncounters; i++) {
- if (desc_data[i].description > IB_COUNTER_BYTES)
- return -EINVAL;
-
- if (cntrs_max_index <= desc_data[i].index)
- cntrs_max_index = desc_data[i].index + 1;
- }
-
- mutex_lock(&mcounters->mcntrs_mutex);
- mcounters->counters_data = desc_data;
- mcounters->cntrs_max_index = cntrs_max_index;
- mutex_unlock(&mcounters->mcntrs_mutex);
-
- return 0;
-}
-
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
-static int flow_counters_set_data(struct ib_counters *ibcounters,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
- struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
- struct mlx5_ib_flow_counters_desc *desc_data = NULL;
- bool hw_hndl = false;
- int ret = 0;
-
- if (ucmd && ucmd->ncounters_data != 0) {
- cntrs_data = ucmd->data;
- if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
- return -EINVAL;
-
- desc_data = kcalloc(cntrs_data->ncounters,
- sizeof(*desc_data),
- GFP_KERNEL);
- if (!desc_data)
- return -ENOMEM;
-
- if (copy_from_user(desc_data,
- u64_to_user_ptr(cntrs_data->counters_data),
- sizeof(*desc_data) * cntrs_data->ncounters)) {
- ret = -EFAULT;
- goto free;
- }
- }
-
- if (!mcounters->hw_cntrs_hndl) {
- mcounters->hw_cntrs_hndl = mlx5_fc_create(
- to_mdev(ibcounters->device)->mdev, false);
- if (IS_ERR(mcounters->hw_cntrs_hndl)) {
- ret = PTR_ERR(mcounters->hw_cntrs_hndl);
- goto free;
- }
- hw_hndl = true;
- }
-
- if (desc_data) {
- /* counters already bound to at least one flow */
- if (mcounters->cntrs_max_index) {
- ret = -EINVAL;
- goto free_hndl;
- }
-
- ret = counters_set_description(ibcounters,
- MLX5_IB_COUNTERS_FLOW,
- desc_data,
- cntrs_data->ncounters);
- if (ret)
- goto free_hndl;
-
- } else if (!mcounters->cntrs_max_index) {
- /* counters not bound yet, must have udata passed */
- ret = -EINVAL;
- goto free_hndl;
- }
-
- return 0;
-
-free_hndl:
- if (hw_hndl) {
- mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
- mcounters->hw_cntrs_hndl);
- mcounters->hw_cntrs_hndl = NULL;
- }
-free:
- kfree(desc_data);
- return ret;
-}
-
-static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- struct mlx5_eswitch_rep *rep)
-{
- struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
- void *misc;
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(esw,
- rep->vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_mask());
- } else {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
-
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- }
-}
-
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst,
- u32 underlay_qpn,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination dest_arr[2] = {};
- struct mlx5_flow_destination *rule_dst = dest_arr;
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
- unsigned int spec_index;
- u32 prev_type = 0;
- int err = 0;
- int dest_num = 0;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-
- if (!is_valid_attr(dev->mdev, flow_attr))
- return ERR_PTR(-EINVAL);
-
- if (dev->is_rep && is_egress)
- return ERR_PTR(-EINVAL);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec,
- ib_flow, flow_attr, &flow_act,
- prev_type);
- if (err < 0)
- goto free;
-
- prev_type = ((union ib_flow_spec *)ib_flow)->type;
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
- }
-
- if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
-
- if (!flow_is_multicast_only(flow_attr))
- set_underlay_qp(dev, spec, underlay_qpn);
-
- if (dev->is_rep) {
- struct mlx5_eswitch_rep *rep;
-
- rep = dev->port[flow_attr->port - 1].rep;
- if (!rep) {
- err = -EINVAL;
- goto free;
- }
-
- mlx5_ib_set_rule_source_port(dev, spec, rep);
- }
-
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
- err = -EINVAL;
- goto free;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- struct mlx5_ib_mcounters *mcounters;
-
- err = flow_counters_set_data(flow_act.counters, ucmd);
- if (err)
- goto free;
-
- mcounters = to_mcounters(flow_act.counters);
- handler->ibcounters = flow_act.counters;
- dest_arr[dest_num].type =
- MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
- dest_num++;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!dest_num)
- rule_dst = NULL;
- } else {
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
- flow_act.action |=
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- if (is_egress)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- else if (dest_num)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
-
- if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- spec->flow_context.flow_tag, flow_attr->type);
- err = -EINVAL;
- goto free;
- }
- handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act,
- rule_dst, dest_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
-
- ft_prio->flow_table = ft;
-free:
- if (err && handler) {
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
- kfree(handler);
- }
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
-}
-
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- static struct {
- struct ib_flow_attr flow_attr;
- struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
-
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
- if (!IS_ERR(handler) &&
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
- handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
- if (IS_ERR(handler_ucast)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_ucast;
- } else {
- list_add(&handler_ucast->list, &handler->list);
- }
- }
-
- return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_rx,
- struct mlx5_ib_flow_prio *ft_tx,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_rx;
- struct mlx5_ib_flow_handler *handler_tx;
- int err;
- static const struct ib_flow_attr flow_attr = {
- .num_of_specs = 0,
- .size = sizeof(flow_attr)
- };
-
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
- if (IS_ERR(handler_rx)) {
- err = PTR_ERR(handler_rx);
- goto err;
- }
-
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
- if (IS_ERR(handler_tx)) {
- err = PTR_ERR(handler_tx);
- goto err_tx;
- }
-
- list_add(&handler_tx->list, &handler_rx->list);
-
- return handler_rx;
-
-err_tx:
- mlx5_del_flow_rules(handler_rx->rule);
- ft_rx->refcount--;
- kfree(handler_rx);
-err:
- return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain,
- struct ib_udata *udata)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_qp *mqp = to_mqp(qp);
- struct mlx5_ib_flow_handler *handler = NULL;
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
- struct mlx5_ib_flow_prio *ft_prio;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
- struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
- size_t min_ucmd_sz, required_ucmd_sz;
- int err;
- int underlay_qpn;
-
- if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
- if (udata->inlen < min_ucmd_sz)
- return ERR_PTR(-EOPNOTSUPP);
-
- err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
- if (err)
- return ERR_PTR(err);
-
- /* currently supports only one counters data */
- if (ucmd_hdr.ncounters_data > 1)
- return ERR_PTR(-EINVAL);
-
- required_ucmd_sz = min_ucmd_sz +
- sizeof(struct mlx5_ib_flow_counters_data) *
- ucmd_hdr.ncounters_data;
- if (udata->inlen > required_ucmd_sz &&
- !ib_is_udata_cleared(udata, required_ucmd_sz,
- udata->inlen - required_ucmd_sz))
- return ERR_PTR(-EOPNOTSUPP);
-
- ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
- if (!ucmd)
- return ERR_PTR(-ENOMEM);
-
- err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
- if (err)
- goto free_ucmd;
- }
-
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- if (is_egress &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = get_flow_table(dev, flow_attr,
- is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
- if (IS_ERR(ft_prio_tx)) {
- err = PTR_ERR(ft_prio_tx);
- ft_prio_tx = NULL;
- goto destroy_ft;
- }
- }
-
- if (is_egress) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- } else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->is_rss)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
- }
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- mqp->underlay_qpn :
- 0;
- handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
- underlay_qpn, ucmd);
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
- err = -EINVAL;
- goto destroy_ft;
- }
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- handler = NULL;
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
- kfree(ucmd);
-
- return &handler->ibflow;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
- if (ft_prio_tx)
- put_flow_table(dev, ft_prio_tx, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-free_ucmd:
- kfree(ucmd);
- return ERR_PTR(err);
-}
-
-static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- bool mcast)
-{
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio = NULL;
- int max_table_size = 0;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- if (mcast)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
-
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2) &&
- !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- max_table_size = BIT(
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
- max_table_size = BIT(
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
- esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- priority = FDB_BYPASS_PATH;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- }
-
- max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
-
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
- prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- prio = &dev->flow_db->fdb;
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
- prio = &dev->flow_db->rdma_rx[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
- prio = &dev->flow_db->rdma_tx[priority];
-
- if (!prio)
- return ERR_PTR(-EINVAL);
-
- if (prio->flow_table)
- return prio;
-
- return _get_prio(ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
-}
-
-static struct mlx5_ib_flow_handler *
-_create_raw_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct mlx5_flow_destination *dst,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen,
- int dst_num)
-{
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- int err = 0;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- memcpy(spec->match_value, cmd_in, inlen);
- memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
- fs_matcher->mask_len);
- spec->match_criteria_enable = fs_matcher->match_criteria_enable;
- spec->flow_context = *flow_context;
-
- handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, dst_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
- ft_prio->flow_table = ft;
-
-free:
- if (err)
- kfree(handler);
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
- void *match_v)
-{
- void *match_c;
- void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
- void *dmac, *dmac_mask;
- void *ipv4, *ipv4_mask;
-
- if (!(fs_matcher->match_criteria_enable &
- (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
- return false;
-
- match_c = fs_matcher->matcher_mask.match_params;
- match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
-
- dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dmac_47_16);
- dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dmac_47_16);
-
- if (is_multicast_ether_addr(dmac) &&
- is_multicast_ether_addr(dmac_mask))
- return true;
-
- ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
- ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
- return true;
-
- return false;
-}
-
-struct mlx5_ib_flow_handler *
-mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- u32 counter_id,
- void *cmd_in, int inlen, int dest_id,
- int dest_type)
-{
- struct mlx5_flow_destination *dst;
- struct mlx5_ib_flow_prio *ft_prio;
- struct mlx5_ib_flow_handler *handler;
- int dst_num = 0;
- bool mcast;
- int err;
-
- if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOMEM);
-
- dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
-
- mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
-
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst[dst_num].type = dest_type;
- dst[dst_num++].tir_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst[dst_num++].ft_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
- dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- }
-
-
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
- dst_num++;
- }
-
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
- flow_context, flow_act,
- cmd_in, inlen, dst_num);
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- atomic_inc(&fs_matcher->usecnt);
- handler->flow_matcher = fs_matcher;
-
- kfree(dst);
-
- return handler;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-
- return ERR_PTR(err);
-}
-
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
- /* We current only support a subset of the standard features. Only a
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
- * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-
- switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
- * We only support aes_gcm by now, so we implicitly know this is
- * the underline crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
- case IB_FLOW_ACTION_UNSPECIFIED:
- mlx5_ib_destroy_flow_action_raw(maction);
- break;
- default:
- WARN_ON(true);
- break;
- }
-
- kfree(maction);
- return 0;
-}
-
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
return __get_port_caps(dev, port);
}
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
-{
- int err;
-
- err = mlx5_mr_cache_cleanup(dev);
- if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
- if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
- if (dev->umrc.cq)
- ib_free_cq(dev->umrc.cq);
- if (dev->umrc.pd)
- ib_dealloc_pd(dev->umrc.pd);
-}
-
-enum {
- MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
-{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
- int ret;
-
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
-
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- ret = PTR_ERR(pd);
- goto error_0;
- }
-
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
- if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
- ret = PTR_ERR(cq);
- goto error_2;
- }
-
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
- qp->send_cq = init_attr->send_cq;
- qp->recv_cq = init_attr->recv_cq;
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
- IB_QP_PORT, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
-
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
- }
-
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
-
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
- if (ret) {
- mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
- }
-
- kfree(attr);
- kfree(init_attr);
-
- return 0;
-
-error_4:
- mlx5_ib_destroy_qp(qp, NULL);
- dev->umrc.qp = NULL;
-
-error_3:
- ib_free_cq(cq);
- dev->umrc.cq = NULL;
-
-error_2:
- ib_dealloc_pd(pd);
- dev->umrc.pd = NULL;
-
-error_0:
- kfree(attr);
- kfree(init_attr);
- return ret;
-}
-
static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
switch (umr_fence_cap) {
@@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
}
}
-static int create_dev_resources(struct mlx5_ib_resources *devr)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
struct ib_srq_init_attr attr;
- struct mlx5_ib_dev *dev;
struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
- dev = container_of(devr, struct mlx5_ib_dev, devr);
ibdev = &dev->ib_dev;
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
+
mutex_init(&devr->mutex);
devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
@@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
if (ret)
goto err_create_cq;
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x0)) {
- ret = PTR_ERR(devr->x0);
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
goto error2;
- }
- devr->x0->device = &dev->ib_dev;
- devr->x0->inode = NULL;
- atomic_set(&devr->x0->usecnt, 0);
- mutex_init(&devr->x0->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x1)) {
- ret = PTR_ERR(devr->x1);
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret)
goto error3;
- }
- devr->x1->device = &dev->ib_dev;
- devr->x1->inode = NULL;
- atomic_set(&devr->x1->usecnt, 0);
- mutex_init(&devr->x1->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
attr.ext.cq = devr->c0;
- attr.ext.xrc.xrcd = devr->x0;
devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
if (!devr->s0) {
@@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
devr->s0->srq_type = IB_SRQT_XRC;
- devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.cq = devr->c0;
ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
if (ret)
goto err_create;
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -5116,9 +3142,9 @@ error5:
err_create:
kfree(devr->s0);
error4:
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
error3:
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
err_create_cq:
@@ -5130,16 +3156,17 @@ error0:
return ret;
}
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
int port;
mlx5_ib_destroy_srq(devr->s1, NULL);
kfree(devr->s1);
mlx5_ib_destroy_srq(devr->s0, NULL);
kfree(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
mlx5_ib_destroy_cq(devr->c0, NULL);
kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
@@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_counter {
- const char *name;
- size_t offset;
-};
-
-#define INIT_Q_COUNTER(_name) \
- { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-
-static const struct mlx5_ib_counter basic_q_cnts[] = {
- INIT_Q_COUNTER(rx_write_requests),
- INIT_Q_COUNTER(rx_read_requests),
- INIT_Q_COUNTER(rx_atomic_requests),
- INIT_Q_COUNTER(out_of_buffer),
-};
-
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
- INIT_Q_COUNTER(out_of_sequence),
-};
-
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
- INIT_Q_COUNTER(duplicate_request),
- INIT_Q_COUNTER(rnr_nak_retry_err),
- INIT_Q_COUNTER(packet_seq_err),
- INIT_Q_COUNTER(implied_nak_seq_err),
- INIT_Q_COUNTER(local_ack_timeout_err),
-};
-
-#define INIT_CONG_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
-
-static const struct mlx5_ib_counter cong_cnts[] = {
- INIT_CONG_COUNTER(rp_cnp_ignored),
- INIT_CONG_COUNTER(rp_cnp_handled),
- INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
- INIT_CONG_COUNTER(np_cnp_sent),
-};
-
-static const struct mlx5_ib_counter extended_err_cnts[] = {
- INIT_Q_COUNTER(resp_local_length_error),
- INIT_Q_COUNTER(resp_cqe_error),
- INIT_Q_COUNTER(req_cqe_error),
- INIT_Q_COUNTER(req_remote_invalid_request),
- INIT_Q_COUNTER(req_remote_access_errors),
- INIT_Q_COUNTER(resp_remote_access_errors),
- INIT_Q_COUNTER(resp_cqe_flush_error),
- INIT_Q_COUNTER(req_cqe_flush_error),
-};
-
-static const struct mlx5_ib_counter roce_accl_cnts[] = {
- INIT_Q_COUNTER(roce_adp_retrans),
- INIT_Q_COUNTER(roce_adp_retrans_to),
- INIT_Q_COUNTER(roce_slow_restart),
- INIT_Q_COUNTER(roce_slow_restart_cnps),
- INIT_Q_COUNTER(roce_slow_restart_trans),
-};
-
-#define INIT_EXT_PPCNT_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(ppcnt_reg, \
- counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
-
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
- INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
-};
-
-static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
-{
- return MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
- MLX5_ESWITCH_OFFLOADS;
-}
-
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
- int i;
-
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-
- for (i = 0; i < num_cnt_ports; i++) {
- if (dev->port[i].cnts.set_id) {
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
- dev->port[i].cnts.set_id);
- mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
- }
- kfree(dev->port[i].cnts.names);
- kfree(dev->port[i].cnts.offsets);
- }
-}
-
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_counters *cnts)
-{
- u32 num_counters;
-
- num_counters = ARRAY_SIZE(basic_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
-
- cnts->num_q_counters = num_counters;
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
- num_counters += ARRAY_SIZE(cong_cnts);
- }
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
- num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
- }
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
- if (!cnts->names)
- return -ENOMEM;
-
- cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
- if (!cnts->offsets)
- goto err_names;
-
- return 0;
-
-err_names:
- kfree(cnts->names);
- cnts->names = NULL;
- return -ENOMEM;
-}
-
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
-{
- int i;
- int j = 0;
-
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
- names[j] = basic_q_cnts[i].name;
- offsets[j] = basic_q_cnts[i].offset;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
- names[j] = out_of_seq_q_cnts[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
- names[j] = retrans_q_cnts[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
- names[j] = extended_err_cnts[i].name;
- offsets[j] = extended_err_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
- names[j] = roce_accl_cnts[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
- names[j] = cong_cnts[i].name;
- offsets[j] = cong_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
- names[j] = ext_ppcnt_cnts[i].name;
- offsets[j] = ext_ppcnt_cnts[i].offset;
- }
- }
-}
-
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
- int err = 0;
- int i;
- bool is_shared;
-
- MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- for (i = 0; i < num_cnt_ports; i++) {
- err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
- if (err)
- goto err_alloc;
-
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
- dev->port[i].cnts.offsets);
-
- MLX5_SET(alloc_q_counter_in, in, uid,
- is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
-
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err) {
- mlx5_ib_warn(dev,
- "couldn't allocate queue counter for port %d, err %d\n",
- i + 1, err);
- goto err_alloc;
- }
-
- dev->port[i].cnts.set_id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
- return 0;
-
-err_alloc:
- mlx5_ib_dealloc_counters(dev);
- return err;
-}
-
-static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
- u8 port_num)
-{
- return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
- &dev->port[port_num].cnts;
-}
-
-/**
- * mlx5_ib_get_counters_id - Returns counters id to use for device+port
- * @dev: Pointer to mlx5 IB device
- * @port_num: Zero based port number
- *
- * mlx5_ib_get_counters_id() Returns counters set id to use for given
- * device port combination in switchdev and non switchdev mode of the
- * parent device.
- */
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
-{
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
-
- return cnts->set_id;
-}
-
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts;
- bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
-
- if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
- return NULL;
-
- cnts = get_counters(dev, port_num - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats,
- u16 set_id)
-{
- u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
- __be32 val;
- int ret, i;
-
- MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
- MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
- ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
- if (ret)
- return ret;
-
- for (i = 0; i < cnts->num_q_counters; i++) {
- val = *(__be32 *)((void *)out + cnts->offsets[i]);
- stats->value[i] = (u64)be32_to_cpu(val);
- }
-
- return 0;
-}
-
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats)
-{
- int offset = cnts->num_q_counters + cnts->num_cong_counters;
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- int ret, i;
- void *out;
-
- out = kvzalloc(sz, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
- if (ret)
- goto free;
-
- for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
- stats->value[i + offset] =
- be64_to_cpup((__be64 *)(out +
- cnts->offsets[i + offset]));
-free:
- kvfree(out);
- return ret;
-}
-
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
- struct mlx5_core_dev *mdev;
- int ret, num_counters;
- u8 mdev_port_num;
-
- if (!stats)
- return -EINVAL;
-
- num_counters = cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters;
-
- /* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
- if (ret)
- return ret;
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
- if (ret)
- return ret;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
- &mdev_port_num);
- if (!mdev) {
- /* If port is not affiliated yet, its in down state
- * which doesn't have any counters yet, so it would be
- * zero. So no need to read from the HCA.
- */
- goto done;
- }
- ret = mlx5_lag_query_cong_counters(dev->mdev,
- stats->value +
- cnts->num_q_counters,
- cnts->num_cong_counters,
- cnts->offsets +
- cnts->num_q_counters);
-
- mlx5_ib_put_native_port_mdev(dev, port_num);
- if (ret)
- return ret;
- }
-
-done:
- return num_counters;
-}
-
-static struct rdma_hw_stats *
-mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
-}
-
-static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-
- if (!counter->id)
- return 0;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
- return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
-}
-
-static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- int err;
-
- if (!counter->id) {
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-
- MLX5_SET(alloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_ALLOC_Q_COUNTER);
- MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err)
- return err;
- counter->id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
-
- err = mlx5_ib_qp_set_counter(qp, counter);
- if (err)
- goto fail_set_counter;
-
- return 0;
-
-fail_set_counter:
- mlx5_ib_counter_dealloc(counter);
- counter->id = 0;
-
- return err;
-}
-
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
-{
- return mlx5_ib_qp_set_counter(qp, NULL);
-}
-
static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
@@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- if (!dev->delay_drop.dir_debugfs)
- return;
- debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
- dev->delay_drop.dir_debugfs = NULL;
-}
-
-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- cancel_work_sync(&dev->delay_drop.delay_drop_work);
- delay_drop_debugfs_cleanup(dev);
-}
-
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = {
.read = delay_drop_timeout_read,
};
-static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
-{
- struct dentry *root;
-
- if (!mlx5_debugfs_root)
- return;
-
- root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
- dev->delay_drop.dir_debugfs = root;
-
- debugfs_create_atomic_t("num_timeout_events", 0400, root,
- &dev->delay_drop.events_cnt);
- debugfs_create_atomic_t("num_rqs", 0400, root,
- &dev->delay_drop.rqs_cnt);
- debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
- &fops_delay_drop_timeout);
-}
-
-static void init_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- mutex_init(&dev->delay_drop.lock);
- dev->delay_drop.dev = dev;
- dev->delay_drop.activate = false;
- dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
- INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
- atomic_set(&dev->delay_drop.rqs_cnt, 0);
- atomic_set(&dev->delay_drop.events_cnt, 0);
-
- delay_drop_debugfs_init(dev);
-}
-
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
@@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_query_context,
+ UVERBS_OBJECT_DEVICE,
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+ dump_fill_mkey),
+ UA_MANDATORY));
+
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
{}
};
-static int mlx5_ib_read_counters(struct ib_counters *counters,
- struct ib_counters_read_attr *read_attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- struct mlx5_read_counters_attr mread_attr = {};
- struct mlx5_ib_flow_counters_desc *desc;
- int ret, i;
-
- mutex_lock(&mcounters->mcntrs_mutex);
- if (mcounters->cntrs_max_index > read_attr->ncounters) {
- ret = -EINVAL;
- goto err_bound;
- }
-
- mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
- GFP_KERNEL);
- if (!mread_attr.out) {
- ret = -ENOMEM;
- goto err_bound;
- }
-
- mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
- mread_attr.flags = read_attr->flags;
- ret = mcounters->read_counters(counters->device, &mread_attr);
- if (ret)
- goto err_read;
-
- /* do the pass over the counters data array to assign according to the
- * descriptions and indexing pairs
- */
- desc = mcounters->counters_data;
- for (i = 0; i < mcounters->ncounters; i++)
- read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
-
-err_read:
- kfree(mread_attr.out);
-err_bound:
- mutex_unlock(&mcounters->mcntrs_mutex);
- return ret;
-}
-
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- counters_clear_description(counters);
- if (mcounters->hw_cntrs_hndl)
- mlx5_fc_destroy(to_mdev(counters->device)->mdev,
- mcounters->hw_cntrs_hndl);
-
- kfree(mcounters);
-
- return 0;
-}
-
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters;
-
- mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
- if (!mcounters)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&mcounters->mcntrs_mutex);
-
- return &mcounters->ibcntrs;
-}
-
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -6547,21 +4005,16 @@ err_mp:
return -ENOMEM;
}
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_enable_driver(struct ib_device *dev)
{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
+ struct mlx5_ib_dev *mdev = to_mdev(dev);
+ int ret;
- return 0;
-}
+ ret = mlx5_ib_test_wc(mdev);
+ mlx5_ib_dbg(mdev, "Write-Combining %s",
+ mdev->wc_support ? "supported" : "not supported");
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
-{
- kfree(dev->flow_db);
+ return ret;
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
@@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.attach_mcast = mlx5_ib_mcg_attach,
.check_mr_status = mlx5_ib_check_mr_status,
.create_ah = mlx5_ib_create_ah,
- .create_counters = mlx5_ib_create_counters,
.create_cq = mlx5_ib_create_cq,
- .create_flow = mlx5_ib_create_flow,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
.dealloc_pd = mlx5_ib_dealloc_pd,
@@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.del_gid = mlx5_ib_del_gid,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
- .destroy_counters = mlx5_ib_destroy_counters,
.destroy_cq = mlx5_ib_destroy_cq,
- .destroy_flow = mlx5_ib_destroy_flow,
- .destroy_flow_action = mlx5_ib_destroy_flow_action,
.destroy_qp = mlx5_ib_destroy_qp,
.destroy_srq = mlx5_ib_destroy_srq,
.detach_mcast = mlx5_ib_mcg_detach,
@@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.enable_driver = mlx5_ib_enable_driver,
- .fill_res_entry = mlx5_ib_fill_res_entry,
- .fill_stat_entry = mlx5_ib_fill_stat_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
@@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_pkey = mlx5_ib_query_pkey,
.query_qp = mlx5_ib_query_qp,
.query_srq = mlx5_ib_query_srq,
- .read_counters = mlx5_ib_read_counters,
+ .query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
};
-static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
.rdma_netdev_get_params = mlx5_ib_rn_get_params,
};
@@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
.alloc_xrcd = mlx5_ib_alloc_xrcd,
.dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
@@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.modify_wq = mlx5_ib_modify_wq,
};
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
- u8 port_num;
-
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
-
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- /* Register only for native ports */
- return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err = 0;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev);
-
- return err;
-}
-
-static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_stage_common_roce_cleanup(dev);
-}
-
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num = 0;
int err;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev);
- if (err)
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ /* Register only for native ports */
+ err = mlx5_add_netdev_notifier(dev, port_num);
+ if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
+ /*
+ * We don't enable ETH interface for
+ * 1. IB representors
+ * 2. User disabled ROCE through devlink interface
+ */
return err;
err = mlx5_enable_eth(dev);
@@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
- mlx5_ib_stage_common_roce_cleanup(dev);
-
+ mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_ib_stage_common_roce_cleanup(dev);
- }
-}
-
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
- return create_dev_resources(&dev->devr);
-}
+ if (!dev->is_rep)
+ mlx5_disable_eth(dev);
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
- return mlx5_ib_odp_init_one(dev);
-}
-
-static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_odp_cleanup_one(dev);
-}
-
-static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
- .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
- .get_hw_stats = mlx5_ib_get_hw_stats,
- .counter_bind_qp = mlx5_ib_counter_bind_qp,
- .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
- .counter_dealloc = mlx5_ib_counter_dealloc,
- .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
- .counter_update_stats = mlx5_ib_counter_update_stats,
-};
-
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
-
- return mlx5_ib_alloc_counters(dev);
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ mlx5_remove_netdev_notifier(dev, port_num);
}
-
- return 0;
-}
-
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
}
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
@@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ int err;
+
+ err = mlx5_mr_cache_cleanup(dev);
+ if (err)
+ mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+ if (dev->umrc.qp)
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+ if (dev->umrc.cq)
+ ib_free_cq(dev->umrc.cq);
+ if (dev->umrc.pd)
+ ib_dealloc_pd(dev->umrc.pd);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
+enum {
+ MAX_UMR_WR = 128,
+};
+
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ struct ib_qp_init_attr *init_attr = NULL;
+ struct ib_qp_attr *attr = NULL;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto error_0;
+ }
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ ret = PTR_ERR(pd);
+ goto error_0;
+ }
+
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto error_2;
+ }
+
+ init_attr->send_cq = cq;
+ init_attr->recv_cq = cq;
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->cap.max_send_wr = MAX_UMR_WR;
+ init_attr->cap.max_send_sge = 1;
+ init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr->port_num = 1;
+ qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto error_3;
+ }
+ qp->device = &dev->ib_dev;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = MLX5_IB_QPT_REG_UMR;
+ qp->send_cq = init_attr->send_cq;
+ qp->recv_cq = init_attr->recv_cq;
+
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_PORT, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTR;
+ attr->path_mtu = IB_MTU_256;
+
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTS;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ goto error_4;
+ }
+
+ dev->umrc.qp = qp;
+ dev->umrc.cq = cq;
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ ret = mlx5_mr_cache_init(dev);
+ if (ret) {
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ goto error_4;
+ }
+
+ kfree(attr);
+ kfree(init_attr);
+
+ return 0;
+
+error_4:
+ mlx5_ib_destroy_qp(qp, NULL);
+ dev->umrc.qp = NULL;
+
+error_3:
+ ib_free_cq(cq);
+ dev->umrc.cq = NULL;
+
+error_2:
+ ib_dealloc_pd(pd);
+ dev->umrc.pd = NULL;
+
+error_0:
+ kfree(attr);
+ kfree(init_attr);
+ return ret;
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
{
- init_delay_drop(dev);
+ struct dentry *root;
+
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return 0;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
+ dev->delay_drop.dir_debugfs = root;
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
return 0;
}
static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
{
- cancel_delay_drop(dev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
}
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
@@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}
-static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
-{
- int uid;
-
- uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0) {
- dev->devx_whitelist_uid = uid;
- mlx5_ib_devx_init_event_table(dev);
- }
-
- return 0;
-}
-static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
-{
- if (dev->devx_whitelist_uid) {
- mlx5_ib_devx_cleanup_event_table(dev);
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
- }
-}
-
-int mlx5_ib_enable_driver(struct ib_device *dev)
-{
- struct mlx5_ib_dev *mdev = to_mdev(dev);
- int ret;
-
- ret = mlx5_ib_test_wc(mdev);
- mlx5_ib_dbg(mdev, "Write-Combining %s",
- mdev->wc_support ? "supported" : "not supported");
-
- return ret;
-}
-
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
@@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_roce_init,
- mlx5_ib_stage_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
- mlx5_ib_stage_odp_init,
- mlx5_ib_stage_odp_cleanup),
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_raw_eth_roce_init,
- mlx5_ib_stage_raw_eth_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5dbe3eb0d9cb..5287fc868662 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -730,8 +703,8 @@ struct mlx5_ib_port_resources {
struct mlx5_ib_resources {
struct ib_cq *c0;
- struct ib_xrcd *x0;
- struct ib_xrcd *x1;
+ u32 xrcdn0;
+ u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
@@ -832,7 +805,7 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
- MLX5_IB_STAGE_FLOW_DB,
+ MLX5_IB_STAGE_FS,
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
@@ -850,7 +823,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
- MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_RESTRACK,
MLX5_IB_STAGE_MAX,
};
@@ -1078,11 +1051,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
-{
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -1210,7 +1178,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
u32 max_num_sg,
u32 max_num_meta_sg);
@@ -1224,9 +1192,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index);
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata);
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
@@ -1375,46 +1342,12 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
u8 *native_port_num);
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
-int mlx5_ib_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
-int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
extern const struct uapi_definition mlx5_ib_qos_defs[];
+extern const struct uapi_definition mlx5_ib_std_types_defs[];
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
-void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
-void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
-struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
- struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act, u32 counter_id,
- void *cmd_in, int inlen, int dest_id, int dest_type);
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
-#else
-static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- bool is_user) { return -EOPNOTSUPP; }
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
-static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
-static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
- int *dest_type)
-{
- return false;
-}
-static inline void
-mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- return;
-};
-#endif
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -1423,15 +1356,6 @@ static inline void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}
-static inline u8 convert_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ;
-}
-
static inline int is_qp1(enum ib_qp_type qp_type)
{
return qp_type == MLX5_IB_QPT_HW_GSI;
@@ -1518,9 +1442,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
-int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
-
static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
bool do_modify_atomic, int access_flags)
{
@@ -1533,14 +1454,18 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
return false;
if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
- (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) ||
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)))
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return false;
+
+ if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
-int mlx5_ib_enable_driver(struct ib_device *dev);
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 44683073be0c..3e6f2f9c6655 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1961,7 +1961,7 @@ err_free:
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 77dca1e05bba..cfd7efab114e 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -816,6 +816,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ lockdep_assert_held(&mr->dev->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
@@ -929,11 +930,6 @@ next_mr:
if (ret < 0)
goto srcu_unlock;
- /*
- * When prefetching a page, page fault is generated
- * in order to bring the page to the main memory.
- * In the current flow, page faults are being counted.
- */
mlx5_update_odp_stats(mr, faults, ret);
npages += ret;
@@ -1770,13 +1766,26 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
{
struct prefetch_mr_work *work =
container_of(w, struct prefetch_mr_work, work);
+ struct mlx5_ib_dev *dev;
u32 bytes_mapped = 0;
+ int srcu_key;
+ int ret;
u32 i;
- for (i = 0; i < work->num_sge; ++i)
- pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
- work->frags[i].length, &bytes_mapped,
- work->pf_flags);
+ /* We rely on IB/core that work is executed if we have num_sge != 0 only. */
+ WARN_ON(!work->num_sge);
+ dev = work->frags[0].mr->dev;
+ /* SRCU should be held when calling to mlx5_odp_populate_xlt() */
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
+ for (i = 0; i < work->num_sge; ++i) {
+ ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+ work->frags[i].length, &bytes_mapped,
+ work->pf_flags);
+ if (ret <= 0)
+ continue;
+ mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
+ }
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
}
@@ -1832,6 +1841,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
&bytes_mapped, pf_flags);
if (ret < 0)
goto out;
+ mlx5_update_odp_stats(mr, prefetch, ret);
}
ret = 0;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 1225b8d77510..59fce5fac7a3 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -38,6 +38,7 @@
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
+#include "counters.h"
#include "cmd.h"
#include "qp.h"
#include "wr.h"
@@ -2031,15 +2032,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
switch (init_attr->qp_type) {
case IB_QPT_XRC_INI:
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
}
}
@@ -2178,11 +2179,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, no_sq, 1);
if (attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(devr->s1)->msrq.srqn);
}
@@ -3554,7 +3555,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
switch (raw_qp_param->operation) {
case MLX5_CMD_OP_RST2INIT_QP:
rq_state = MLX5_RQC_STATE_RDY;
- sq_state = MLX5_SQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RST;
break;
case MLX5_CMD_OP_2ERR_QP:
rq_state = MLX5_RQC_STATE_ERR;
@@ -3566,13 +3567,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
break;
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
- if (raw_qp_param->set_mask ==
- MLX5_RAW_QP_RATE_LIMIT) {
- modify_rq = 0;
- sq_state = sq->state;
- } else {
- return raw_qp_param->set_mask ? -EINVAL : 0;
- }
+ if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
+ return -EINVAL;
+
+ modify_rq = 0;
+ sq_state = MLX5_SQC_STATE_RDY;
break;
case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
@@ -4114,9 +4113,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
enum ib_qp_state cur_state, new_state;
- int err = 0;
int required = IB_QP_STATE;
void *dctc;
+ int err;
if (!(attr_mask & IB_QP_STATE))
return -EINVAL;
@@ -4208,11 +4207,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
return -EINVAL;
}
- if (err)
- qp->state = IB_QPS_ERR;
- else
- qp->state = new_state;
- return err;
+
+ qp->state = new_state;
+ return 0;
}
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -4450,7 +4447,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_RESET,
},
[MLX5_RQC_STATE_RDY] = {
- [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
@@ -4462,7 +4459,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_ERR,
},
[MLX5_RQ_STATE_NA] = {
- [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
@@ -4708,41 +4705,23 @@ out:
return err;
}
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_xrcd *xrcd;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(-ENOMEM);
- }
+ return -EOPNOTSUPP;
- return &xrcd->ibxrcd;
+ return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
- if (err)
- mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
-
- kfree(xrcd);
- return 0;
+ mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index 82ea2b94dfa6..ba899df44c5b 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -43,4 +43,5 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index 8f6c04f12531..887270dd3ce2 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -1,17 +1,85 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved.
*/
#include <uapi/rdma/rdma_netlink.h>
+#include <linux/mlx5/rsc_dump.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/restrack.h>
#include "mlx5_ib.h"
+#include "restrack.h"
-static int fill_stat_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+#define MAX_DUMP_SIZE 1024
+
+static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type,
+ int index, void *data, int *data_len)
+{
+ struct mlx5_core_dev *mdev = dev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int offset = 0;
+ int err = 0;
+ int cmd_err;
+ int size;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.size = PAGE_SIZE;
+ key.rsc = type;
+ key.index1 = index;
+ key.num_of_obj1 = 1;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+ memcpy(data + offset, page_address(page), size);
+ offset += size;
+ } while (cmd_err > 0);
+ *data_len = offset;
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+ return err;
+}
+
+static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev,
+ enum mlx5_sgmt_type type, u32 key)
+{
+ int len = 0;
+ void *data;
+ int err;
+
+ data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = dump_rsc(dev->mdev, type, key, data, &len);
+ if (err)
+ goto out;
+
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+out:
+ kfree(data);
+ return err;
+}
+
+static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct nlattr *table_attr;
@@ -31,6 +99,9 @@ static int fill_stat_mr_entry(struct sk_buff *msg,
msg, "page_invalidations",
atomic64_read(&mr->odp_stats.invalidations)))
goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
+ atomic64_read(&mr->odp_stats.prefetch)))
+ goto err_table;
nla_nest_end(msg, table_attr);
return 0;
@@ -41,10 +112,16 @@ err:
return -EMSGSIZE;
}
-static int fill_res_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+ return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ mlx5_mkey_to_idx(mr->mmkey.key));
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct nlattr *table_attr;
@@ -71,20 +148,32 @@ err:
return -EMSGSIZE;
}
-int mlx5_ib_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq)
{
- if (res->type == RDMA_RESTRACK_MR)
- return fill_res_mr_entry(msg, res);
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
- return 0;
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn);
}
-int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
- if (res->type == RDMA_RESTRACK_MR)
- return fill_stat_mr_entry(msg, res);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP,
+ ibqp->qp_num);
+}
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry_raw = fill_res_cq_entry_raw,
+ .fill_res_mr_entry = fill_res_mr_entry,
+ .fill_res_mr_entry_raw = fill_res_mr_entry_raw,
+ .fill_res_qp_entry_raw = fill_res_qp_entry_raw,
+ .fill_stat_mr_entry = fill_stat_mr_entry,
+};
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &restrack_ops);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h
new file mode 100644
index 000000000000..e8d81270f1b6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_RESTRACK_H
+#define _MLX5_IB_RESTRACK_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev);
+
+#endif /* _MLX5_IB_RESTRACK_H */
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6d1ff13d2283..7e10cbcb6d5c 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -274,10 +274,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC)
+ if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
else
- in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+ in.xrcd = dev->devr.xrcdn0;
if (init_attr->srq_type == IB_SRQT_TM) {
in.tm_log_list_size =
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
new file mode 100644
index 000000000000..16145fda68d0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE);
+ struct mlx5_ib_pd *mpd = to_mpd(pd);
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ &mpd->pdn, sizeof(mpd->pdn));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_PD_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_pd,
+ UVERBS_OBJECT_PD,
+ &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY));
+
+const struct uapi_definition mlx5_ib_std_types_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_PD,
+ &mlx5_ib_pd),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 2c6df1c43b55..43880973a512 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -263,7 +263,9 @@ static __be64 get_umr_update_translation_mask(void)
return cpu_to_be64(result);
}
-static __be64 get_umr_update_access_mask(int atomic)
+static __be64 get_umr_update_access_mask(int atomic,
+ int relaxed_ordering_write,
+ int relaxed_ordering_read)
{
u64 result;
@@ -275,6 +277,12 @@ static __be64 get_umr_update_access_mask(int atomic)
if (atomic)
result |= MLX5_MKEY_MASK_A;
+ if (relaxed_ordering_write)
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
+
+ if (relaxed_ordering_read)
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
+
return cpu_to_be64(result);
}
@@ -289,17 +297,28 @@ static __be64 get_umr_update_pd_mask(void)
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
- if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
- (mask & MLX5_MKEY_MASK_A &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
+ if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return -EPERM;
+
return 0;
}
static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
struct mlx5_wqe_umr_ctrl_seg *umr,
- const struct ib_send_wr *wr, int atomic)
+ const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -325,7 +344,10 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
- umr->mkey_mask |= get_umr_update_access_mask(atomic);
+ umr->mkey_mask |= get_umr_update_access_mask(
+ !!(MLX5_CAP_GEN(dev->mdev, atomic)),
+ !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
+ !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
umr->mkey_mask |= get_umr_update_pd_mask();
}
if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
@@ -383,20 +405,31 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- seg->status = MLX5_MKEY_STATUS_FREE;
+ MLX5_SET(mkc, seg, free, 1);
+
+ MLX5_SET(mkc, seg, a,
+ !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, seg, rw,
+ !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, seg, lr, 1);
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ MLX5_SET(mkc, seg, relaxed_ordering_read,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
- seg->flags = convert_access(umrwr->access_flags);
if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
!umrwr->length)
- seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
+ MLX5_SET(mkc, seg, length64, 1);
- seg->start_addr = cpu_to_be64(umrwr->virt_addr);
- seg->len = cpu_to_be64(umrwr->length);
- seg->log2_page_size = umrwr->page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(umrwr->mkey));
+ MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
+ MLX5_SET64(mkc, seg, len, umrwr->length);
+ MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
+ MLX5_SET(mkc, seg, qpn, 0xffffff);
+ MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
}
static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
@@ -1224,8 +1257,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
- err = set_reg_umr_segment(dev, *seg, wr,
- !!(MLX5_CAP_GEN(dev->mdev, atomic)));
+ err = set_reg_umr_segment(dev, *seg, wr);
if (unlikely(err))
goto out;
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
@@ -1249,7 +1281,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
- int uninitialized_var(size);
+ int size;
unsigned long flags;
unsigned int idx;
int err = 0;