aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5/main.c
diff options
context:
space:
mode:
authorMaor Gottlieb <maorg@mellanox.com>2016-01-11 10:26:07 +0200
committerDavid S. Miller <davem@davemloft.net>2016-01-11 17:48:53 -0500
commit038d2ef87572757861a177b19f9d489def2c48b8 (patch)
tree04ab4306825571d353669cd2f92dfe1dd9257ec2 /drivers/infiniband/hw/mlx5/main.c
parentnet/mlx5_core: Export flow steering API (diff)
downloadlinux-dev-038d2ef87572757861a177b19f9d489def2c48b8.tar.xz
linux-dev-038d2ef87572757861a177b19f9d489def2c48b8.zip
IB/mlx5: Add flow steering support
Adding flow steering support by creating a flow-table per priority (if rules exist in the priority). mlx5_ib uses autogrouping and thus only creates the required destinations. Also includes adding of these flow steering utilities 1. Parsing verbs flow attributes hardware steering specs. 2. Check if flow is multicast - this is required in order to decide to which flow table will we add the steering rule. 3. Set outer headers in flow match criteria to zeros. Signed-off-by: Maor Gottlieb <maorg@mellanox.com> Signed-off-by: Moni Shoua <monis@mellanox.com> Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r--drivers/infiniband/hw/mlx5/main.c463
1 files changed, 463 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7e97cb55a6bf..b0ec175cc6ba 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -43,6 +43,9 @@
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/fs.h>
#include "user.h"
#include "mlx5_ib.h"
@@ -835,6 +838,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
return 0;
}
+static bool outer_header_zero(u32 *match_criteria)
+{
+ int size = MLX5_ST_SZ_BYTES(fte_match_param);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
+ return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+ outer_headers_c + 1,
+ size - 1);
+}
+
+static int parse_flow_attr(u32 *match_c, u32 *match_v,
+ union ib_flow_spec *ib_spec)
+{
+ void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ if (ib_spec->size != sizeof(ib_spec->eth))
+ return -EINVAL;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ vlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ vlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (ib_spec->size != sizeof(ib_spec->ipv4))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ETH_P_IP);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ if (ib_spec->size != sizeof(ib_spec->tcp_udp))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ IPPROTO_TCP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (ib_spec->size != sizeof(ib_spec->tcp_udp))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* If a flow could catch both multicast and unicast packets,
+ * it won't fall into the multicast flow steering table and this rule
+ * could steal other multicast packets.
+ */
+static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
+{
+ struct ib_flow_spec_eth *eth_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->size < sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_eth) ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
+ if (eth_spec->type != IB_FLOW_SPEC_ETH ||
+ eth_spec->size != sizeof(*eth_spec))
+ return false;
+
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+}
+
+static bool is_valid_attr(struct ib_flow_attr *flow_attr)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ bool has_ipv4_spec = false;
+ bool eth_type_ipv4 = true;
+ unsigned int spec_index;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if (ib_spec->type == IB_FLOW_SPEC_ETH &&
+ ib_spec->eth.mask.ether_type) {
+ if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
+ ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
+ eth_type_ipv4 = false;
+ } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
+ has_ipv4_spec = true;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+ return !has_ipv4_spec || eth_type_ipv4;
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+
+ mutex_lock(&dev->flow_db.lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rule(iter->rule);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rule(handler->rule);
+ put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
+ mutex_unlock(&dev->flow_db.lock);
+
+ kfree(handler);
+
+ return 0;
+}
+
+#define MLX5_FS_MAX_TYPES 10
+#define MLX5_FS_MAX_ENTRIES 32000UL
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int num_entries;
+ int num_groups;
+ int priority;
+ int err = 0;
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ if (flow_is_multicast_only(flow_attr))
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = flow_attr->priority;
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_BYPASS);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ prio = &dev->flow_db.prios[priority];
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority,
+ &num_entries,
+ &num_groups);
+ prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ }
+
+ if (!ns)
+ return ERR_PTR(-ENOTSUPP);
+
+ ft = prio->flow_table;
+ if (!ft) {
+ ft = mlx5_create_auto_grouped_flow_table(ns, priority,
+ num_entries,
+ num_groups);
+
+ if (!IS_ERR(ft)) {
+ prio->refcount = 0;
+ prio->flow_table = ft;
+ } else {
+ err = PTR_ERR(ft);
+ }
+ }
+
+ return err ? ERR_PTR(err) : prio;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ void *ib_flow = flow_attr + 1;
+ u8 match_criteria_enable = 0;
+ unsigned int spec_index;
+ u32 *match_c;
+ u32 *match_v;
+ int err = 0;
+
+ if (!is_valid_attr(flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !match_c || !match_v) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(match_c, match_v, ib_flow);
+ if (err < 0)
+ goto free;
+
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ /* Outer header support only */
+ match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+ handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
+ match_c, match_v,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_DEFAULT_FLOW_TAG,
+ dst);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ handler->prio = ft_prio - dev->flow_db.prios;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err)
+ kfree(handler);
+ kfree(match_c);
+ kfree(match_v);
+ return err ? ERR_PTR(err) : handler;
+}
+
+enum {
+ LEFTOVERS_MC,
+ LEFTOVERS_UC,
+};
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_attr flow_attr;
+ struct ib_flow_spec_eth eth_flow;
+ } leftovers_specs[] = {
+ [LEFTOVERS_MC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {0x1} }
+ }
+ },
+ [LEFTOVERS_UC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {} }
+ }
+ }
+ };
+
+ handler = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_MC].flow_attr,
+ dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_UC].flow_attr,
+ dst);
+ if (IS_ERR(handler_ucast)) {
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ int err;
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOSPC);
+
+ if (domain != IB_FLOW_DOMAIN_USER ||
+ flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
+ flow_attr->flags)
+ return ERR_PTR(-EINVAL);
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&dev->flow_db.lock);
+
+ ft_prio = get_flow_table(dev, flow_attr);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ handler = create_flow_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else {
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ ft_prio->refcount++;
+ mutex_unlock(&dev->flow_db.lock);
+ kfree(dst);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db.lock);
+ kfree(dst);
+ kfree(handler);
+ return ERR_PTR(err);
+}
+
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -1439,10 +1893,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+ IB_LINK_LAYER_ETHERNET) {
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ }
err = init_node_data(dev);
if (err)
goto err_dealloc;
+ mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
err = create_dev_resources(&dev->devr);