about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband/hw/mlx4/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx4/main.c')
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c 802
1 files changed, 659 insertions, 143 deletions
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1958c5ca792a..e81c5547e647 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -39,6 +39,8 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -55,6 +57,7 @@
#define DRV_RELDATE "April 4, 2008"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -92,21 +95,27 @@ static union ib_gid zgid;
+/*
+ * Report whether device-managed flow steering (DMFS) may be used.
+ *
+ * Returns non-zero only when the steering mode is device managed and every
+ * port type that is present has its capability bit set in caps.flags2
+ * (DMFS_IPOIB when there are IB ports, FS_EN when there are Ethernet ports).
+ * IB ports in a multifunction environment always turn it off.
+ */
static int check_flow_steering_support(struct mlx4_dev *dev)
{
+ int eth_num_ports = 0;
int ib_num_ports = 0;
- int i;
-
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
- ib_num_ports++;
- if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
- if (ib_num_ports || mlx4_is_mfunc(dev)) {
- pr_warn("Device managed flow steering is unavailable "
- "for IB ports or in multifunction env.\n");
- return 0;
+ int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+ if (dmfs) {
+ int i;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ eth_num_ports++;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+ /* each port type present must advertise its own steering capability */
+ dmfs &= (!ib_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+ (!eth_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+ if (ib_num_ports && mlx4_is_mfunc(dev)) {
+ pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
+ dmfs = 0;
}
- return 1;
}
- return 0;
+ return dmfs;
}
static int mlx4_ib_query_device(struct ib_device *ibdev,
@@ -165,7 +174,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
- if (check_flow_steering_support(dev->dev))
+ if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
@@ -338,7 +347,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = IB_SPEED_QDR;
- props->port_cap_flags = IB_PORT_CM_SUP;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
@@ -787,7 +796,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
- u8 mac[6];
struct net_device *ndev;
int ret = 0;
@@ -801,11 +809,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_unlock(&mdev->iboe.lock);
if (ndev) {
- rdma_get_mcast_mac((struct in6_addr *)gid, mac);
- rtnl_lock();
- dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
ret = 1;
- rtnl_unlock();
dev_put(ndev);
}
@@ -819,6 +823,7 @@ struct mlx4_ib_steering {
};
static int parse_flow_attr(struct mlx4_dev *dev,
+ u32 qp_num,
union ib_flow_spec *ib_spec,
struct _rule_hw *mlx4_spec)
{
@@ -834,6 +839,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
break;
+ case IB_FLOW_SPEC_IB:
+ type = MLX4_NET_TRANS_RULE_ID_IB;
+ mlx4_spec->ib.l3_qpn =
+ cpu_to_be32(qp_num);
+ mlx4_spec->ib.qpn_mask =
+ cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+ break;
+
case IB_FLOW_SPEC_IPV4:
type = MLX4_NET_TRANS_RULE_ID_IPV4;
@@ -865,6 +878,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
return mlx4_hw_rule_sz(dev, type);
}
+/*
+ * One entry of the default-rule table: describes which user flow specs
+ * trigger the automatic insertion of extra hardware rules.
+ */
+struct default_rules {
+ /* spec types compared, layer by layer, against the user's specs */
+ __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ /* spec types whose presence in the user's flow rejects the match */
+ __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ /* spec types of the rules to generate on a match; 0 means "no rule" */
+ __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ /* entry applies only to ports reporting this link layer */
+ __u8 link_layer;
+};
+/*
+ * Default rules table. The single entry applies to InfiniBand link-layer
+ * ports: a flow carrying an IPv4 spec and no ETH spec gets an IB spec rule
+ * created in front of the user's own specs.
+ */
+static const struct default_rules default_table[] = {
+ {
+ .mandatory_fields = {IB_FLOW_SPEC_IPV4},
+ .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+ .rules_create_list = {IB_FLOW_SPEC_IB},
+ .link_layer = IB_LINK_LAYER_INFINIBAND
+ }
+};
+
+/*
+ * Look up the default_table entry that applies to @flow_attr on @qp.
+ *
+ * Entries whose link layer differs from that of flow_attr->port are skipped.
+ * For the first entry with a matching link layer, the specs that follow
+ * @flow_attr are walked twice:
+ *  - against mandatory_fields[]: a spec in the same layer as a mandatory
+ *    field but of a different type rejects the flow;
+ *  - against mandatory_not_fields[]: any spec whose type appears there
+ *    rejects the flow.
+ *
+ * Returns the index of the entry in default_table, or -1 when no entry
+ * applies.
+ *
+ * NOTE(review): a rejection jumps straight to "out" instead of trying the
+ * next table entry; harmless while the table has a single entry - confirm
+ * the intent before adding more.
+ */
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+					 struct ib_flow_attr *flow_attr)
+{
+	int i, j, k;
+	void *ib_flow;
+	const struct default_rules *pdefault_rules = default_table;
+	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
+	for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
+		if (link_layer != pdefault_rules->link_layer)
+			continue;
+
+		/* the specs are laid out right after the attribute header */
+		ib_flow = flow_attr + 1;
+		/* we assume the specs are sorted */
+		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+		     j < flow_attr->num_of_specs; k++) {
+			union ib_flow_spec *current_flow =
+				(union ib_flow_spec *)ib_flow;
+
+			/* same layer but different type */
+			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+			     (pdefault_rules->mandatory_fields[k] &
+			      IB_FLOW_SPEC_LAYER_MASK)) &&
+			    (current_flow->type !=
+			     pdefault_rules->mandatory_fields[k]))
+				goto out;
+
+			/* same layer, try match next one */
+			if (current_flow->type ==
+			    pdefault_rules->mandatory_fields[k]) {
+				j++;
+				ib_flow +=
+					((union ib_flow_spec *)ib_flow)->size;
+			}
+		}
+
+		ib_flow = flow_attr + 1;
+		for (j = 0; j < flow_attr->num_of_specs;
+		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+				/* same layer and same type */
+				if (((union ib_flow_spec *)ib_flow)->type ==
+				    pdefault_rules->mandatory_not_fields[k])
+					goto out;
+
+		return i;
+	}
+out:
+	return -1;
+}
+
+/*
+ * Emit the hardware rules listed in pdefault_rules->rules_create_list into
+ * the buffer at @mlx4_spec, one after another.
+ *
+ * Only IB_FLOW_SPEC_IB entries are supported; zero entries are skipped and
+ * any other value is rejected.
+ *
+ * Returns the total number of bytes written, or -EINVAL on an unsupported
+ * entry or when parse_flow_attr() fails.
+ */
+static int __mlx4_ib_create_default_rules(
+		struct mlx4_ib_dev *mdev,
+		struct ib_qp *qp,
+		const struct default_rules *pdefault_rules,
+		struct _rule_hw *mlx4_spec)
+{
+	int size = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
+		int ret;
+		union ib_flow_spec ib_spec;
+
+		switch (pdefault_rules->rules_create_list[i]) {
+		case 0:
+			/* no rule */
+			continue;
+		case IB_FLOW_SPEC_IB:
+			ib_spec.type = IB_FLOW_SPEC_IB;
+			ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+			break;
+		default:
+			/* invalid rule */
+			return -EINVAL;
+		}
+		/* We must put empty rule, qpn is being ignored */
+		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+				      mlx4_spec);
+		if (ret < 0) {
+			pr_info("invalid parsing\n");
+			return -EINVAL;
+		}
+
+		/* advance past the rule that was just written */
+		mlx4_spec = (void *)mlx4_spec + ret;
+		size += ret;
+	}
+	return size;
+}
+
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
int domain,
enum mlx4_net_trans_promisc_mode flow_type,
@@ -876,6 +998,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ int default_flow;
static const u16 __mlx4_domain[] = {
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@@ -910,8 +1033,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
ib_flow = flow_attr + 1;
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ /* Add default flows */
+ default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+ if (default_flow >= 0) {
+ ret = __mlx4_ib_create_default_rules(
+ mdev, qp, default_table + default_flow,
+ mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
for (i = 0; i < flow_attr->num_of_specs; i++) {
- ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+ ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+ mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
@@ -1025,6 +1161,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1036,7 +1174,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- MLX4_PROT_IB_IPV6, &reg_id);
+ prot, &reg_id);
if (err)
goto err_malloc;
@@ -1055,7 +1193,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
+ prot, reg_id);
err_malloc:
kfree(ib_steering);
@@ -1083,10 +1221,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u8 mac[6];
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
u64 reg_id = 0;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1109,7 +1248,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
+ prot, reg_id);
if (err)
return err;
@@ -1121,13 +1260,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
- rdma_get_mcast_mac((struct in6_addr *)gid, mac);
- if (ndev) {
- rtnl_lock();
- dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
- rtnl_unlock();
+ if (ndev)
dev_put(ndev);
- }
list_del(&ge->list);
kfree(ge);
} else
@@ -1223,7 +1357,8 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
+static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
+ struct net_device *dev)
{
memcpy(eui, dev->dev_addr, 3);
memcpy(eui + 5, dev->dev_addr + 3, 3);
@@ -1259,161 +1394,377 @@ static void update_gids_task(struct work_struct *work)
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
- else {
- memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
+ else
mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ kfree(gw);
+}
+
+/*
+ * Work handler queued by reset_gid_table(): copies the GID array carried in
+ * the work item into a command mailbox and, when the port's link layer is
+ * Ethernet, issues SET_PORT to program it. The work item is freed on every
+ * path.
+ */
+static void reset_gids_task(struct work_struct *work)
+{
+ struct update_gid_work *gw =
+ container_of(work, struct update_gid_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids;
+ int err;
+ struct mlx4_dev *dev = gw->dev->dev;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ pr_warn("reset gid table failed\n");
+ goto free;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, gw->gids, sizeof(gw->gids));
+
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+ 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ /* pr_warn() already supplies the KERN_WARNING level */
+ if (err)
+ pr_warn("set port %d command failed\n",
+ gw->port);
}
mlx4_free_cmd_mailbox(dev, mailbox);
+free:
kfree(gw);
}
-static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
+/*
+ * Add or remove one GID in the cached table of @port and, if the table
+ * changed, queue update_gids_task with a snapshot of it.
+ *
+ * @clear:       non-zero removes @gid (the matching entry is zeroed),
+ *               zero adds it to the first all-zero entry if not yet present.
+ * @default_gid: non-zero stores @gid at index 0; the search over entries
+ *               1..gid_table_len-1 is skipped in that case.
+ *
+ * Returns 0 when nothing changed or the work was queued, -ENOMEM when the
+ * work item could not be allocated (the cached table has already been
+ * modified by then).
+ */
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+ union ib_gid *gid, int clear,
+ int default_gid)
{
- struct net_device *ndev = dev->iboe.netdevs[port - 1];
struct update_gid_work *work;
- struct net_device *tmp;
int i;
- u8 *hits;
- int ret;
- union ib_gid gid;
- int free;
- int found;
int need_update = 0;
- u16 vid;
-
- work = kzalloc(sizeof *work, GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- hits = kzalloc(128, GFP_ATOMIC);
- if (!hits) {
- ret = -ENOMEM;
- goto out;
- }
+ int free = -1;
+ int found = -1;
+ int max_gids;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, tmp) {
- if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
- gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- vid = rdma_vlan_dev_vlan_id(tmp);
- mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
- found = 0;
- free = -1;
- for (i = 0; i < 128; ++i) {
- if (free < 0 &&
- !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- free = i;
- if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
- hits[i] = 1;
- found = 1;
+ /* index 0 is used for the default GID only */
+ if (default_gid) {
+ free = 0;
+ } else {
+ max_gids = dev->dev->caps.gid_table_len[port];
+ for (i = 1; i < max_gids; ++i) {
+ if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+ sizeof(*gid)))
+ found = i;
+
+ if (clear) {
+ if (found >= 0) {
+ need_update = 1;
+ dev->iboe.gid_table[port - 1][found] =
+ zgid;
break;
}
- }
+ } else {
+ if (found >= 0)
+ break;
- if (!found) {
- if (tmp == ndev &&
- (memcmp(&dev->iboe.gid_table[port - 1][0],
- &gid, sizeof gid) ||
- !memcmp(&dev->iboe.gid_table[port - 1][0],
- &zgid, sizeof gid))) {
- dev->iboe.gid_table[port - 1][0] = gid;
- ++need_update;
- hits[0] = 1;
- } else if (free >= 0) {
- dev->iboe.gid_table[port - 1][free] = gid;
- hits[free] = 1;
- ++need_update;
- }
+ if (free < 0 &&
+ !memcmp(&dev->iboe.gid_table[port - 1][i],
+ &zgid, sizeof(*gid)))
+ free = i;
}
}
}
- rcu_read_unlock();
- for (i = 0; i < 128; ++i)
- if (!hits[i]) {
- if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- ++need_update;
- dev->iboe.gid_table[port - 1][i] = zgid;
- }
+ if (found == -1 && !clear && free >= 0) {
+ dev->iboe.gid_table[port - 1][free] = *gid;
+ need_update = 1;
+ }
- if (need_update) {
- memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
- INIT_WORK(&work->work, update_gids_task);
- work->port = port;
- work->dev = dev;
- queue_work(wq, &work->work);
- } else
- kfree(work);
+ if (!need_update)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ /* the work item carries its own copy of the table */
+ memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
+ INIT_WORK(&work->work, update_gids_task);
+ work->port = port;
+ work->dev = dev;
+ queue_work(wq, &work->work);
- kfree(hits);
return 0;
+}
-out:
- kfree(work);
- return ret;
+/*
+ * Build the default GID for @dev into @gid: the fe80::/64 prefix followed by
+ * the interface id that mlx4_addrconf_ifid_eui48() derives from the device
+ * address, called with vlan id 0xffff.
+ */
+static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
+{
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
}
-static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+
+/*
+ * Zero the cached GID table of @port and queue reset_gids_task with an
+ * all-zero table. Returns 0 on success or -ENOMEM when the work item cannot
+ * be allocated (the cached table is left untouched in that case).
+ */
+static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
{
- switch (event) {
- case NETDEV_UP:
- case NETDEV_CHANGEADDR:
- update_ipv6_gids(dev, port, 0);
- break;
+ struct update_gid_work *work;
- case NETDEV_DOWN:
- update_ipv6_gids(dev, port, 1);
- dev->iboe.netdevs[port - 1] = NULL;
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
+ memset(work->gids, 0, sizeof(work->gids));
+ INIT_WORK(&work->work, reset_gids_task);
+ work->dev = dev;
+ work->port = port;
+ queue_work(wq, &work->work);
+ return 0;
}
-static void netdev_added(struct mlx4_ib_dev *dev, int port)
+/*
+ * Common handler for IPv4/IPv6 address notifications.
+ *
+ * @gid is the address that was added or removed on @event_netdev. Events
+ * other than NETDEV_UP/NETDEV_DOWN, events for the default GID, and removal
+ * of a link-local address on a VLAN device are ignored. Otherwise, under
+ * iboe->lock, the GID is added to (NETDEV_UP) or cleared from (NETDEV_DOWN)
+ * the table of every port whose netdev - or bond master, when the real
+ * device is one - is the real device behind @event_netdev.
+ *
+ * Always returns 0; failures of update_gid_table() are not reported.
+ */
+static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
+ struct mlx4_ib_dev *ibdev, union ib_gid *gid)
{
- update_ipv6_gids(dev, port, 0);
+ struct mlx4_ib_iboe *iboe;
+ int port = 0;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+ rdma_vlan_dev_real_dev(event_netdev) :
+ event_netdev;
+ union ib_gid default_gid;
+
+ mlx4_make_default_gid(real_dev, &default_gid);
+
+ /* the default GID is handled separately, see mlx4_ib_set_default_gid() */
+ if (!memcmp(gid, &default_gid, sizeof(*gid)))
+ return 0;
+
+ if (event != NETDEV_DOWN && event != NETDEV_UP)
+ return 0;
+
+ if ((real_dev != event_netdev) &&
+ (event == NETDEV_DOWN) &&
+ rdma_link_local_addr((struct in6_addr *)gid))
+ return 0;
+
+ iboe = &ibdev->iboe;
+ spin_lock(&iboe->lock);
+
+ for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ update_gid_table(ibdev, port, gid,
+ event == NETDEV_DOWN, 0);
+
+ spin_unlock(&iboe->lock);
+ return 0;
+
}
-static void netdev_removed(struct mlx4_ib_dev *dev, int port)
+/*
+ * Map a net_device to the port of @ibdev it belongs to.
+ *
+ * VLAN devices are resolved to their real device first. A bond master is
+ * looked up in iboe->masters[], any other device in iboe->netdevs[].
+ * Returns the 1-based port number, or 0 when no port matches.
+ *
+ * This function does not take iboe->lock itself; mlx4_ib_init_gid_table()
+ * calls it with the lock held.
+ */
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev)
{
- update_ipv6_gids(dev, port, 1);
+ u8 port = 0;
+ struct mlx4_ib_iboe *iboe;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+ rdma_vlan_dev_real_dev(dev) : dev;
+
+ iboe = &ibdev->iboe;
+
+ for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ break;
+
+ /* the loop ran off the end: nothing matched */
+ if ((port == 0) || (port > MLX4_MAX_PORTS))
+ return 0;
+ else
+ return port;
}
-static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+/*
+ * inetaddr notifier callback: turns the IPv4 address carried by the event
+ * into an IPv4-mapped GID and hands it to mlx4_ib_addr_event().
+ * Always returns NOTIFY_DONE.
+ */
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+			      void *ptr)
+{
+	struct mlx4_ib_dev *mdev =
+		container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+	struct in_ifaddr *addr = ptr;
+	union ib_gid mapped_gid;
+
+	ipv6_addr_set_v4mapped(addr->ifa_address,
+			       (struct in6_addr *)&mapped_gid);
+	mlx4_ib_addr_event(event, addr->ifa_dev->dev, mdev, &mapped_gid);
+
+	return NOTIFY_DONE;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * inet6addr notifier callback: the IPv6 address carried by the event is used
+ * directly as the GID passed to mlx4_ib_addr_event().
+ * Always returns NOTIFY_DONE.
+ */
+static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlx4_ib_dev *ibdev;
- struct net_device *oldnd;
+ struct inet6_ifaddr *ifa = ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *event_netdev = ifa->idev->dev;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
+ return NOTIFY_DONE;
+}
+#endif
+
+/*
+ * Add every IPv4 address (as an IPv4-mapped GID) and, when IPv6 is enabled,
+ * every IPv6 address currently configured on @dev to the GID table of @port.
+ * Does nothing when @port is outside 1..MLX4_MAX_PORTS. Return values of
+ * update_gid_table() are ignored.
+ */
+static void mlx4_ib_get_dev_addr(struct net_device *dev,
+				 struct mlx4_ib_dev *ibdev, u8 port)
+{
+	union ib_gid mapped;
+	struct in_device *v4_dev;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct inet6_ifaddr *v6_addr;
+	struct inet6_dev *v6_dev;
+#endif
+
+	if (!port || port > MLX4_MAX_PORTS)
+		return;
+
+	/* IPv4 addresses become IPv4-mapped GIDs */
+	v4_dev = in_dev_get(dev);
+	if (v4_dev) {
+		for_ifa(v4_dev) {
+			ipv6_addr_set_v4mapped(ifa->ifa_address,
+					       (struct in6_addr *)&mapped);
+			update_gid_table(ibdev, port, &mapped, 0, 0);
+		}
+		endfor_ifa(v4_dev);
+		in_dev_put(v4_dev);
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	/* IPv6 addresses are used as GIDs as they are */
+	v6_dev = in6_dev_get(dev);
+	if (v6_dev) {
+		read_lock_bh(&v6_dev->lock);
+		list_for_each_entry(v6_addr, &v6_dev->addr_list, if_list)
+			update_gid_table(ibdev, port,
+					 (union ib_gid *)&v6_addr->addr, 0, 0);
+		read_unlock_bh(&v6_dev->lock);
+		in6_dev_put(v6_dev);
+	}
+#endif
+}
+
+/*
+ * Compute the default GID of @dev and store it through update_gid_table()
+ * with the default_gid flag set.
+ */
+static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
+				    struct net_device *dev, u8 port)
+{
+	union ib_gid default_gid;
+
+	mlx4_make_default_gid(dev, &default_gid);
+	update_gid_table(ibdev, port, &default_gid, 0, 1);
+}
+
+/*
+ * Rebuild the GID tables from scratch: reset the table of every port, then
+ * walk all net devices in init_net and add the addresses of those that map
+ * to one of our ports.
+ *
+ * Returns 0 on success, or the error reported by reset_gid_table().
+ */
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
+{
+	struct net_device *dev;
+	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+	int err;
+	int i;
+
+	for (i = 1; i <= ibdev->num_ports; ++i) {
+		/* hand the real errno back instead of a bare -1 */
+		err = reset_gid_table(ibdev, i);
+		if (err)
+			return err;
+	}
+
+	read_lock(&dev_base_lock);
+	spin_lock(&iboe->lock);
+
+	for_each_netdev(&init_net, dev) {
+		u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+
+		if (port)
+			mlx4_ib_get_dev_addr(dev, ibdev, port);
+	}
+
+	spin_unlock(&iboe->lock);
+	read_unlock(&dev_base_lock);
+
+	return 0;
+}
+
+/*
+ * Refresh, under iboe->lock, the netdev and bond-master bookkeeping of every
+ * IB-transport port and bring the GID tables in line with it: the default
+ * GID is (re)installed whenever a netdev is present, and the table is reset
+ * and repopulated when the netdev disappears, when a bonded slave is down,
+ * or when the bond master changed.
+ */
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev)
+{
struct mlx4_ib_iboe *iboe;
int port;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- oldnd = iboe->netdevs[port - 1];
+ enum ib_port_state port_state = IB_PORT_NOP;
+ struct net_device *old_master = iboe->masters[port - 1];
+ struct net_device *curr_netdev;
+ struct net_device *curr_master;
+
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (oldnd != iboe->netdevs[port - 1]) {
- if (iboe->netdevs[port - 1])
- netdev_added(ibdev, port);
- else
- netdev_removed(ibdev, port);
+ /* NOTE(review): the default GID is set again below when curr_netdev is non-NULL */
+ if (iboe->netdevs[port - 1])
+ mlx4_ib_set_default_gid(ibdev,
+ iboe->netdevs[port - 1], port);
+ curr_netdev = iboe->netdevs[port - 1];
+
+ if (iboe->netdevs[port - 1] &&
+ netif_is_bond_slave(iboe->netdevs[port - 1])) {
+ iboe->masters[port - 1] = netdev_master_upper_dev_get(
+ iboe->netdevs[port - 1]);
+ } else {
+ iboe->masters[port - 1] = NULL;
+ }
+ curr_master = iboe->masters[port - 1];
+
+ if (curr_netdev) {
+ port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ } else {
+ reset_gid_table(ibdev, port);
+ }
+ /* if using bonding/team and a slave port is down, we don't want the
+ * bond IP based gids in the table since flows that select port by
+ * gid may get the down port.
+ */
+ if (curr_master && (port_state == IB_PORT_DOWN)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ }
+ /* if bonding is used it is possible that we add it to masters
+ * only after IP address is assigned to the net bonding
+ * interface.
+ */
+ if (curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_master, ibdev, port);
}
- }
- if (dev == iboe->netdevs[0] ||
- (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
- handle_en_event(ibdev, 1, event);
- else if (dev == iboe->netdevs[1]
- || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
- handle_en_event(ibdev, 2, event);
+ /* the port left a bond: fall back to the netdev's own addresses */
+ if (!curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
+ }
+ }
spin_unlock(&iboe->lock);
+}
+
+/*
+ * netdevice notifier callback: for any event on a device in init_net,
+ * rescan the port netdevs via mlx4_ib_scan_netdevs(). The event code itself
+ * is not inspected. Always returns NOTIFY_DONE.
+ */
+static int mlx4_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlx4_ib_dev *ibdev;
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+ mlx4_ib_scan_netdevs(ibdev);
return NOTIFY_DONE;
}
@@ -1533,6 +1884,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int i, j;
int err;
struct mlx4_ib_iboe *iboe;
+ int ib_num_ports = 0;
pr_info_once("%s", mlx4_ib_version);
@@ -1682,6 +2034,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
if (check_flow_steering_support(dev)) {
+ ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
@@ -1707,11 +2060,42 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->counters[i] = -1;
}
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ ib_num_ports) {
+ ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+ err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+ MLX4_IB_UC_STEER_QPN_ALIGN,
+ &ibdev->steer_qpn_base);
+ if (err)
+ goto err_counter;
+
+ ibdev->ib_uc_qpns_bitmap =
+ kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+ sizeof(long),
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ goto err_steer_qp_release;
+ }
+
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+ dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base +
+ ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ }
+
if (ib_register_device(&ibdev->ib_dev, NULL))
- goto err_counter;
+ goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@@ -1719,11 +2103,39 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err)
- goto err_sriov;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+ if (!iboe->nb_inet.notifier_call) {
+ iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+ err = register_inetaddr_notifier(&iboe->nb_inet);
+ if (err) {
+ iboe->nb_inet.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!iboe->nb_inet6.notifier_call) {
+ iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
+ err = register_inet6addr_notifier(&iboe->nb_inet6);
+ if (err) {
+ iboe->nb_inet6.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#endif
+ for (i = 1 ; i <= ibdev->num_ports ; ++i)
+ reset_gid_table(ibdev, i);
+ rtnl_lock();
+ mlx4_ib_scan_netdevs(ibdev);
+ rtnl_unlock();
+ mlx4_ib_init_gid_table(ibdev);
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1749,11 +2161,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
return ibdev;
err_notif:
- if (unregister_netdevice_notifier(&ibdev->iboe.nb))
- pr_warn("failure unregistering notifier\n");
+ if (ibdev->iboe.nb.notifier_call) {
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb.notifier_call = NULL;
+ }
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
flush_workqueue(wq);
-err_sriov:
mlx4_ib_close_sriov(ibdev);
err_mad:
@@ -1762,6 +2188,13 @@ err_mad:
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_steer_free_bitmap:
+ kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
for (; i; --i)
if (ibdev->counters[i - 1] != -1)
@@ -1782,6 +2215,69 @@ err_dealloc:
return NULL;
}
+/*
+ * Allocate a naturally aligned, power-of-two sized block of QP numbers from
+ * the steering QPN range.
+ *
+ * @count: number of QPNs wanted; rounded up to a power of two by
+ *         get_count_order().
+ * @qpn:   receives the first QPN of the block on success.
+ *
+ * Returns 0 on success or a negative errno: the value returned by
+ * bitmap_find_free_region() when no region is free, or -ENOMEM (after a
+ * warning) when the steering bitmap was never allocated.
+ */
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+	int offset;
+
+	/* warn and bail out rather than dereference a NULL bitmap below */
+	if (WARN_ON(!dev->ib_uc_qpns_bitmap))
+		return -ENOMEM;
+
+	offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+					 dev->steer_qpn_count,
+					 get_count_order(count));
+	if (offset < 0)
+		return offset;
+
+	*qpn = dev->steer_qpn_base + offset;
+	return 0;
+}
+
+/*
+ * Return a block of QP numbers obtained from mlx4_ib_steer_qp_alloc() to the
+ * steering bitmap. A zero @qpn, or a device without device-managed steering,
+ * makes this a no-op. A @qpn below the steering base is a BUG.
+ */
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+	if (qpn &&
+	    dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+		BUG_ON(qpn < dev->steer_qpn_base);
+
+		bitmap_release_region(dev->ib_uc_qpns_bitmap,
+				      qpn - dev->steer_qpn_base,
+				      get_count_order(count));
+	}
+}
+
+/*
+ * Attach or detach the steering rule of @mqp.
+ *
+ * On attach, a flow with a single, all-zero-mask IB spec is built for the
+ * QP's port and registered in the NIC domain; the resulting registration id
+ * is stored in mqp->reg_id. On detach, the flow identified by mqp->reg_id is
+ * destroyed.
+ *
+ * Returns 0 on success, -ENOMEM when the flow description cannot be
+ * allocated, or the error of the create/destroy call.
+ */
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+			 int is_attach)
+{
+	struct ib_flow_spec_ib *spec;
+	struct ib_flow_attr *attr;
+	size_t attr_size;
+	int rc;
+
+	if (!is_attach)
+		return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+
+	attr_size = sizeof(struct ib_flow_attr) +
+		    sizeof(struct ib_flow_spec_ib);
+	attr = kzalloc(attr_size, GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	attr->port = mqp->port;
+	attr->num_of_specs = 1;
+	attr->size = attr_size;
+
+	/* the single spec lives right behind the attribute header */
+	spec = (struct ib_flow_spec_ib *)(attr + 1);
+	spec->type = IB_FLOW_SPEC_IB;
+	spec->size = sizeof(struct ib_flow_spec_ib);
+	/* Add an empty rule for IB L2 */
+	memset(&spec->mask, 0, sizeof(spec->mask));
+
+	rc = __mlx4_ib_create_flow(&mqp->ibqp, attr,
+				   IB_FLOW_DOMAIN_NIC,
+				   MLX4_FS_REGULAR,
+				   &mqp->reg_id);
+	kfree(attr);
+	return rc;
+}
+
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
@@ -1795,6 +2291,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
+ }
+
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
+
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
if (ibdev->counters[p] != -1)