about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/infiniband/hw/mlx4/ah.c 1
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c 161
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h 17
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c 31
-rw-r--r-- drivers/net/bonding/bond_main.c 106
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/cmd.c 9
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_main.c 8
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 176
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_resources.c 8
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/fw.c 56
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/intf.c 54
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/main.c 89
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/mlx4.h 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h 5
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/qp.c 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c 3
-rw-r--r-- include/linux/mlx4/cmd.h 7
-rw-r--r-- include/linux/mlx4/device.h 11
-rw-r--r-- include/linux/mlx4/driver.h 19
-rw-r--r-- include/linux/mlx4/qp.h 1
-rw-r--r-- include/linux/netdevice.h 15
-rw-r--r-- include/net/bonding.h 17
-rw-r--r-- net/core/dev.c 20
-rw-r--r-- net/core/rtnetlink.c 1
24 files changed, 756 insertions, 64 deletions
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 2d8c3397774f..f50a546224ad 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -36,6 +36,7 @@
#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>
+#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 9db258f7c804..2ed5b996b2f4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -351,6 +351,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
enum ib_mtu tmp;
struct mlx4_cmd_mailbox *mailbox;
int err = 0;
+ int is_bonded = mlx4_is_bonded(mdev->dev);
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
@@ -374,8 +375,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
+ if (is_bonded)
+ rtnl_lock(); /* required to get upper dev */
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
+ if (ndev && is_bonded)
+ ndev = netdev_master_upper_dev_get(ndev);
if (!ndev)
goto out_unlock;
@@ -387,6 +392,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_bh(&iboe->lock);
+ if (is_bonded)
+ rtnl_unlock();
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
@@ -844,7 +851,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
struct mlx4_ib_steering {
struct list_head list;
- u64 reg_id;
+ struct mlx4_flow_reg_id reg_id;
union ib_gid gid;
};
@@ -1135,9 +1142,11 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
- int err = 0, i = 0;
+ int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
enum mlx4_net_trans_promisc_mode type[2];
+ struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
+ int is_bonded = mlx4_is_bonded(dev);
memset(type, 0, sizeof(type));
@@ -1172,26 +1181,55 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
while (i < ARRAY_SIZE(type) && type[i]) {
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i]);
+ &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
i++;
+ if (is_bonded) {
+ flow_attr->port = 2;
+ err = __mlx4_ib_create_flow(qp, flow_attr,
+ domain, type[j],
+ &mflow->reg_id[j].mirror);
+ flow_attr->port = 1;
+ if (err)
+ goto err_create_flow;
+ j++;
+ }
+
}
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]);
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
+ &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
i++;
+ if (is_bonded) {
+ flow_attr->port = 2;
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
+ &mflow->reg_id[j].mirror);
+ flow_attr->port = 1;
+ if (err)
+ goto err_create_flow;
+ j++;
+ }
+ /* function to create mirror rule */
}
return &mflow->ibflow;
err_create_flow:
while (i) {
- (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]);
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
+ mflow->reg_id[i].id);
i--;
}
+
+ while (j) {
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
+ mflow->reg_id[j].mirror);
+ j--;
+ }
err_free:
kfree(mflow);
return ERR_PTR(err);
@@ -1204,10 +1242,16 @@ static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
struct mlx4_ib_flow *mflow = to_mflow(flow_id);
- while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
- err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
if (err)
ret = err;
+ if (mflow->reg_id[i].mirror) {
+ err = __mlx4_ib_destroy_flow(mdev->dev,
+ mflow->reg_id[i].mirror);
+ if (err)
+ ret = err;
+ }
i++;
}
@@ -1219,11 +1263,12 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
+ struct mlx4_flow_reg_id reg_id;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1235,10 +1280,20 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- prot, &reg_id);
+ prot, &reg_id.id);
if (err)
goto err_malloc;
+ reg_id.mirror = 0;
+ if (mlx4_is_bonded(dev)) {
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, 2,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ prot, &reg_id.mirror);
+ if (err)
+ goto err_add;
+ }
+
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
@@ -1254,7 +1309,10 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- prot, reg_id);
+ prot, reg_id.id);
+ if (reg_id.mirror)
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id.mirror);
err_malloc:
kfree(ib_steering);
@@ -1281,10 +1339,12 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
- u64 reg_id = 0;
+ struct mlx4_flow_reg_id reg_id = {0, 0};
+
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
@@ -1309,10 +1369,17 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- prot, reg_id);
+ prot, reg_id.id);
if (err)
return err;
+ if (mlx4_is_bonded(dev)) {
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id.mirror);
+ if (err)
+ return err;
+ }
+
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
if (ge) {
@@ -1440,6 +1507,7 @@ static void update_gids_task(struct work_struct *work)
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
+ int is_bonded = mlx4_is_bonded(dev);
if (!gw->dev->ib_active)
return;
@@ -1459,7 +1527,10 @@ static void update_gids_task(struct work_struct *work)
if (err)
pr_warn("set port command failed\n");
else
- mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+ if ((gw->port == 1) || !is_bonded)
+ mlx4_ib_dispatch_event(gw->dev,
+ is_bonded ? 1 : gw->port,
+ IB_EVENT_GID_CHANGE);
mlx4_free_cmd_mailbox(dev, mailbox);
kfree(gw);
@@ -1875,7 +1946,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
* don't want the bond IP based gids in the table since
* flows that select port by gid may get the down port.
*/
- if (port_state == IB_PORT_DOWN) {
+ if (port_state == IB_PORT_DOWN &&
+ !mlx4_is_bonded(ibdev->dev)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev,
curr_netdev,
@@ -2047,6 +2119,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int err;
struct mlx4_ib_iboe *iboe;
int ib_num_ports = 0;
+ int num_req_counters;
pr_info_once("%s", mlx4_ib_version);
@@ -2080,13 +2153,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
+ ibdev->bond_next_port = 0;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
- ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
+ ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
+ 1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
@@ -2207,7 +2282,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
- for (i = 0; i < ibdev->num_ports; ++i) {
+ num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
+ for (i = 0; i < num_req_counters; ++i) {
mutex_init(&ibdev->qp1_proxy_lock[i]);
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
@@ -2218,6 +2294,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->counters[i] = -1;
}
}
+ if (mlx4_is_bonded(dev))
+ for (i = 1; i < ibdev->num_ports ; ++i)
+ ibdev->counters[i] = ibdev->counters[0];
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
@@ -2538,6 +2618,38 @@ out:
return;
}
+static void handle_bonded_port_state_event(struct work_struct *work)
+{
+ struct ib_event_work *ew =
+ container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *ibdev = ew->ib_dev;
+ enum ib_port_state bonded_port_state = IB_PORT_NOP;
+ int i;
+ struct ib_event ibev;
+
+ kfree(ew);
+ spin_lock_bh(&ibdev->iboe.lock);
+ for (i = 0; i < MLX4_MAX_PORTS; ++i) {
+ struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
+
+ enum ib_port_state curr_port_state =
+ (netif_running(curr_netdev) &&
+ netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+
+ bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
+ curr_port_state : IB_PORT_ACTIVE;
+ }
+ spin_unlock_bh(&ibdev->iboe.lock);
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = 1;
+ ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+ ib_dispatch_event(&ibev);
+}
+
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, unsigned long param)
{
@@ -2547,6 +2659,18 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
struct ib_event_work *ew;
int p = 0;
+ if (mlx4_is_bonded(dev) &&
+ ((event == MLX4_DEV_EVENT_PORT_UP) ||
+ (event == MLX4_DEV_EVENT_PORT_DOWN))) {
+ ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
+ if (!ew)
+ return;
+ INIT_WORK(&ew->work, handle_bonded_port_state_event);
+ ew->ib_dev = ibdev;
+ queue_work(wq, &ew->work);
+ return;
+ }
+
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = (struct mlx4_eqe *)param;
else
@@ -2607,7 +2731,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
ibev.device = ibdev_ptr;
- ibev.element.port_num = (u8) p;
+ ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
}
@@ -2616,7 +2740,8 @@ static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
.event = mlx4_ib_event,
- .protocol = MLX4_PROT_IB_IPV6
+ .protocol = MLX4_PROT_IB_IPV6,
+ .flags = MLX4_INTFF_BONDING
};
static int __init mlx4_ib_init(void)
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6eb743f65f6f..721540c9163d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -134,10 +134,17 @@ struct mlx4_ib_fmr {
struct mlx4_fmr mfmr;
};
+#define MAX_REGS_PER_FLOW 2
+
+struct mlx4_flow_reg_id {
+ u64 id;
+ u64 mirror;
+};
+
struct mlx4_ib_flow {
struct ib_flow ibflow;
/* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
- u64 reg_id[2];
+ struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW];
};
struct mlx4_ib_wq {
@@ -527,6 +534,7 @@ struct mlx4_ib_dev {
struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
/* lock when destroying qp1_proxy and getting netdev events */
struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
+ u8 bond_next_port;
};
struct ib_event_work {
@@ -622,6 +630,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
+static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
+{
+ dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports;
+
+ return dev->bond_next_port + 1;
+}
+
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index cf000b7ad64f..792f9dc86ada 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -40,6 +40,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
@@ -93,17 +94,6 @@ enum {
#ifndef ETH_ALEN
#define ETH_ALEN 6
#endif
-static inline u64 mlx4_mac_to_u64(u8 *addr)
-{
- u64 mac = 0;
- int i;
-
- for (i = 0; i < ETH_ALEN; i++) {
- mac <<= 8;
- mac |= addr[i];
- }
- return mac;
-}
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
@@ -1915,6 +1905,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
+ if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
+ if ((ibqp->qp_type == IB_QPT_RC) ||
+ (ibqp->qp_type == IB_QPT_UD) ||
+ (ibqp->qp_type == IB_QPT_UC) ||
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
+ (ibqp->qp_type == IB_QPT_XRC_INI)) {
+ attr->port_num = mlx4_ib_bond_next_port(dev);
+ }
+ } else {
+ /* no sense in changing port_num
+ * when ports are bonded */
+ attr_mask &= ~IB_QP_PORT;
+ }
+ }
+
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->num_ports)) {
pr_debug("qpn 0x%x: invalid port number (%d) specified "
@@ -1965,6 +1971,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
+ attr->port_num = 1;
+
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c9e519cb9214..679ef00d6b16 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -790,7 +790,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
}
new_active->delay = 0;
- new_active->link = BOND_LINK_UP;
+ bond_set_slave_link_state(new_active, BOND_LINK_UP);
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
@@ -1181,6 +1181,62 @@ static void bond_free_slave(struct slave *slave)
kfree(slave);
}
+static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info)
+{
+ info->bond_mode = BOND_MODE(bond);
+ info->miimon = bond->params.miimon;
+ info->num_slaves = bond->slave_cnt;
+}
+
+static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
+{
+ strcpy(info->slave_name, slave->dev->name);
+ info->link = slave->link;
+ info->state = bond_slave_state(slave);
+ info->link_failure_count = slave->link_failure_count;
+}
+
+static void bond_netdev_notify(struct slave *slave, struct net_device *dev)
+{
+ struct bonding *bond = slave->bond;
+ struct netdev_bonding_info bonding_info;
+
+ rtnl_lock();
+ /* make sure that slave is still valid */
+ if (dev->priv_flags & IFF_BONDING) {
+ bond_fill_ifslave(slave, &bonding_info.slave);
+ bond_fill_ifbond(bond, &bonding_info.master);
+ netdev_bonding_info_change(slave->dev, &bonding_info);
+ }
+ rtnl_unlock();
+}
+
+static void bond_netdev_notify_work(struct work_struct *_work)
+{
+ struct netdev_notify_work *w =
+ container_of(_work, struct netdev_notify_work, work.work);
+
+ bond_netdev_notify(w->slave, w->dev);
+ dev_put(w->dev);
+}
+
+void bond_queue_slave_event(struct slave *slave)
+{
+ struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC);
+
+ if (!nnw)
+ return;
+
+ INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work);
+ nnw->slave = slave;
+ nnw->dev = slave->dev;
+
+ if (queue_delayed_work(slave->bond->wq, &nnw->work, 0))
+ dev_hold(slave->dev);
+ else
+ kfree(nnw);
+}
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
{
@@ -1444,19 +1500,22 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (bond->params.miimon) {
if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
if (bond->params.updelay) {
- new_slave->link = BOND_LINK_BACK;
+ bond_set_slave_link_state(new_slave,
+ BOND_LINK_BACK);
new_slave->delay = bond->params.updelay;
} else {
- new_slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(new_slave,
+ BOND_LINK_UP);
}
} else {
- new_slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(new_slave, BOND_LINK_DOWN);
}
} else if (bond->params.arp_interval) {
- new_slave->link = (netif_carrier_ok(slave_dev) ?
- BOND_LINK_UP : BOND_LINK_DOWN);
+ bond_set_slave_link_state(new_slave,
+ (netif_carrier_ok(slave_dev) ?
+ BOND_LINK_UP : BOND_LINK_DOWN));
} else {
- new_slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(new_slave, BOND_LINK_UP);
}
if (new_slave->link != BOND_LINK_DOWN)
@@ -1572,6 +1631,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
/* enslave is successful */
+ bond_queue_slave_event(new_slave);
return 0;
/* Undo stages on error */
@@ -1821,11 +1881,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev,
static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
{
struct bonding *bond = netdev_priv(bond_dev);
-
- info->bond_mode = BOND_MODE(bond);
- info->miimon = bond->params.miimon;
- info->num_slaves = bond->slave_cnt;
-
+ bond_fill_ifbond(bond, info);
return 0;
}
@@ -1839,10 +1895,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
bond_for_each_slave(bond, slave, iter) {
if (i++ == (int)info->slave_id) {
res = 0;
- strcpy(info->slave_name, slave->dev->name);
- info->link = slave->link;
- info->state = bond_slave_state(slave);
- info->link_failure_count = slave->link_failure_count;
+ bond_fill_ifslave(slave, info);
break;
}
}
@@ -1872,7 +1925,7 @@ static int bond_miimon_inspect(struct bonding *bond)
if (link_state)
continue;
- slave->link = BOND_LINK_FAIL;
+ bond_set_slave_link_state(slave, BOND_LINK_FAIL);
slave->delay = bond->params.downdelay;
if (slave->delay) {
netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
@@ -1887,7 +1940,7 @@ static int bond_miimon_inspect(struct bonding *bond)
case BOND_LINK_FAIL:
if (link_state) {
/* recovered before downdelay expired */
- slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
(bond->params.downdelay - slave->delay) *
@@ -1909,7 +1962,7 @@ static int bond_miimon_inspect(struct bonding *bond)
if (!link_state)
continue;
- slave->link = BOND_LINK_BACK;
+ bond_set_slave_link_state(slave, BOND_LINK_BACK);
slave->delay = bond->params.updelay;
if (slave->delay) {
@@ -1922,7 +1975,8 @@ static int bond_miimon_inspect(struct bonding *bond)
/*FALLTHRU*/
case BOND_LINK_BACK:
if (!link_state) {
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave,
+ BOND_LINK_DOWN);
netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
(bond->params.updelay - slave->delay) *
bond->params.miimon,
@@ -1960,7 +2014,7 @@ static void bond_miimon_commit(struct bonding *bond)
continue;
case BOND_LINK_UP:
- slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
primary = rtnl_dereference(bond->primary_slave);
@@ -2000,7 +2054,7 @@ static void bond_miimon_commit(struct bonding *bond)
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN);
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
BOND_MODE(bond) == BOND_MODE_8023AD)
@@ -2583,7 +2637,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
struct slave *current_arp_slave;
current_arp_slave = rtnl_dereference(bond->current_arp_slave);
- slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(slave, BOND_LINK_UP);
if (current_arp_slave) {
bond_set_slave_inactive_flags(
current_arp_slave,
@@ -2606,7 +2660,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN);
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_NOW);
@@ -2685,7 +2739,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
* up when it is actually down
*/
if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN);
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
@@ -2705,7 +2759,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
if (!new_slave)
goto check_state;
- new_slave->link = BOND_LINK_BACK;
+ bond_set_slave_link_state(new_slave, BOND_LINK_BACK);
bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER);
bond_arp_send_all(bond, new_slave);
new_slave->last_link_up = jiffies;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 154effbfd8be..a681d7c0bb9f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1583,6 +1583,15 @@ static struct mlx4_cmd_info cmd_info[] = {
.verify = NULL,
.wrapper = mlx4_CMD_EPERM_wrapper
},
+ {
+ .opcode = MLX4_CMD_VIRT_PORT_MAP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
};
static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index c643d2bbb7b9..58d5a07d0ff4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -214,6 +214,8 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
iounmap(mdev->uar_map);
mlx4_uar_free(dev, &mdev->priv_uar);
mlx4_pd_free(dev, mdev->priv_pdn);
+ if (mdev->nb.notifier_call)
+ unregister_netdevice_notifier(&mdev->nb);
kfree(mdev);
}
@@ -298,6 +300,12 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i]))
mdev->pndev[i] = NULL;
}
+ /* register notifier */
+ mdev->nb.notifier_call = mlx4_en_netdev_event;
+ if (register_netdevice_notifier(&mdev->nb)) {
+ mdev->nb.notifier_call = NULL;
+ mlx4_err(mdev, "Failed to create notifier\n");
+ }
return mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e075ff1f4e80..028937b2a199 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2062,6 +2062,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
/* Detach the netdev so tasks would not attempt to access it */
mutex_lock(&mdev->state_lock);
mdev->pndev[priv->port] = NULL;
+ mdev->upper[priv->port] = NULL;
mutex_unlock(&mdev->state_lock);
mlx4_en_free_resources(priv);
@@ -2441,6 +2442,180 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
#endif
};
+struct mlx4_en_bond {
+ struct work_struct work;
+ struct mlx4_en_priv *priv;
+ int is_bonded;
+ struct mlx4_port_map port_map;
+};
+
+static void mlx4_en_bond_work(struct work_struct *work)
+{
+ struct mlx4_en_bond *bond = container_of(work,
+ struct mlx4_en_bond,
+ work);
+ int err = 0;
+ struct mlx4_dev *dev = bond->priv->mdev->dev;
+
+ if (bond->is_bonded) {
+ if (!mlx4_is_bonded(dev)) {
+ err = mlx4_bond(dev);
+ if (err)
+ en_err(bond->priv, "Fail to bond device\n");
+ }
+ if (!err) {
+ err = mlx4_port_map_set(dev, &bond->port_map);
+ if (err)
+ en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n",
+ bond->port_map.port1,
+ bond->port_map.port2,
+ err);
+ }
+ } else if (mlx4_is_bonded(dev)) {
+ err = mlx4_unbond(dev);
+ if (err)
+ en_err(bond->priv, "Fail to unbond device\n");
+ }
+ dev_put(bond->priv->dev);
+ kfree(bond);
+}
+
+static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded,
+ u8 v2p_p1, u8 v2p_p2)
+{
+ struct mlx4_en_bond *bond = NULL;
+
+ bond = kzalloc(sizeof(*bond), GFP_ATOMIC);
+ if (!bond)
+ return -ENOMEM;
+
+ INIT_WORK(&bond->work, mlx4_en_bond_work);
+ bond->priv = priv;
+ bond->is_bonded = is_bonded;
+ bond->port_map.port1 = v2p_p1;
+ bond->port_map.port2 = v2p_p2;
+ dev_hold(priv->dev);
+ queue_work(priv->mdev->workqueue, &bond->work);
+ return 0;
+}
+
+int mlx4_en_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ u8 port = 0;
+ struct mlx4_en_dev *mdev;
+ struct mlx4_dev *dev;
+ int i, num_eth_ports = 0;
+ bool do_bond = true;
+ struct mlx4_en_priv *priv;
+ u8 v2p_port1 = 0;
+ u8 v2p_port2 = 0;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return NOTIFY_DONE;
+
+ mdev = container_of(this, struct mlx4_en_dev, nb);
+ dev = mdev->dev;
+
+ /* Go into this mode only when two network devices set on two ports
+ * of the same mlx4 device are slaves of the same bonding master
+ */
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+ ++num_eth_ports;
+ if (!port && (mdev->pndev[i] == ndev))
+ port = i;
+ mdev->upper[i] = mdev->pndev[i] ?
+ netdev_master_upper_dev_get(mdev->pndev[i]) : NULL;
+ /* condition not met: network device is a slave */
+ if (!mdev->upper[i])
+ do_bond = false;
+ if (num_eth_ports < 2)
+ continue;
+ /* condition not met: same master */
+ if (mdev->upper[i] != mdev->upper[i-1])
+ do_bond = false;
+ }
+ /* condition not met: 2 slaves */
+ do_bond = (num_eth_ports == 2) ? do_bond : false;
+
+ /* handle only events that come with enough info */
+ if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port)
+ return NOTIFY_DONE;
+
+ priv = netdev_priv(ndev);
+ if (do_bond) {
+ struct netdev_notifier_bonding_info *notifier_info = ptr;
+ struct netdev_bonding_info *bonding_info =
+ &notifier_info->bonding_info;
+
+ /* required mode 1, 2 or 4 */
+ if ((bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) &&
+ (bonding_info->master.bond_mode != BOND_MODE_XOR) &&
+ (bonding_info->master.bond_mode != BOND_MODE_8023AD))
+ do_bond = false;
+
+ /* require exactly 2 slaves */
+ if (bonding_info->master.num_slaves != 2)
+ do_bond = false;
+
+ /* calc v2p */
+ if (do_bond) {
+ if (bonding_info->master.bond_mode ==
+ BOND_MODE_ACTIVEBACKUP) {
+ /* in active-backup mode virtual ports are
+ * mapped to the physical port of the active
+ * slave */
+ if (bonding_info->slave.state ==
+ BOND_STATE_BACKUP) {
+ if (port == 1) {
+ v2p_port1 = 2;
+ v2p_port2 = 2;
+ } else {
+ v2p_port1 = 1;
+ v2p_port2 = 1;
+ }
+ } else { /* BOND_STATE_ACTIVE */
+ if (port == 1) {
+ v2p_port1 = 1;
+ v2p_port2 = 1;
+ } else {
+ v2p_port1 = 2;
+ v2p_port2 = 2;
+ }
+ }
+ } else { /* Active-Active */
+ /* in active-active mode a virtual port is
+ * mapped to the native physical port if and only
+ * if the physical port is up */
+ __s8 link = bonding_info->slave.link;
+
+ if (port == 1)
+ v2p_port2 = 2;
+ else
+ v2p_port1 = 1;
+ if ((link == BOND_LINK_UP) ||
+ (link == BOND_LINK_FAIL)) {
+ if (port == 1)
+ v2p_port1 = 1;
+ else
+ v2p_port2 = 2;
+ } else { /* BOND_LINK_DOWN || BOND_LINK_BACK */
+ if (port == 1)
+ v2p_port1 = 2;
+ else
+ v2p_port2 = 1;
+ }
+ }
+ }
+ }
+
+ mlx4_en_queue_bond_work(priv, do_bond,
+ v2p_port1, v2p_port2);
+
+ return NOTIFY_DONE;
+}
+
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
struct mlx4_en_port_profile *prof)
{
@@ -2623,6 +2798,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
}
mdev->pndev[port] = dev;
+ mdev->upper[port] = NULL;
netif_carrier_off(dev);
mlx4_en_set_default_moderation(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index f1a5500ff72d..34f2fdf4fe5d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -50,10 +50,14 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
context->mtu_msgmax = 0xff;
if (!is_tx && !rss)
context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
- if (is_tx)
+ if (is_tx) {
context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
- else
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)
+ context->params2 |= MLX4_QP_BIT_FPP;
+
+ } else {
context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
+ }
context->usr_page = cpu_to_be32(mdev->priv_uar.index);
context->local_qpn = cpu_to_be32(qpn);
context->pri_path.ackto = 1 & 0x07;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index dbabfae3a3de..4b08a393ebcb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[17] = "Asymmetric EQs support",
[18] = "More than 80 VFs support",
[19] = "Performance optimized for limited rule configuration flow steering support",
- [20] = "Recoverable error events support"
+ [20] = "Recoverable error events support",
+ [21] = "Port Remap support"
};
int i;
@@ -863,6 +864,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
if (field & 0x20)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV;
@@ -1120,9 +1123,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
field &= 0x7f;
MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET);
- /* For guests, disable mw type 2 */
+ /* For guests, disable mw type 2 and port remap */
MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
+ bmme_flags &= ~MLX4_FLAG_PORT_REMAP;
MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
/* turn off device-managed steering capability if not enabled */
@@ -2100,13 +2104,16 @@ struct mlx4_config_dev {
__be32 rsvd1[3];
__be16 vxlan_udp_dport;
__be16 rsvd2;
- __be32 rsvd3[27];
- __be16 rsvd4;
- u8 rsvd5;
+ __be32 rsvd3;
+ __be32 roce_flags;
+ __be32 rsvd4[25];
+ __be16 rsvd5;
+ u8 rsvd6;
u8 rx_checksum_val;
};
#define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_DISABLE_RX_PORT BIT(18)
static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
{
@@ -2209,6 +2216,45 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port)
}
EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port);
+#define CONFIG_DISABLE_RX_PORT BIT(15)
+/*
+ * mlx4_disable_rx_port_check - set or clear the "disable RX port check"
+ * bit through CONFIG_DEV.
+ * @dev: device to configure
+ * @dis: true puts CONFIG_DISABLE_RX_PORT into roce_flags, false leaves
+ *       roce_flags zero
+ *
+ * update_flags always carries MLX4_DISABLE_RX_PORT; every other field of
+ * the CONFIG_DEV structure is sent as zero.
+ *
+ * Returns whatever mlx4_CONFIG_DEV_set() returns.
+ */
+int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
+{
+	struct mlx4_config_dev cfg;
+	u32 roce_flags = dis ? CONFIG_DISABLE_RX_PORT : 0;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT);
+	cfg.roce_flags = cpu_to_be32(roce_flags);
+
+	return mlx4_CONFIG_DEV_set(dev, &cfg);
+}
+
+/*
+ * mlx4_virt2phy_port_map - issue the VIRT_PORT_MAP "set" command.
+ * @dev: device to issue the command on
+ * @port1: value written to the first mailbox word (v_port1)
+ * @port2: value written to the second mailbox word (v_port2)
+ *
+ * Both values are stored big-endian in a command mailbox, which is freed
+ * again before returning.
+ *
+ * Returns 0 on success, the error encoded in the mailbox pointer when the
+ * mailbox cannot be allocated, or the error returned by mlx4_cmd().
+ */
+int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct {
+		__be32 v_port1;
+		__be32 v_port2;
+	} *v2p;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		/* hand back the allocator's own error, not a guessed one */
+		return PTR_ERR(mailbox);
+
+	v2p = mailbox->buf;
+	v2p->v_port1 = cpu_to_be32(port1);
+	v2p->v_port2 = cpu_to_be32(port2);
+
+	err = mlx4_cmd(dev, mailbox->dma, 0,
+		       MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 68d2bad325d5..6fce58718837 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -33,11 +33,13 @@
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/errno.h>
#include "mlx4.h"
struct mlx4_device_context {
struct list_head list;
+ struct list_head bond_list;
struct mlx4_interface *intf;
void *context;
};
@@ -115,6 +117,58 @@ void mlx4_unregister_interface(struct mlx4_interface *intf)
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
+/*
+ * mlx4_do_bond - switch the device into or out of bonded mode.
+ * @dev: device to (un)bond
+ * @enable: true to enter bonded mode, false to leave it
+ *
+ * Entering bonded mode disables the RX port check and sets
+ * MLX4_FLAG_BONDED.  Leaving it re-enables the check, resets the
+ * virtual-to-physical port map to (1, 2) and clears the flag.  In both
+ * cases every registered interface that advertises MLX4_INTFF_BONDING is
+ * then restarted through its remove() and add() hooks.
+ *
+ * The callers visible in this patch, mlx4_bond() and mlx4_unbond(), hold
+ * priv->bond_mutex around this call.
+ *
+ * Returns 0 on success, -ENOTSUPP if the device lacks port remap support,
+ * or the error from the failing firmware command.
+ */
+int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx;
+	unsigned long flags;
+	int ret;
+	LIST_HEAD(bond_list);
+
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+		return -ENOTSUPP;
+
+	/* enable == true means the RX port check is being disabled */
+	ret = mlx4_disable_rx_port_check(dev, enable);
+	if (ret) {
+		mlx4_err(dev, "Fail to %s rx port check\n",
+			 enable ? "disable" : "enable");
+		return ret;
+	}
+	if (enable) {
+		dev->flags |= MLX4_FLAG_BONDED;
+	} else {
+		ret = mlx4_virt2phy_port_map(dev, 1, 2);
+		if (ret) {
+			mlx4_err(dev, "Fail to reset port map\n");
+			return ret;
+		}
+		/*
+		 * Keep the cached map in step with the hardware reset;
+		 * otherwise mlx4_port_map_set() would skip a later request
+		 * that happens to match the stale cache.
+		 */
+		priv->v2p.port1 = 1;
+		priv->v2p.port2 = 2;
+		dev->flags &= ~MLX4_FLAG_BONDED;
+	}
+
+	/*
+	 * Move the bonding-aware contexts onto a private list under
+	 * ctx_lock so their hooks can be called without the spinlock held.
+	 */
+	spin_lock_irqsave(&priv->ctx_lock, flags);
+	list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) {
+		if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) {
+			list_add_tail(&dev_ctx->bond_list, &bond_list);
+			list_del(&dev_ctx->list);
+		}
+	}
+	spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+	/* restart each collected interface, then put it back on ctx_list */
+	list_for_each_entry(dev_ctx, &bond_list, bond_list) {
+		dev_ctx->intf->remove(dev, dev_ctx->context);
+		dev_ctx->context = dev_ctx->intf->add(dev);
+
+		spin_lock_irqsave(&priv->ctx_lock, flags);
+		list_add_tail(&dev_ctx->list, &priv->ctx_list);
+		spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+		mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n",
+			 dev_ctx->intf->protocol, enable ?
+			 "enabled" : "disabled");
+	}
+	return 0;
+}
+
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
unsigned long param)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index cc9f48439244..f3245fe0f442 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1160,6 +1160,91 @@ err_set_port:
return err ? err : count;
}
+/*
+ * mlx4_bond - put the device into bonded mode if it is not already.
+ * @dev: device to bond
+ *
+ * Serialized by priv->bond_mutex.  An already bonded device is treated
+ * as success.
+ *
+ * Returns 0 on success or the error from mlx4_do_bond().
+ */
+int mlx4_bond(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err = 0;
+
+	mutex_lock(&priv->bond_mutex);
+	if (!mlx4_is_bonded(dev))
+		err = mlx4_do_bond(dev, true);
+	mutex_unlock(&priv->bond_mutex);
+
+	if (err) {
+		mlx4_err(dev, "Failed to bond device: %d\n", err);
+		return err;
+	}
+
+	mlx4_dbg(dev, "Device is bonded\n");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_bond);
+
+/*
+ * mlx4_unbond - take the device out of bonded mode if it is bonded.
+ * @dev: device to unbond
+ *
+ * Serialized by priv->bond_mutex.  A device that is not bonded is treated
+ * as success.
+ *
+ * Returns 0 on success or the error from mlx4_do_bond().
+ */
+int mlx4_unbond(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err = 0;
+
+	mutex_lock(&priv->bond_mutex);
+	if (mlx4_is_bonded(dev))
+		err = mlx4_do_bond(dev, false);
+	mutex_unlock(&priv->bond_mutex);
+
+	if (!err) {
+		mlx4_dbg(dev, "Device is unbonded\n");
+		return 0;
+	}
+
+	mlx4_err(dev, "Failed to unbond device: %d\n", err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_unbond);
+
+
+/*
+ * mlx4_port_map_set - change the virtual-to-physical port mapping.
+ * @dev: device to reconfigure
+ * @v2p: requested mapping; a field set to 0 keeps the currently cached
+ *       mapping for that port
+ *
+ * The last mapping that was applied is cached in priv->v2p.  A request
+ * equal to the cache succeeds without issuing a firmware command.
+ * Serialized by priv->bond_mutex.
+ *
+ * Returns 0 on success, -ENOTSUPP if the device lacks port remap support,
+ * -EINVAL for an out-of-range or crossed (2, 1) mapping, or the error
+ * from mlx4_virt2phy_port_map().
+ */
+int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
+{
+	u8 port1 = v2p->port1;
+	u8 port2 = v2p->port2;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err = 0;
+
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+		return -ENOTSUPP;
+
+	mutex_lock(&priv->bond_mutex);
+
+	/* zero means keep current mapping for this port */
+	if (port1 == 0)
+		port1 = priv->v2p.port1;
+	if (port2 == 0)
+		port2 = priv->v2p.port2;
+
+	/*
+	 * Besides the boundary checks, cross mapping makes no sense and
+	 * is therefore not allowed.
+	 */
+	if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
+	    (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
+	    (port1 == 2 && port2 == 1)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* nothing to do when the request matches the cached mapping */
+	if ((port1 == priv->v2p.port1) && (port2 == priv->v2p.port2))
+		goto out;
+
+	err = mlx4_virt2phy_port_map(dev, port1, port2);
+	if (err) {
+		mlx4_err(dev, "Failed to change port map: %d\n", err);
+		goto out;
+	}
+
+	mlx4_dbg(dev, "port map changed: [%d][%d]\n", port1, port2);
+	priv->v2p.port1 = port1;
+	priv->v2p.port2 = port2;
+
+out:
+	mutex_unlock(&priv->bond_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_port_map_set);
+
static int mlx4_load_fw(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2638,6 +2723,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
spin_lock_init(&priv->ctx_lock);
mutex_init(&priv->port_mutex);
+ mutex_init(&priv->bond_mutex);
INIT_LIST_HEAD(&priv->pgdir_list);
mutex_init(&priv->pgdir_mutex);
@@ -2934,6 +3020,9 @@ slave_start:
goto err_port;
}
+ priv->v2p.port1 = 1;
+ priv->v2p.port2 = 2;
+
err = mlx4_register_device(dev);
if (err)
goto err_port;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 148dc0945aab..803f17653da7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -885,6 +885,8 @@ struct mlx4_priv {
int reserved_mtts;
int fs_hash_mode;
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ struct mlx4_port_map v2p; /* cached port mapping configuration */
+ struct mutex bond_mutex; /* for bond mode */
__be64 slave_node_guids[MLX4_MFUNC_MAX];
atomic_t opreq_count;
@@ -1364,6 +1366,7 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
/* Returns the VF index of slave */
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
int mlx4_config_mad_demux(struct mlx4_dev *dev);
+int mlx4_do_bond(struct mlx4_dev *dev, bool enable);
enum mlx4_zone_flags {
MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 944a112dff37..2a8268e6be15 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -390,6 +390,7 @@ struct mlx4_en_dev {
struct pci_dev *pdev;
struct mutex state_lock;
struct net_device *pndev[MLX4_MAX_PORTS + 1];
+ struct net_device *upper[MLX4_MAX_PORTS + 1];
u32 port_cnt;
bool device_up;
struct mlx4_en_profile profile;
@@ -410,6 +411,7 @@ struct mlx4_en_dev {
unsigned long overflow_period;
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
+ struct notifier_block nb;
};
@@ -845,6 +847,9 @@ int mlx4_en_reset_config(struct net_device *dev,
struct hwtstamp_config ts_config,
netdev_features_t new_features);
+int mlx4_en_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+
/*
* Functions for time stamping
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 1586ecce13c7..2bb8553bd905 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -882,6 +882,8 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
context->flags &= cpu_to_be32(~(0xf << 28));
context->flags |= cpu_to_be32(states[i + 1] << 28);
+ if (states[i + 1] != MLX4_QP_STATE_RTR)
+ context->params2 &= ~MLX4_QP_BIT_FPP;
err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
context, 0, 0, qp);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 79feeb6b0d87..c5f3dfca226b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2944,6 +2944,9 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+ if (slave != mlx4_master_func_num(dev))
+ qp_ctx->params2 &= ~MLX4_QP_BIT_FPP;
+
switch (qp_type) {
case MLX4_QP_ST_RC:
case MLX4_QP_ST_XRC:
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index ae95adc78509..7b6d4e9ff603 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -71,6 +71,7 @@ enum {
/*master notify fw on finish for slave's flr*/
MLX4_CMD_INFORM_FLR_DONE = 0x5b,
+ MLX4_CMD_VIRT_PORT_MAP = 0x5c,
MLX4_CMD_GET_OP_REQ = 0x59,
/* TPT commands */
@@ -171,6 +172,12 @@ enum {
};
enum {
+ /* virtual to physical port mapping opcode modifiers */
+ MLX4_GET_PORT_VIRT2PHY = 0x0,
+ MLX4_SET_PORT_VIRT2PHY = 0x1,
+};
+
+enum {
MLX4_MAILBOX_SIZE = 4096,
MLX4_ACCESS_MEM_ALIGN = 256,
};
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c95d659a39f2..977b0b164431 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -70,6 +70,7 @@ enum {
MLX4_FLAG_SLAVE = 1 << 3,
MLX4_FLAG_SRIOV = 1 << 4,
MLX4_FLAG_OLD_REG_MAC = 1 << 6,
+ MLX4_FLAG_BONDED = 1 << 7
};
enum {
@@ -201,7 +202,8 @@ enum {
MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17,
MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18,
MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19,
- MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20
+ MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
+ MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21
};
enum {
@@ -253,9 +255,14 @@ enum {
MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10,
MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11,
+ MLX4_BMME_FLAG_PORT_REMAP = 1 << 24,
MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28,
};
+enum {
+ MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP
+};
+
enum mlx4_event {
MLX4_EVENT_TYPE_COMP = 0x00,
MLX4_EVENT_TYPE_PATH_MIG = 0x01,
@@ -1378,6 +1385,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port);
int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
+int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 022055c8fb26..9553a73d2049 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -49,6 +49,10 @@ enum mlx4_dev_event {
MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
};
+enum {
+ MLX4_INTFF_BONDING = 1 << 0
+};
+
struct mlx4_interface {
void * (*add) (struct mlx4_dev *dev);
void (*remove)(struct mlx4_dev *dev, void *context);
@@ -57,11 +61,26 @@ struct mlx4_interface {
void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
struct list_head list;
enum mlx4_protocol protocol;
+ int flags;
};
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
+int mlx4_bond(struct mlx4_dev *dev);
+int mlx4_unbond(struct mlx4_dev *dev);
+/* Returns 1 when MLX4_FLAG_BONDED is set in dev->flags, 0 otherwise. */
+static inline int mlx4_is_bonded(struct mlx4_dev *dev)
+{
+	return (dev->flags & MLX4_FLAG_BONDED) ? 1 : 0;
+}
+
+/*
+ * Port mapping handed to mlx4_port_map_set().  That function treats a
+ * field value of 0 as "keep the current mapping for this port" and
+ * rejects values above MLX4_MAX_PORTS.
+ */
+struct mlx4_port_map {
+ u8 port1;
+ u8 port2;
+};
+
+int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p);
+
void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port);
static inline u64 mlx4_mac_to_u64(u8 *addr)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 467ccdf94c98..2bbc62aa818a 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -96,6 +96,7 @@ enum {
MLX4_QP_BIT_RRE = 1 << 15,
MLX4_QP_BIT_RWE = 1 << 14,
MLX4_QP_BIT_RAE = 1 << 13,
+ MLX4_QP_BIT_FPP = 1 << 3,
MLX4_QP_BIT_RIC = 1 << 4,
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1347ac50d2af..ce784d5018e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -51,6 +51,7 @@
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
#include <uapi/linux/netdevice.h>
+#include <uapi/linux/if_bonding.h>
struct netpoll_info;
struct device;
@@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats {
#define NETDEV_RESEND_IGMP 0x0016
#define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */
#define NETDEV_CHANGEINFODATA 0x0018
+#define NETDEV_BONDING_INFO 0x0019
int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
@@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
netdev_features_t features);
+/*
+ * Bonding state copied into NETDEV_BONDING_INFO notifications by
+ * netdev_bonding_info_change().
+ * NOTE(review): ifslave/ifbond presumably come from
+ * <uapi/linux/if_bonding.h> - confirm.
+ */
+struct netdev_bonding_info {
+ ifslave slave;
+ ifbond master;
+};
+
+/*
+ * Notifier payload built by netdev_bonding_info_change() for the
+ * NETDEV_BONDING_INFO event; a pointer to the leading 'info' member is
+ * what gets passed to call_netdevice_notifiers_info().
+ */
+struct netdev_notifier_bonding_info {
+ struct netdev_notifier_info info; /* must be first */
+ struct netdev_bonding_info bonding_info;
+};
+
+void netdev_bonding_info_change(struct net_device *dev,
+ struct netdev_bonding_info *bonding_info);
+
static inline
struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 29f53eacac0a..4e17095ad46a 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -150,6 +150,12 @@ struct bond_parm_tbl {
int mode;
};
+/*
+ * Delayed-work item carrying a slave and its net_device.
+ * NOTE(review): presumably queued by bond_queue_slave_event() to deliver
+ * the bonding-info notification later - confirm in bond_main.c.
+ */
+struct netdev_notify_work {
+ struct delayed_work work;
+ struct slave *slave;
+ struct net_device *dev;
+};
+
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
@@ -243,6 +249,8 @@ struct bonding {
#define bond_slave_get_rtnl(dev) \
((struct slave *) rtnl_dereference(dev->rx_handler_data))
+void bond_queue_slave_event(struct slave *slave);
+
struct bond_vlan_tag {
__be16 vlan_proto;
unsigned short vlan_id;
@@ -315,6 +323,7 @@ static inline void bond_set_active_slave(struct slave *slave)
{
if (slave->backup) {
slave->backup = 0;
+ bond_queue_slave_event(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -323,6 +332,7 @@ static inline void bond_set_backup_slave(struct slave *slave)
{
if (!slave->backup) {
slave->backup = 1;
+ bond_queue_slave_event(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -336,6 +346,7 @@ static inline void bond_set_slave_state(struct slave *slave,
slave->backup = slave_state;
if (notify) {
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
+ bond_queue_slave_event(slave);
slave->should_notify = 0;
} else {
if (slave->should_notify)
@@ -490,6 +501,12 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
return slave->inactive;
}
+/*
+ * Store a new link state in slave->link and call
+ * bond_queue_slave_event() for that slave.  The event is queued on every
+ * call; the new state is not compared with the old one.
+ */
+static inline void bond_set_slave_link_state(struct slave *slave, int state)
+{
+ slave->link = state;
+ bond_queue_slave_event(slave);
+}
+
static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local)
{
struct in_device *in_dev;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1d564d68e31a..ede0b161b115 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5355,6 +5355,26 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+/**
+ * netdev_bonding_info_change - Dispatch event about slave change
+ * @dev: device
+ * @bonding_info: info to dispatch
+ *
+ * Copy @bonding_info into a notifier payload and send
+ * NETDEV_BONDING_INFO to the netdev notifiers.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_bonding_info_change(struct net_device *dev,
+				struct netdev_bonding_info *bonding_info)
+{
+	struct netdev_notifier_bonding_info payload;
+
+	payload.bonding_info = *bonding_info;
+	call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+				      &payload.info);
+}
+EXPORT_SYMBOL(netdev_bonding_info_change);
+
void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 673cb4c6f391..4cd5e350d129 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3180,6 +3180,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_UNREGISTER_FINAL:
case NETDEV_RELEASE:
case NETDEV_JOIN:
+ case NETDEV_BONDING_INFO:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);