aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-02-04 16:14:29 -0800
committerDavid S. Miller <davem@davemloft.net>2015-02-04 16:14:29 -0800
commitce388fff3aa446d5a4895f8a757345f53daa200a (patch)
tree93990685d6fdba0784d68bf5ca0dbf523c930bb4
parentMerge branch 'tipc-next' (diff)
parentIB/mlx4: Load balance ports in port aggregation mode (diff)
downloadlinux-dev-ce388fff3aa446d5a4895f8a757345f53daa200a.tar.xz
linux-dev-ce388fff3aa446d5a4895f8a757345f53daa200a.zip
Merge branch 'mlx4-next'
Or Gerlitz says: ==================== Add HA and LAG support to mlx4 RoCE and SRIOV services This series takes advanges of bonding mlx4 Ethernet devices to support a model of High-Availability and Link Aggregation for more environments. The mlx4 driver reacts on netdev events generated by bonding when slave state changes happen by programming a HW V2P (Virt-to-Phys) port table. Bonding was extended to expose these state changes through netdev events. When an mlx4 interface such as the mlx4 IB/RoCE driver is subject to this policy, QPs are created over virtual ports which are mapped to one of the two physical ports. When a failure happens, the re-programming of the V2P table allows traffic to keep flowing. The mlx4 Ethernet driver interfaces are not subject to this policy and act as usual. A 2nd use-case for this model would be to add HA and Link Aggregation support to single ported mlx4 Ethernet VFs. In this case, the PF Ethernet intrfaces are bonded, all the VFs see single port devices (which is supported already today), and VF QPs are subject to V2P. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c1
-rw-r--r--drivers/infiniband/hw/mlx4/main.c161
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h17
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c31
-rw-r--r--drivers/net/bonding/bond_main.c106
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c176
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_resources.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/intf.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c3
-rw-r--r--include/linux/mlx4/cmd.h7
-rw-r--r--include/linux/mlx4/device.h11
-rw-r--r--include/linux/mlx4/driver.h19
-rw-r--r--include/linux/mlx4/qp.h1
-rw-r--r--include/linux/netdevice.h15
-rw-r--r--include/net/bonding.h17
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/rtnetlink.c1
24 files changed, 756 insertions, 64 deletions
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 2d8c3397774f..f50a546224ad 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -36,6 +36,7 @@
#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>
+#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 9db258f7c804..2ed5b996b2f4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -351,6 +351,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
enum ib_mtu tmp;
struct mlx4_cmd_mailbox *mailbox;
int err = 0;
+ int is_bonded = mlx4_is_bonded(mdev->dev);
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
@@ -374,8 +375,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
+ if (is_bonded)
+ rtnl_lock(); /* required to get upper dev */
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
+ if (ndev && is_bonded)
+ ndev = netdev_master_upper_dev_get(ndev);
if (!ndev)
goto out_unlock;
@@ -387,6 +392,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_bh(&iboe->lock);
+ if (is_bonded)
+ rtnl_unlock();
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
@@ -844,7 +851,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
struct mlx4_ib_steering {
struct list_head list;
- u64 reg_id;
+ struct mlx4_flow_reg_id reg_id;
union ib_gid gid;
};
@@ -1135,9 +1142,11 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
- int err = 0, i = 0;
+ int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
enum mlx4_net_trans_promisc_mode type[2];
+ struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
+ int is_bonded = mlx4_is_bonded(dev);
memset(type, 0, sizeof(type));
@@ -1172,26 +1181,55 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
while (i < ARRAY_SIZE(type) && type[i]) {
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i]);
+ &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
i++;
+ if (is_bonded) {
+ flow_attr->port = 2;
+ err = __mlx4_ib_create_flow(qp, flow_attr,
+ domain, type[j],
+ &mflow->reg_id[j].mirror);
+ flow_attr->port = 1;
+ if (err)
+ goto err_create_flow;
+ j++;
+ }
+
}
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]);
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
+ &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
i++;
+ if (is_bonded) {
+ flow_attr->port = 2;
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
+ &mflow->reg_id[j].mirror);
+ flow_attr->port = 1;
+ if (err)
+ goto err_create_flow;
+ j++;
+ }
+ /* function to create mirror rule */
}
return &mflow->ibflow;
err_create_flow:
while (i) {
- (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]);
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
+ mflow->reg_id[i].id);
i--;
}
+
+ while (j) {
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
+ mflow->reg_id[j].mirror);
+ j--;
+ }
err_free:
kfree(mflow);
return ERR_PTR(err);
@@ -1204,10 +1242,16 @@ static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
struct mlx4_ib_flow *mflow = to_mflow(flow_id);
- while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
- err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
if (err)
ret = err;
+ if (mflow->reg_id[i].mirror) {
+ err = __mlx4_ib_destroy_flow(mdev->dev,
+ mflow->reg_id[i].mirror);
+ if (err)
+ ret = err;
+ }
i++;
}
@@ -1219,11 +1263,12 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
+ struct mlx4_flow_reg_id reg_id;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1235,10 +1280,20 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- prot, &reg_id);
+ prot, &reg_id.id);
if (err)
goto err_malloc;
+ reg_id.mirror = 0;
+ if (mlx4_is_bonded(dev)) {
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, 2,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ prot, &reg_id.mirror);
+ if (err)
+ goto err_add;
+ }
+
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
@@ -1254,7 +1309,10 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- prot, reg_id);
+ prot, reg_id.id);
+ if (reg_id.mirror)
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id.mirror);
err_malloc:
kfree(ib_steering);
@@ -1281,10 +1339,12 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
- u64 reg_id = 0;
+ struct mlx4_flow_reg_id reg_id = {0, 0};
+
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
@@ -1309,10 +1369,17 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- prot, reg_id);
+ prot, reg_id.id);
if (err)
return err;
+ if (mlx4_is_bonded(dev)) {
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id.mirror);
+ if (err)
+ return err;
+ }
+
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
if (ge) {
@@ -1440,6 +1507,7 @@ static void update_gids_task(struct work_struct *work)
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
+ int is_bonded = mlx4_is_bonded(dev);
if (!gw->dev->ib_active)
return;
@@ -1459,7 +1527,10 @@ static void update_gids_task(struct work_struct *work)
if (err)
pr_warn("set port command failed\n");
else
- mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+ if ((gw->port == 1) || !is_bonded)
+ mlx4_ib_dispatch_event(gw->dev,
+ is_bonded ? 1 : gw->port,
+ IB_EVENT_GID_CHANGE);
mlx4_free_cmd_mailbox(dev, mailbox);
kfree(gw);
@@ -1875,7 +1946,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
* don't want the bond IP based gids in the table since
* flows that select port by gid may get the down port.
*/
- if (port_state == IB_PORT_DOWN) {
+ if (port_state == IB_PORT_DOWN &&
+ !mlx4_is_bonded(ibdev->dev)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev,
curr_netdev,
@@ -2047,6 +2119,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int err;
struct mlx4_ib_iboe *iboe;
int ib_num_ports = 0;
+ int num_req_counters;
pr_info_once("%s", mlx4_ib_version);
@@ -2080,13 +2153,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
+ ibdev->bond_next_port = 0;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
- ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
+ ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
+ 1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
@@ -2207,7 +2282,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
- for (i = 0; i < ibdev->num_ports; ++i) {
+ num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
+ for (i = 0; i < num_req_counters; ++i) {
mutex_init(&ibdev->qp1_proxy_lock[i]);
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
@@ -2218,6 +2294,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->counters[i] = -1;
}
}
+ if (mlx4_is_bonded(dev))
+ for (i = 1; i < ibdev->num_ports ; ++i)
+ ibdev->counters[i] = ibdev->counters[0];
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
@@ -2538,6 +2618,38 @@ out:
return;
}
+static void handle_bonded_port_state_event(struct work_struct *work)
+{
+ struct ib_event_work *ew =
+ container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *ibdev = ew->ib_dev;
+ enum ib_port_state bonded_port_state = IB_PORT_NOP;
+ int i;
+ struct ib_event ibev;
+
+ kfree(ew);
+ spin_lock_bh(&ibdev->iboe.lock);
+ for (i = 0; i < MLX4_MAX_PORTS; ++i) {
+ struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
+
+ enum ib_port_state curr_port_state =
+ (netif_running(curr_netdev) &&
+ netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+
+ bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
+ curr_port_state : IB_PORT_ACTIVE;
+ }
+ spin_unlock_bh(&ibdev->iboe.lock);
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = 1;
+ ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+ ib_dispatch_event(&ibev);
+}
+
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, unsigned long param)
{
@@ -2547,6 +2659,18 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
struct ib_event_work *ew;
int p = 0;
+ if (mlx4_is_bonded(dev) &&
+ ((event == MLX4_DEV_EVENT_PORT_UP) ||
+ (event == MLX4_DEV_EVENT_PORT_DOWN))) {
+ ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
+ if (!ew)
+ return;
+ INIT_WORK(&ew->work, handle_bonded_port_state_event);
+ ew->ib_dev = ibdev;
+ queue_work(wq, &ew->work);
+ return;
+ }
+
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = (struct mlx4_eqe *)param;
else
@@ -2607,7 +2731,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
ibev.device = ibdev_ptr;
- ibev.element.port_num = (u8) p;
+ ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
}
@@ -2616,7 +2740,8 @@ static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
.event = mlx4_ib_event,
- .protocol = MLX4_PROT_IB_IPV6
+ .protocol = MLX4_PROT_IB_IPV6,
+ .flags = MLX4_INTFF_BONDING
};
static int __init mlx4_ib_init(void)
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6eb743f65f6f..721540c9163d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -134,10 +134,17 @@ struct mlx4_ib_fmr {
struct mlx4_fmr mfmr;
};
+#define MAX_REGS_PER_FLOW 2
+
+struct mlx4_flow_reg_id {
+ u64 id;
+ u64 mirror;
+};
+
struct mlx4_ib_flow {
struct ib_flow ibflow;
/* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
- u64 reg_id[2];
+ struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW];
};
struct mlx4_ib_wq {
@@ -527,6 +534,7 @@ struct mlx4_ib_dev {
struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
/* lock when destroying qp1_proxy and getting netdev events */
struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
+ u8 bond_next_port;
};
struct ib_event_work {
@@ -622,6 +630,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
+static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
+{
+ dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports;
+
+ return dev->bond_next_port + 1;
+}
+
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index cf000b7ad64f..792f9dc86ada 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -40,6 +40,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
@@ -93,17 +94,6 @@ enum {
#ifndef ETH_ALEN
#define ETH_ALEN 6
#endif
-static inline u64 mlx4_mac_to_u64(u8 *addr)
-{
- u64 mac = 0;
- int i;
-
- for (i = 0; i < ETH_ALEN; i++) {
- mac <<= 8;
- mac |= addr[i];
- }
- return mac;
-}
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
@@ -1915,6 +1905,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
+ if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
+ if ((ibqp->qp_type == IB_QPT_RC) ||
+ (ibqp->qp_type == IB_QPT_UD) ||
+ (ibqp->qp_type == IB_QPT_UC) ||
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
+ (ibqp->qp_type == IB_QPT_XRC_INI)) {
+ attr->port_num = mlx4_ib_bond_next_port(dev);
+ }
+ } else {
+ /* no sense in changing port_num
+ * when ports are bonded */
+ attr_mask &= ~IB_QP_PORT;
+ }
+ }
+
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->num_ports)) {
pr_debug("qpn 0x%x: invalid port number (%d) specified "
@@ -1965,6 +1971,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
+ attr->port_num = 1;
+
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c9e519cb9214..679ef00d6b16 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -790,7 +790,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
}
new_active->delay = 0;
- new_active->link = BOND_LINK_UP;
+ bond_set_slave_link_state(new_active, BOND_LINK_UP);
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
@@ -1181,6 +1181,62 @@ static void bond_free_slave(struct slave *slave)
kfree(slave);
}
+static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info)
+{
+ info->bond_mode = BOND_MODE(bond);
+ info->miimon = bond->params.miimon;
+ info->num_slaves = bond->slave_cnt;
+}
+
+static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
+{
+ strcpy(info->slave_name, slave->dev->name);
+ info->link = slave->link;
+ info->state = bond_slave_state(slave);
+ info->link_failure_count = slave->link_failure_count;
+}
+
+static void bond_netdev_notify(struct slave *slave, struct net_device *dev)
+{
+ struct bonding *bond = slave->bond;
+ struct netdev_bonding_info bonding_info;
+
+ rtnl_lock();
+ /* make sure that slave is still valid */
+ if (dev->priv_flags & IFF_BONDING) {
+ bond_fill_ifslave(slave, &bonding_info.slave);
+ bond_fill_ifbond(bond, &bonding_info.master);
+ netdev_bonding_info_change(slave->dev, &bonding_info);
+ }
+ rtnl_unlock();
+}
+
+static void bond_netdev_notify_work(struct work_struct *_work)
+{
+ struct netdev_notify_work *w =
+ container_of(_work, struct netdev_notify_work, work.work);
+
+ bond_netdev_notify(w->slave, w->dev);
+ dev_put(w->dev);
+}
+
+void bond_queue_slave_event(struct slave *slave)
+{
+ struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC);
+
+ if (!nnw)
+ return;
+
+ INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work);
+ nnw->slave = slave;
+ nnw->dev = slave->dev;
+
+ if (queue_delayed_work(slave->bond->wq, &nnw->work, 0))
+ dev_hold(slave->dev);
+ else
+ kfree(nnw);
+}
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
{
@@ -1444,19 +1500,22 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (bond->params.miimon) {
if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
if (bond->params.updelay) {
- new_slave->link = BOND_LINK_BACK;
+ bond_set_slave_link_state(new_slave,
+ BOND_LINK_BACK);
new_slave->delay = bond->params.updelay;
} else {
- new_slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(new_slave,
+ BOND_LINK_UP);
}
} else {
- new_slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(new_slave, BOND_LINK_DOWN);
}
} else if (bond->params.arp_interval) {
- new_slave->link = (netif_carrier_ok(slave_dev) ?
- BOND_LINK_UP : BOND_LINK_DOWN);
+ bond_set_slave_link_state(new_slave,
+ (netif_carrier_ok(slave_dev) ?
+ BOND_LINK_UP : BOND_LINK_DOWN));
} else {
- new_slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(new_slave, BOND_LINK_UP);
}
if (new_slave->link != BOND_LINK_DOWN)
@@ -1572,6 +1631,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
/* enslave is successful */
+ bond_queue_slave_event(new_slave);
return 0;
/* Undo stages on error */
@@ -1821,11 +1881,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev,
static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
{
struct bonding *bond = netdev_priv(bond_dev);
-
- info->bond_mode = BOND_MODE(bond);
- info->miimon = bond->params.miimon;
- info->num_slaves = bond->slave_cnt;
-
+ bond_fill_ifbond(bond, info);
return 0;
}
@@ -1839,10 +1895,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
bond_for_each_slave(bond, slave, iter) {
if (i++ == (int)info->slave_id) {
res = 0;
- strcpy(info->slave_name, slave->dev->name);
- info->link = slave->link;
- info->state = bond_slave_state(slave);
- info->link_failure_count = slave->link_failure_count;
+ bond_fill_ifslave(slave, info);
break;
}
}
@@ -1872,7 +1925,7 @@ static int bond_miimon_inspect(struct bonding *bond)
if (link_state)
continue;
- slave->link = BOND_LINK_FAIL;
+ bond_set_slave_link_state(slave, BOND_LINK_FAIL);
slave->delay = bond->params.downdelay;
if (slave->delay) {
netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
@@ -1887,7 +1940,7 @@ static int bond_miimon_inspect(struct bonding *bond)
case BOND_LINK_FAIL:
if (link_state) {
/* recovered before downdelay expired */
- slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
(bond->params.downdelay - slave->delay) *
@@ -1909,7 +1962,7 @@ static int bond_miimon_inspect(struct bonding *bond)
if (!link_state)
continue;
- slave->link = BOND_LINK_BACK;
+ bond_set_slave_link_state(slave, BOND_LINK_BACK);
slave->delay = bond->params.updelay;
if (slave->delay) {
@@ -1922,7 +1975,8 @@ static int bond_miimon_inspect(struct bonding *bond)
/*FALLTHRU*/
case BOND_LINK_BACK:
if (!link_state) {
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave,
+ BOND_LINK_DOWN);
netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
(bond->params.updelay - slave->delay) *
bond->params.miimon,
@@ -1960,7 +2014,7 @@ static void bond_miimon_commit(struct bonding *bond)
continue;
case BOND_LINK_UP:
- slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
primary = rtnl_dereference(bond->primary_slave);
@@ -2000,7 +2054,7 @@ static void bond_miimon_commit(struct bonding *bond)
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN);
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
BOND_MODE(bond) == BOND_MODE_8023AD)
@@ -2583,7 +2637,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
struct slave *current_arp_slave;
current_arp_slave = rtnl_dereference(bond->current_arp_slave);
- slave->link = BOND_LINK_UP;
+ bond_set_slave_link_state(slave, BOND_LINK_UP);
if (current_arp_slave) {
bond_set_slave_inactive_flags(
current_arp_slave,
@@ -2606,7 +2660,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN);
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_NOW);
@@ -2685,7 +2739,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
* up when it is actually down
*/
if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
- slave->link = BOND_LINK_DOWN;
+ bond_set_slave_link_state(slave, BOND_LINK_DOWN);
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
@@ -2705,7 +2759,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
if (!new_slave)
goto check_state;
- new_slave->link = BOND_LINK_BACK;
+ bond_set_slave_link_state(new_slave, BOND_LINK_BACK);
bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER);
bond_arp_send_all(bond, new_slave);
new_slave->last_link_up = jiffies;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 154effbfd8be..a681d7c0bb9f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1583,6 +1583,15 @@ static struct mlx4_cmd_info cmd_info[] = {
.verify = NULL,
.wrapper = mlx4_CMD_EPERM_wrapper
},
+ {
+ .opcode = MLX4_CMD_VIRT_PORT_MAP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
};
static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index c643d2bbb7b9..58d5a07d0ff4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -214,6 +214,8 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
iounmap(mdev->uar_map);
mlx4_uar_free(dev, &mdev->priv_uar);
mlx4_pd_free(dev, mdev->priv_pdn);
+ if (mdev->nb.notifier_call)
+ unregister_netdevice_notifier(&mdev->nb);
kfree(mdev);
}
@@ -298,6 +300,12 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i]))
mdev->pndev[i] = NULL;
}
+ /* register notifier */
+ mdev->nb.notifier_call = mlx4_en_netdev_event;
+ if (register_netdevice_notifier(&mdev->nb)) {
+ mdev->nb.notifier_call = NULL;
+ mlx4_err(mdev, "Failed to create notifier\n");
+ }
return mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e075ff1f4e80..028937b2a199 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2062,6 +2062,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
/* Detach the netdev so tasks would not attempt to access it */
mutex_lock(&mdev->state_lock);
mdev->pndev[priv->port] = NULL;
+ mdev->upper[priv->port] = NULL;
mutex_unlock(&mdev->state_lock);
mlx4_en_free_resources(priv);
@@ -2441,6 +2442,180 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
#endif
};
+struct mlx4_en_bond {
+ struct work_struct work;
+ struct mlx4_en_priv *priv;
+ int is_bonded;
+ struct mlx4_port_map port_map;
+};
+
+static void mlx4_en_bond_work(struct work_struct *work)
+{
+ struct mlx4_en_bond *bond = container_of(work,
+ struct mlx4_en_bond,
+ work);
+ int err = 0;
+ struct mlx4_dev *dev = bond->priv->mdev->dev;
+
+ if (bond->is_bonded) {
+ if (!mlx4_is_bonded(dev)) {
+ err = mlx4_bond(dev);
+ if (err)
+ en_err(bond->priv, "Fail to bond device\n");
+ }
+ if (!err) {
+ err = mlx4_port_map_set(dev, &bond->port_map);
+ if (err)
+ en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n",
+ bond->port_map.port1,
+ bond->port_map.port2,
+ err);
+ }
+ } else if (mlx4_is_bonded(dev)) {
+ err = mlx4_unbond(dev);
+ if (err)
+ en_err(bond->priv, "Fail to unbond device\n");
+ }
+ dev_put(bond->priv->dev);
+ kfree(bond);
+}
+
+static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded,
+ u8 v2p_p1, u8 v2p_p2)
+{
+ struct mlx4_en_bond *bond = NULL;
+
+ bond = kzalloc(sizeof(*bond), GFP_ATOMIC);
+ if (!bond)
+ return -ENOMEM;
+
+ INIT_WORK(&bond->work, mlx4_en_bond_work);
+ bond->priv = priv;
+ bond->is_bonded = is_bonded;
+ bond->port_map.port1 = v2p_p1;
+ bond->port_map.port2 = v2p_p2;
+ dev_hold(priv->dev);
+ queue_work(priv->mdev->workqueue, &bond->work);
+ return 0;
+}
+
+int mlx4_en_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ u8 port = 0;
+ struct mlx4_en_dev *mdev;
+ struct mlx4_dev *dev;
+ int i, num_eth_ports = 0;
+ bool do_bond = true;
+ struct mlx4_en_priv *priv;
+ u8 v2p_port1 = 0;
+ u8 v2p_port2 = 0;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return NOTIFY_DONE;
+
+ mdev = container_of(this, struct mlx4_en_dev, nb);
+ dev = mdev->dev;
+
+ /* Go into this mode only when two network devices set on two ports
+ * of the same mlx4 device are slaves of the same bonding master
+ */
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+ ++num_eth_ports;
+ if (!port && (mdev->pndev[i] == ndev))
+ port = i;
+ mdev->upper[i] = mdev->pndev[i] ?
+ netdev_master_upper_dev_get(mdev->pndev[i]) : NULL;
+ /* condition not met: network device is a slave */
+ if (!mdev->upper[i])
+ do_bond = false;
+ if (num_eth_ports < 2)
+ continue;
+ /* condition not met: same master */
+ if (mdev->upper[i] != mdev->upper[i-1])
+ do_bond = false;
+ }
+ /* condition not met: 2 salves */
+ do_bond = (num_eth_ports == 2) ? do_bond : false;
+
+ /* handle only events that come with enough info */
+ if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port)
+ return NOTIFY_DONE;
+
+ priv = netdev_priv(ndev);
+ if (do_bond) {
+ struct netdev_notifier_bonding_info *notifier_info = ptr;
+ struct netdev_bonding_info *bonding_info =
+ &notifier_info->bonding_info;
+
+ /* required mode 1, 2 or 4 */
+ if ((bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) &&
+ (bonding_info->master.bond_mode != BOND_MODE_XOR) &&
+ (bonding_info->master.bond_mode != BOND_MODE_8023AD))
+ do_bond = false;
+
+ /* require exactly 2 slaves */
+ if (bonding_info->master.num_slaves != 2)
+ do_bond = false;
+
+ /* calc v2p */
+ if (do_bond) {
+ if (bonding_info->master.bond_mode ==
+ BOND_MODE_ACTIVEBACKUP) {
+ /* in active-backup mode virtual ports are
+ * mapped to the physical port of the active
+ * slave */
+ if (bonding_info->slave.state ==
+ BOND_STATE_BACKUP) {
+ if (port == 1) {
+ v2p_port1 = 2;
+ v2p_port2 = 2;
+ } else {
+ v2p_port1 = 1;
+ v2p_port2 = 1;
+ }
+ } else { /* BOND_STATE_ACTIVE */
+ if (port == 1) {
+ v2p_port1 = 1;
+ v2p_port2 = 1;
+ } else {
+ v2p_port1 = 2;
+ v2p_port2 = 2;
+ }
+ }
+ } else { /* Active-Active */
+ /* in active-active mode a virtual port is
+ * mapped to the native physical port if and only
+ * if the physical port is up */
+ __s8 link = bonding_info->slave.link;
+
+ if (port == 1)
+ v2p_port2 = 2;
+ else
+ v2p_port1 = 1;
+ if ((link == BOND_LINK_UP) ||
+ (link == BOND_LINK_FAIL)) {
+ if (port == 1)
+ v2p_port1 = 1;
+ else
+ v2p_port2 = 2;
+ } else { /* BOND_LINK_DOWN || BOND_LINK_BACK */
+ if (port == 1)
+ v2p_port1 = 2;
+ else
+ v2p_port2 = 1;
+ }
+ }
+ }
+ }
+
+ mlx4_en_queue_bond_work(priv, do_bond,
+ v2p_port1, v2p_port2);
+
+ return NOTIFY_DONE;
+}
+
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
struct mlx4_en_port_profile *prof)
{
@@ -2623,6 +2798,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
}
mdev->pndev[port] = dev;
+ mdev->upper[port] = NULL;
netif_carrier_off(dev);
mlx4_en_set_default_moderation(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index f1a5500ff72d..34f2fdf4fe5d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -50,10 +50,14 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
context->mtu_msgmax = 0xff;
if (!is_tx && !rss)
context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
- if (is_tx)
+ if (is_tx) {
context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
- else
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)
+ context->params2 |= MLX4_QP_BIT_FPP;
+
+ } else {
context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
+ }
context->usr_page = cpu_to_be32(mdev->priv_uar.index);
context->local_qpn = cpu_to_be32(qpn);
context->pri_path.ackto = 1 & 0x07;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index dbabfae3a3de..4b08a393ebcb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[17] = "Asymmetric EQs support",
[18] = "More than 80 VFs support",
[19] = "Performance optimized for limited rule configuration flow steering support",
- [20] = "Recoverable error events support"
+ [20] = "Recoverable error events support",
+ [21] = "Port Remap support"
};
int i;
@@ -863,6 +864,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
if (field & 0x20)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV;
@@ -1120,9 +1123,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
field &= 0x7f;
MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET);
- /* For guests, disable mw type 2 */
+ /* For guests, disable mw type 2 and port remap*/
MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
+ bmme_flags &= ~MLX4_FLAG_PORT_REMAP;
MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
/* turn off device-managed steering capability if not enabled */
@@ -2100,13 +2104,16 @@ struct mlx4_config_dev {
__be32 rsvd1[3];
__be16 vxlan_udp_dport;
__be16 rsvd2;
- __be32 rsvd3[27];
- __be16 rsvd4;
- u8 rsvd5;
+ __be32 rsvd3;
+ __be32 roce_flags;
+ __be32 rsvd4[25];
+ __be16 rsvd5;
+ u8 rsvd6;
u8 rx_checksum_val;
};
#define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_DISABLE_RX_PORT BIT(18)
static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
{
@@ -2209,6 +2216,45 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port)
}
EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port);
+#define CONFIG_DISABLE_RX_PORT BIT(15)
+int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
+{
+ struct mlx4_config_dev config_dev;
+
+ memset(&config_dev, 0, sizeof(config_dev));
+ config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT);
+ if (dis)
+ config_dev.roce_flags =
+ cpu_to_be32(CONFIG_DISABLE_RX_PORT);
+
+ return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+
+int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct {
+ __be32 v_port1;
+ __be32 v_port2;
+ } *v2p;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ v2p = mailbox->buf;
+ v2p->v_port1 = cpu_to_be32(port1);
+ v2p->v_port2 = cpu_to_be32(port2);
+
+ err = mlx4_cmd(dev, mailbox->dma, 0,
+ MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 68d2bad325d5..6fce58718837 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -33,11 +33,13 @@
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/errno.h>
#include "mlx4.h"
struct mlx4_device_context {
struct list_head list;
+ struct list_head bond_list;
struct mlx4_interface *intf;
void *context;
};
@@ -115,6 +117,58 @@ void mlx4_unregister_interface(struct mlx4_interface *intf)
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
+int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx;
+ unsigned long flags;
+ int ret;
+ LIST_HEAD(bond_list);
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+ return -ENOTSUPP;
+
+ ret = mlx4_disable_rx_port_check(dev, enable);
+ if (ret) {
+ mlx4_err(dev, "Fail to %s rx port check\n",
+ enable ? "enable" : "disable");
+ return ret;
+ }
+ if (enable) {
+ dev->flags |= MLX4_FLAG_BONDED;
+ } else {
+ ret = mlx4_virt2phy_port_map(dev, 1, 2);
+ if (ret) {
+ mlx4_err(dev, "Fail to reset port map\n");
+ return ret;
+ }
+ dev->flags &= ~MLX4_FLAG_BONDED;
+ }
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+ list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) {
+ if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) {
+ list_add_tail(&dev_ctx->bond_list, &bond_list);
+ list_del(&dev_ctx->list);
+ }
+ }
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ list_for_each_entry(dev_ctx, &bond_list, bond_list) {
+ dev_ctx->intf->remove(dev, dev_ctx->context);
+ dev_ctx->context = dev_ctx->intf->add(dev);
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+ list_add_tail(&dev_ctx->list, &priv->ctx_list);
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n",
+ dev_ctx->intf->protocol, enable ?
+ "enabled" : "disabled");
+ }
+ return 0;
+}
+
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
unsigned long param)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index cc9f48439244..f3245fe0f442 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1160,6 +1160,91 @@ err_set_port:
return err ? err : count;
}
+int mlx4_bond(struct mlx4_dev *dev)
+{
+ int ret = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->bond_mutex);
+
+ if (!mlx4_is_bonded(dev))
+ ret = mlx4_do_bond(dev, true);
+ else
+ ret = 0;
+
+ mutex_unlock(&priv->bond_mutex);
+ if (ret)
+ mlx4_err(dev, "Failed to bond device: %d\n", ret);
+ else
+ mlx4_dbg(dev, "Device is bonded\n");
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_bond);
+
+int mlx4_unbond(struct mlx4_dev *dev)
+{
+ int ret = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->bond_mutex);
+
+ if (mlx4_is_bonded(dev))
+ ret = mlx4_do_bond(dev, false);
+
+ mutex_unlock(&priv->bond_mutex);
+ if (ret)
+ mlx4_err(dev, "Failed to unbond device: %d\n", ret);
+ else
+ mlx4_dbg(dev, "Device is unbonded\n");
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_unbond);
+
+
+int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
+{
+ u8 port1 = v2p->port1;
+ u8 port2 = v2p->port2;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+ return -ENOTSUPP;
+
+ mutex_lock(&priv->bond_mutex);
+
+ /* zero means keep current mapping for this port */
+ if (port1 == 0)
+ port1 = priv->v2p.port1;
+ if (port2 == 0)
+ port2 = priv->v2p.port2;
+
+ if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
+ (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
+ (port1 == 2 && port2 == 1)) {
+ /* besides boundary checks cross mapping makes
+ * no sense and therefore not allowed */
+ err = -EINVAL;
+ } else if ((port1 == priv->v2p.port1) &&
+ (port2 == priv->v2p.port2)) {
+ err = 0;
+ } else {
+ err = mlx4_virt2phy_port_map(dev, port1, port2);
+ if (!err) {
+ mlx4_dbg(dev, "port map changed: [%d][%d]\n",
+ port1, port2);
+ priv->v2p.port1 = port1;
+ priv->v2p.port2 = port2;
+ } else {
+ mlx4_err(dev, "Failed to change port mape: %d\n", err);
+ }
+ }
+
+ mutex_unlock(&priv->bond_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_port_map_set);
+
static int mlx4_load_fw(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2638,6 +2723,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
spin_lock_init(&priv->ctx_lock);
mutex_init(&priv->port_mutex);
+ mutex_init(&priv->bond_mutex);
INIT_LIST_HEAD(&priv->pgdir_list);
mutex_init(&priv->pgdir_mutex);
@@ -2934,6 +3020,9 @@ slave_start:
goto err_port;
}
+ priv->v2p.port1 = 1;
+ priv->v2p.port2 = 2;
+
err = mlx4_register_device(dev);
if (err)
goto err_port;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 148dc0945aab..803f17653da7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -885,6 +885,8 @@ struct mlx4_priv {
int reserved_mtts;
int fs_hash_mode;
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ struct mlx4_port_map v2p; /* cached port mapping configuration */
+ struct mutex bond_mutex; /* for bond mode */
__be64 slave_node_guids[MLX4_MFUNC_MAX];
atomic_t opreq_count;
@@ -1364,6 +1366,7 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
/* Returns the VF index of slave */
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
int mlx4_config_mad_demux(struct mlx4_dev *dev);
+int mlx4_do_bond(struct mlx4_dev *dev, bool enable);
enum mlx4_zone_flags {
MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 944a112dff37..2a8268e6be15 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -390,6 +390,7 @@ struct mlx4_en_dev {
struct pci_dev *pdev;
struct mutex state_lock;
struct net_device *pndev[MLX4_MAX_PORTS + 1];
+ struct net_device *upper[MLX4_MAX_PORTS + 1];
u32 port_cnt;
bool device_up;
struct mlx4_en_profile profile;
@@ -410,6 +411,7 @@ struct mlx4_en_dev {
unsigned long overflow_period;
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
+ struct notifier_block nb;
};
@@ -845,6 +847,9 @@ int mlx4_en_reset_config(struct net_device *dev,
struct hwtstamp_config ts_config,
netdev_features_t new_features);
+int mlx4_en_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+
/*
* Functions for time stamping
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 1586ecce13c7..2bb8553bd905 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -882,6 +882,8 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
context->flags &= cpu_to_be32(~(0xf << 28));
context->flags |= cpu_to_be32(states[i + 1] << 28);
+ if (states[i + 1] != MLX4_QP_STATE_RTR)
+ context->params2 &= ~MLX4_QP_BIT_FPP;
err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
context, 0, 0, qp);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 79feeb6b0d87..c5f3dfca226b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2944,6 +2944,9 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+ if (slave != mlx4_master_func_num(dev))
+ qp_ctx->params2 &= ~MLX4_QP_BIT_FPP;
+
switch (qp_type) {
case MLX4_QP_ST_RC:
case MLX4_QP_ST_XRC:
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index ae95adc78509..7b6d4e9ff603 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -71,6 +71,7 @@ enum {
/*master notify fw on finish for slave's flr*/
MLX4_CMD_INFORM_FLR_DONE = 0x5b,
+ MLX4_CMD_VIRT_PORT_MAP = 0x5c,
MLX4_CMD_GET_OP_REQ = 0x59,
/* TPT commands */
@@ -171,6 +172,12 @@ enum {
};
enum {
+ /* virtual to physical port mapping opcode modifiers */
+ MLX4_GET_PORT_VIRT2PHY = 0x0,
+ MLX4_SET_PORT_VIRT2PHY = 0x1,
+};
+
+enum {
MLX4_MAILBOX_SIZE = 4096,
MLX4_ACCESS_MEM_ALIGN = 256,
};
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c95d659a39f2..977b0b164431 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -70,6 +70,7 @@ enum {
MLX4_FLAG_SLAVE = 1 << 3,
MLX4_FLAG_SRIOV = 1 << 4,
MLX4_FLAG_OLD_REG_MAC = 1 << 6,
+ MLX4_FLAG_BONDED = 1 << 7
};
enum {
@@ -201,7 +202,8 @@ enum {
MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17,
MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18,
MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19,
- MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20
+ MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
+ MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21
};
enum {
@@ -253,9 +255,14 @@ enum {
MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10,
MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11,
+ MLX4_BMME_FLAG_PORT_REMAP = 1 << 24,
MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28,
};
+enum {
+ MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP
+};
+
enum mlx4_event {
MLX4_EVENT_TYPE_COMP = 0x00,
MLX4_EVENT_TYPE_PATH_MIG = 0x01,
@@ -1378,6 +1385,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port);
int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
+int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 022055c8fb26..9553a73d2049 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -49,6 +49,10 @@ enum mlx4_dev_event {
MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
};
+enum {
+ MLX4_INTFF_BONDING = 1 << 0
+};
+
struct mlx4_interface {
void * (*add) (struct mlx4_dev *dev);
void (*remove)(struct mlx4_dev *dev, void *context);
@@ -57,11 +61,26 @@ struct mlx4_interface {
void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
struct list_head list;
enum mlx4_protocol protocol;
+ int flags;
};
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
+int mlx4_bond(struct mlx4_dev *dev);
+int mlx4_unbond(struct mlx4_dev *dev);
+static inline int mlx4_is_bonded(struct mlx4_dev *dev)
+{
+ return !!(dev->flags & MLX4_FLAG_BONDED);
+}
+
+struct mlx4_port_map {
+ u8 port1;
+ u8 port2;
+};
+
+int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p);
+
void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port);
static inline u64 mlx4_mac_to_u64(u8 *addr)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 467ccdf94c98..2bbc62aa818a 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -96,6 +96,7 @@ enum {
MLX4_QP_BIT_RRE = 1 << 15,
MLX4_QP_BIT_RWE = 1 << 14,
MLX4_QP_BIT_RAE = 1 << 13,
+ MLX4_QP_BIT_FPP = 1 << 3,
MLX4_QP_BIT_RIC = 1 << 4,
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1347ac50d2af..ce784d5018e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -51,6 +51,7 @@
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
#include <uapi/linux/netdevice.h>
+#include <uapi/linux/if_bonding.h>
struct netpoll_info;
struct device;
@@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats {
#define NETDEV_RESEND_IGMP 0x0016
#define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */
#define NETDEV_CHANGEINFODATA 0x0018
+#define NETDEV_BONDING_INFO 0x0019
int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
@@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
netdev_features_t features);
+struct netdev_bonding_info {
+ ifslave slave;
+ ifbond master;
+};
+
+struct netdev_notifier_bonding_info {
+ struct netdev_notifier_info info; /* must be first */
+ struct netdev_bonding_info bonding_info;
+};
+
+void netdev_bonding_info_change(struct net_device *dev,
+ struct netdev_bonding_info *bonding_info);
+
static inline
struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 29f53eacac0a..4e17095ad46a 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -150,6 +150,12 @@ struct bond_parm_tbl {
int mode;
};
+struct netdev_notify_work {
+ struct delayed_work work;
+ struct slave *slave;
+ struct net_device *dev;
+};
+
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
@@ -243,6 +249,8 @@ struct bonding {
#define bond_slave_get_rtnl(dev) \
((struct slave *) rtnl_dereference(dev->rx_handler_data))
+void bond_queue_slave_event(struct slave *slave);
+
struct bond_vlan_tag {
__be16 vlan_proto;
unsigned short vlan_id;
@@ -315,6 +323,7 @@ static inline void bond_set_active_slave(struct slave *slave)
{
if (slave->backup) {
slave->backup = 0;
+ bond_queue_slave_event(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -323,6 +332,7 @@ static inline void bond_set_backup_slave(struct slave *slave)
{
if (!slave->backup) {
slave->backup = 1;
+ bond_queue_slave_event(slave);
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
}
}
@@ -336,6 +346,7 @@ static inline void bond_set_slave_state(struct slave *slave,
slave->backup = slave_state;
if (notify) {
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);
+ bond_queue_slave_event(slave);
slave->should_notify = 0;
} else {
if (slave->should_notify)
@@ -490,6 +501,12 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
return slave->inactive;
}
+static inline void bond_set_slave_link_state(struct slave *slave, int state)
+{
+ slave->link = state;
+ bond_queue_slave_event(slave);
+}
+
static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local)
{
struct in_device *in_dev;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1d564d68e31a..ede0b161b115 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5355,6 +5355,26 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+/**
+ * netdev_bonding_info_change - Dispatch event about slave change
+ * @dev: device
+ * @netdev_bonding_info: info to dispatch
+ *
+ * Send NETDEV_BONDING_INFO to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_bonding_info_change(struct net_device *dev,
+ struct netdev_bonding_info *bonding_info)
+{
+ struct netdev_notifier_bonding_info info;
+
+ memcpy(&info.bonding_info, bonding_info,
+ sizeof(struct netdev_bonding_info));
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ &info.info);
+}
+EXPORT_SYMBOL(netdev_bonding_info_change);
+
void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 673cb4c6f391..4cd5e350d129 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3180,6 +3180,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_UNREGISTER_FINAL:
case NETDEV_RELEASE:
case NETDEV_JOIN:
+ case NETDEV_BONDING_INFO:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);