From 61bd3857ff2c7daf756d49b41e6277bbdaa8f789 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:29 +0200 Subject: net/core: Add event for a change in slave state Add an event which provides an indication of a change in the state of a bonding slave. The event handler should cast the pointer to the appropriate type (struct netdev_notifier_bonding_info) and read its bonding_info member (struct netdev_bonding_info) in order to get the full info about the slave. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ net/core/dev.c | 20 ++++++++++++++++++++ net/core/rtnetlink.c | 1 + 3 files changed, 36 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1347ac50d2af..ce784d5018e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats { #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 +#define NETDEV_BONDING_INFO 0x0019 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +struct netdev_bonding_info { + ifslave slave; + ifbond master; +}; + +struct netdev_notifier_bonding_info { + struct netdev_notifier_info info; /* must be first */ + struct netdev_bonding_info bonding_info; +}; + +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info); + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/core/dev.c b/net/core/dev.c index 1d564d68e31a..ede0b161b115 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5355,6 +5355,26 @@ void
netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +/** + * netdev_bonding_info_change - Dispatch event about slave change + * @dev: device + * @bonding_info: info to dispatch + * + * Send NETDEV_BONDING_INFO to netdev notifiers with info. + * The caller must hold the RTNL lock. + */ +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info) +{ + struct netdev_notifier_bonding_info info; + + memcpy(&info.bonding_info, bonding_info, + sizeof(struct netdev_bonding_info)); + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + &info.info); +} +EXPORT_SYMBOL(netdev_bonding_info_change); + void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 673cb4c6f391..4cd5e350d129 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3180,6 +3180,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: + case NETDEV_BONDING_INFO: break; default: rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 69a2338e05995b10225b2a131f7540d1305980e4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:30 +0200 Subject: net/bonding: Move slave state changes to a helper function Move slave state changes to a helper function, this is a pre-step for adding functionality of dispatching an event when this helper is called. This commit doesn't add new functionality. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_main.c | 64 ++++++++++++++++++++++++----------------- include/net/bonding.h | 5 ++++ 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c9e519cb9214..92fe3a1bf52b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -790,7 +790,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } new_active->delay = 0; - new_active->link = BOND_LINK_UP; + bond_set_slave_link_state(new_active, BOND_LINK_UP); if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1181,6 +1181,21 @@ static void bond_free_slave(struct slave *slave) kfree(slave); } +static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info) +{ + info->bond_mode = BOND_MODE(bond); + info->miimon = bond->params.miimon; + info->num_slaves = bond->slave_cnt; +} + +static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) +{ + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = bond_slave_state(slave); + info->link_failure_count = slave->link_failure_count; +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1444,19 +1459,22 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond->params.miimon) { if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { - new_slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(new_slave, + BOND_LINK_BACK); new_slave->delay = bond->params.updelay; } else { - new_slave->link = BOND_LINK_UP; + bond_set_slave_link_state(new_slave, + BOND_LINK_UP); } } else { - new_slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(new_slave, BOND_LINK_DOWN); } } else if (bond->params.arp_interval) { - new_slave->link = (netif_carrier_ok(slave_dev) ? 
- BOND_LINK_UP : BOND_LINK_DOWN); + bond_set_slave_link_state(new_slave, + (netif_carrier_ok(slave_dev) ? + BOND_LINK_UP : BOND_LINK_DOWN)); } else { - new_slave->link = BOND_LINK_UP; + bond_set_slave_link_state(new_slave, BOND_LINK_UP); } if (new_slave->link != BOND_LINK_DOWN) @@ -1821,11 +1839,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) { struct bonding *bond = netdev_priv(bond_dev); - - info->bond_mode = BOND_MODE(bond); - info->miimon = bond->params.miimon; - info->num_slaves = bond->slave_cnt; - + bond_fill_ifbond(bond, info); return 0; } @@ -1839,10 +1853,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in bond_for_each_slave(bond, slave, iter) { if (i++ == (int)info->slave_id) { res = 0; - strcpy(info->slave_name, slave->dev->name); - info->link = slave->link; - info->state = bond_slave_state(slave); - info->link_failure_count = slave->link_failure_count; + bond_fill_ifslave(slave, info); break; } } @@ -1872,7 +1883,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - slave->link = BOND_LINK_FAIL; + bond_set_slave_link_state(slave, BOND_LINK_FAIL); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -1887,7 +1898,7 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -1909,7 +1920,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(slave, BOND_LINK_BACK); slave->delay = bond->params.updelay; if 
(slave->delay) { @@ -1922,7 +1933,8 @@ static int bond_miimon_inspect(struct bonding *bond) /*FALLTHRU*/ case BOND_LINK_BACK: if (!link_state) { - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, + BOND_LINK_DOWN); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -1960,7 +1972,7 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; primary = rtnl_dereference(bond->primary_slave); @@ -2000,7 +2012,7 @@ static void bond_miimon_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_8023AD) @@ -2583,7 +2595,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *current_arp_slave; current_arp_slave = rtnl_dereference(bond->current_arp_slave); - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); if (current_arp_slave) { bond_set_slave_inactive_flags( current_arp_slave, @@ -2606,7 +2618,7 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); @@ -2685,7 +2697,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) * up when it is actually down */ if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2705,7 +2717,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave) goto 
check_state; - new_slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(new_slave, BOND_LINK_BACK); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->last_link_up = jiffies; diff --git a/include/net/bonding.h b/include/net/bonding.h index 29f53eacac0a..d1367ec74933 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -490,6 +490,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } +static inline void bond_set_slave_link_state(struct slave *slave, int state) +{ + slave->link = state; +} + static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) { struct in_device *in_dev; -- cgit v1.2.3-59-g8ed1b From 69e6113343cfe983511904ffca0d7a1466460b67 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:31 +0200 Subject: net/bonding: Notify state change on slaves Use notifier chain to dispatch an event upon a change in slave state. Event is dispatched with slave specific info. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 42 +++++++++++++++++++++++++++++++++++++++++ include/net/bonding.h | 12 ++++++++++++ 2 files changed, 54 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 92fe3a1bf52b..679ef00d6b16 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1196,6 +1196,47 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } +static void bond_netdev_notify(struct slave *slave, struct net_device *dev) +{ + struct bonding *bond = slave->bond; + struct netdev_bonding_info bonding_info; + + rtnl_lock(); + /* make sure that slave is still valid */ + if (dev->priv_flags & IFF_BONDING) { + bond_fill_ifslave(slave, &bonding_info.slave); + bond_fill_ifbond(bond, &bonding_info.master); + netdev_bonding_info_change(slave->dev, &bonding_info); + } + rtnl_unlock(); +} + +static void bond_netdev_notify_work(struct work_struct *_work) +{ + struct netdev_notify_work *w = + container_of(_work, struct netdev_notify_work, work.work); + + bond_netdev_notify(w->slave, w->dev); + dev_put(w->dev); +} + +void bond_queue_slave_event(struct slave *slave) +{ + struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); + + if (!nnw) + return; + + INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); + nnw->slave = slave; + nnw->dev = slave->dev; + + if (queue_delayed_work(slave->bond->wq, &nnw->work, 0)) + dev_hold(slave->dev); + else + kfree(nnw); +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1590,6 +1631,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->link != BOND_LINK_DOWN ? 
"an up" : "a down"); /* enslave is successful */ + bond_queue_slave_event(new_slave); return 0; /* Undo stages on error */ diff --git a/include/net/bonding.h b/include/net/bonding.h index d1367ec74933..4e17095ad46a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -150,6 +150,12 @@ struct bond_parm_tbl { int mode; }; +struct netdev_notify_work { + struct delayed_work work; + struct slave *slave; + struct net_device *dev; +}; + struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -243,6 +249,8 @@ struct bonding { #define bond_slave_get_rtnl(dev) \ ((struct slave *) rtnl_dereference(dev->rx_handler_data)) +void bond_queue_slave_event(struct slave *slave); + struct bond_vlan_tag { __be16 vlan_proto; unsigned short vlan_id; @@ -315,6 +323,7 @@ static inline void bond_set_active_slave(struct slave *slave) { if (slave->backup) { slave->backup = 0; + bond_queue_slave_event(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -323,6 +332,7 @@ static inline void bond_set_backup_slave(struct slave *slave) { if (!slave->backup) { slave->backup = 1; + bond_queue_slave_event(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -336,6 +346,7 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); + bond_queue_slave_event(slave); slave->should_notify = 0; } else { if (slave->should_notify) @@ -493,6 +504,7 @@ static inline bool bond_is_slave_inactive(struct slave *slave) static inline void bond_set_slave_link_state(struct slave *slave, int state) { slave->link = state; + bond_queue_slave_event(slave); } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) -- cgit v1.2.3-59-g8ed1b From 59e14e325066be49b49b6c2503337c69a9ee29fc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:32 +0200 Subject: net/mlx4_core: Port 
aggregation low level interface Implement the hardware interface required for port aggregation. 1. Disable RX port check on receive - don't perform a validity check that matches to QP's port and the port where the packet is received. 2. Virtual to physical port remap - configure virtual to physical port mapping. Port remap capability for virtual functions. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 56 +++++++++++++++++++++++++++++--- include/linux/mlx4/cmd.h | 7 ++++ include/linux/mlx4/device.h | 10 +++++- include/linux/mlx4/qp.h | 1 + 5 files changed, 77 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 154effbfd8be..a681d7c0bb9f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1583,6 +1583,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_CMD_EPERM_wrapper }, + { + .opcode = MLX4_CMD_VIRT_PORT_MAP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index dbabfae3a3de..4b08a393ebcb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [17] = "Asymmetric EQs support", [18] = "More than 80 VFs support", [19] = "Performance optimized for limited rule configuration flow steering support", - [20] = "Recoverable error events support" + [20] = "Recoverable error events support", + [21] = "Port Remap support" }; int i; @@ -863,6 +864,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev 
*dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; @@ -1120,9 +1123,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); - /* For guests, disable mw type 2 */ + /* For guests, disable mw type 2 and port remap*/ MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + bmme_flags &= ~MLX4_FLAG_PORT_REMAP; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); /* turn off device-managed steering capability if not enabled */ @@ -2100,13 +2104,16 @@ struct mlx4_config_dev { __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; - __be32 rsvd3[27]; - __be16 rsvd4; - u8 rsvd5; + __be32 rsvd3; + __be32 roce_flags; + __be32 rsvd4[25]; + __be16 rsvd5; + u8 rsvd6; u8 rx_checksum_val; }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) +#define MLX4_DISABLE_RX_PORT BIT(18) static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { @@ -2209,6 +2216,45 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) } EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); +#define CONFIG_DISABLE_RX_PORT BIT(15) +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT); + if (dis) + config_dev.roce_flags = + cpu_to_be32(CONFIG_DISABLE_RX_PORT); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} + +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) +{ + struct mlx4_cmd_mailbox *mailbox; + struct { + __be32 
v_port1; + __be32 v_port2; + } *v2p; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + v2p = mailbox->buf; + v2p->v_port1 = cpu_to_be32(port1); + v2p->v_port2 = cpu_to_be32(port2); + + err = mlx4_cmd(dev, mailbox->dma, 0, + MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index ae95adc78509..7b6d4e9ff603 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -71,6 +71,7 @@ enum { /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, + MLX4_CMD_VIRT_PORT_MAP = 0x5c, MLX4_CMD_GET_OP_REQ = 0x59, /* TPT commands */ @@ -170,6 +171,12 @@ enum { MLX4_CMD_TIME_CLASS_C = 60000, }; +enum { + /* virtual to physical port mapping opcode modifiers */ + MLX4_GET_PORT_VIRT2PHY = 0x0, + MLX4_SET_PORT_VIRT2PHY = 0x1, +}; + enum { MLX4_MAILBOX_SIZE = 4096, MLX4_ACCESS_MEM_ALIGN = 256, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c95d659a39f2..d9afd99dde39 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -201,7 +201,8 @@ enum { MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, - MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20 + MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 }; enum { @@ -253,9 +254,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; +enum { + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP +}; + enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 0x01, @@ -1378,6 +1384,8 @@ int 
mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 467ccdf94c98..2bbc62aa818a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -96,6 +96,7 @@ enum { MLX4_QP_BIT_RRE = 1 << 15, MLX4_QP_BIT_RWE = 1 << 14, MLX4_QP_BIT_RAE = 1 << 13, + MLX4_QP_BIT_FPP = 1 << 3, MLX4_QP_BIT_RIC = 1 << 4, }; -- cgit v1.2.3-59-g8ed1b From 53f33ae295a5098f12218da1400f55ad7df7447c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:33 +0200 Subject: net/mlx4_core: Port aggregation upper layer interface Supply interface functions to bond and unbond the ports of mlx4 internal interfaces. An example of such an interface is the one registered by the mlx4 IB driver under RoCE. There are 1. Functions to enter and leave bonded mode 2. A function to remap virtual ports to physical ports The bond_mutex prevents simultaneous access to the data that keeps the status of the device in bonded mode. An upper mlx4 interface tells the mlx4 core module that it wants to be subject to such bonding by setting the MLX4_INTFF_BONDING flag. An interface which enters or leaves bonded mode is re-created. The mlx4 Ethernet driver does not set this flag when registering its interface; the IB driver does. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 8 +- drivers/net/ethernet/mellanox/mlx4/intf.c | 54 +++++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 89 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 + drivers/net/ethernet/mellanox/mlx4/qp.c | 2 + .../net/ethernet/mellanox/mlx4/resource_tracker.c | 3 + include/linux/mlx4/device.h | 1 + include/linux/mlx4/driver.h | 19 +++++ 8 files changed, 177 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index f1a5500ff72d..34f2fdf4fe5d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -50,10 +50,14 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->mtu_msgmax = 0xff; if (!is_tx && !rss) context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - if (is_tx) + if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - else + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) + context->params2 |= MLX4_QP_BIT_FPP; + + } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; + } context->usr_page = cpu_to_be32(mdev->priv_uar.index); context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 68d2bad325d5..6fce58718837 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -33,11 +33,13 @@ #include #include +#include #include "mlx4.h" struct mlx4_device_context { struct list_head list; + struct list_head bond_list; struct mlx4_interface *intf; void *context; }; @@ -115,6 +117,58 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable) +{ + struct mlx4_priv *priv = 
mlx4_priv(dev); + struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx; + unsigned long flags; + int ret; + LIST_HEAD(bond_list); + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + ret = mlx4_disable_rx_port_check(dev, enable); + if (ret) { + mlx4_err(dev, "Fail to %s rx port check\n", + enable ? "enable" : "disable"); + return ret; + } + if (enable) { + dev->flags |= MLX4_FLAG_BONDED; + } else { + ret = mlx4_virt2phy_port_map(dev, 1, 2); + if (ret) { + mlx4_err(dev, "Fail to reset port map\n"); + return ret; + } + dev->flags &= ~MLX4_FLAG_BONDED; + } + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) { + if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) { + list_add_tail(&dev_ctx->bond_list, &bond_list); + list_del(&dev_ctx->list); + } + } + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &bond_list, bond_list) { + dev_ctx->intf->remove(dev, dev_ctx->context); + dev_ctx->context = dev_ctx->intf->add(dev); + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n", + dev_ctx->intf->protocol, enable ? + "enabled" : "disabled"); + } + return 0; +} + void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, unsigned long param) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index cc9f48439244..f3245fe0f442 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1160,6 +1160,91 @@ err_set_port: return err ? 
err : count; } +int mlx4_bond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (!mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, true); + else + ret = 0; + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is bonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_bond); + +int mlx4_unbond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, false); + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is unbonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_unbond); + + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) +{ + u8 port1 = v2p->port1; + u8 port2 = v2p->port2; + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + mutex_lock(&priv->bond_mutex); + + /* zero means keep current mapping for this port */ + if (port1 == 0) + port1 = priv->v2p.port1; + if (port2 == 0) + port2 = priv->v2p.port2; + + if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) || + (port2 < 1) || (port2 > MLX4_MAX_PORTS) || + (port1 == 2 && port2 == 1)) { + /* besides boundary checks cross mapping makes + * no sense and therefore not allowed */ + err = -EINVAL; + } else if ((port1 == priv->v2p.port1) && + (port2 == priv->v2p.port2)) { + err = 0; + } else { + err = mlx4_virt2phy_port_map(dev, port1, port2); + if (!err) { + mlx4_dbg(dev, "port map changed: [%d][%d]\n", + port1, port2); + priv->v2p.port1 = port1; + priv->v2p.port2 = port2; + } else { + mlx4_err(dev, "Failed to change port mape: %d\n", err); + } + } + + mutex_unlock(&priv->bond_mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_port_map_set); + static int mlx4_load_fw(struct mlx4_dev 
*dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2638,6 +2723,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); + mutex_init(&priv->bond_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); @@ -2934,6 +3020,9 @@ slave_start: goto err_port; } + priv->v2p.port1 = 1; + priv->v2p.port2 = 2; + err = mlx4_register_device(dev); if (err) goto err_port; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 148dc0945aab..803f17653da7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -885,6 +885,8 @@ struct mlx4_priv { int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct mlx4_port_map v2p; /* cached port mapping configuration */ + struct mutex bond_mutex; /* for bond mode */ __be64 slave_node_guids[MLX4_MFUNC_MAX]; atomic_t opreq_count; @@ -1364,6 +1366,7 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 1586ecce13c7..2bb8553bd905 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -882,6 +882,8 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); + if (states[i + 1] != MLX4_QP_STATE_RTR) + context->params2 &= ~MLX4_QP_BIT_FPP; err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if 
(err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 79feeb6b0d87..c5f3dfca226b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2944,6 +2944,9 @@ static int verify_qp_parameters(struct mlx4_dev *dev, qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); + if (slave != mlx4_master_func_num(dev)) + qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + switch (qp_type) { case MLX4_QP_ST_RC: case MLX4_QP_ST_XRC: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d9afd99dde39..977b0b164431 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,6 +70,7 @@ enum { MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, + MLX4_FLAG_BONDED = 1 << 7 }; enum { diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 022055c8fb26..9553a73d2049 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -49,6 +49,10 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; +enum { + MLX4_INTFF_BONDING = 1 << 0 +}; + struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); @@ -57,11 +61,26 @@ struct mlx4_interface { void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; + int flags; }; int mlx4_register_interface(struct mlx4_interface *intf); void mlx4_unregister_interface(struct mlx4_interface *intf); +int mlx4_bond(struct mlx4_dev *dev); +int mlx4_unbond(struct mlx4_dev *dev); +static inline int mlx4_is_bonded(struct mlx4_dev *dev) +{ + return !!(dev->flags & MLX4_FLAG_BONDED); +} + +struct mlx4_port_map { + u8 port1; + u8 port2; +}; + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); + void *mlx4_get_protocol_dev(struct 
mlx4_dev *dev, enum mlx4_protocol proto, int port); static inline u64 mlx4_mac_to_u64(u8 *addr) -- cgit v1.2.3-59-g8ed1b From 5da0354726e4a6ae2e25c1fa2feb77585d997b05 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:34 +0200 Subject: net/mlx4_en: Port aggregation configuration Capture NETDEV events generated by the bonding driver and based on that make decisions of how to configure port aggregation in the mlx4 core driver. This includes setting the V2P port table and re-creating the interested interfaces in bonded/non-bonded mode. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_main.c | 8 ++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 176 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 5 + 3 files changed, 189 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index c643d2bbb7b9..58d5a07d0ff4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -214,6 +214,8 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); + if (mdev->nb.notifier_call) + unregister_netdevice_notifier(&mdev->nb); kfree(mdev); } @@ -298,6 +300,12 @@ static void *mlx4_en_add(struct mlx4_dev *dev) if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) mdev->pndev[i] = NULL; } + /* register notifier */ + mdev->nb.notifier_call = mlx4_en_netdev_event; + if (register_netdevice_notifier(&mdev->nb)) { + mdev->nb.notifier_call = NULL; + mlx4_err(mdev, "Failed to create notifier\n"); + } return mdev; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e075ff1f4e80..028937b2a199 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2062,6 +2062,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev) /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; + mdev->upper[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); @@ -2441,6 +2442,180 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #endif }; +struct mlx4_en_bond { + struct work_struct work; + struct mlx4_en_priv *priv; + int is_bonded; + struct mlx4_port_map port_map; +}; + +static void mlx4_en_bond_work(struct work_struct *work) +{ + struct mlx4_en_bond *bond = container_of(work, + struct mlx4_en_bond, + work); + int err = 0; + struct mlx4_dev *dev = bond->priv->mdev->dev; + + if (bond->is_bonded) { + if (!mlx4_is_bonded(dev)) { + err = mlx4_bond(dev); + if (err) + en_err(bond->priv, "Fail to bond device\n"); + } + if (!err) { + err = mlx4_port_map_set(dev, &bond->port_map); + if (err) + en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n", + bond->port_map.port1, + bond->port_map.port2, + err); + } + } else if (mlx4_is_bonded(dev)) { + err = mlx4_unbond(dev); + if (err) + en_err(bond->priv, "Fail to unbond device\n"); + } + dev_put(bond->priv->dev); + kfree(bond); +} + +static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded, + u8 v2p_p1, u8 v2p_p2) +{ + struct mlx4_en_bond *bond = NULL; + + bond = kzalloc(sizeof(*bond), GFP_ATOMIC); + if (!bond) + return -ENOMEM; + + INIT_WORK(&bond->work, mlx4_en_bond_work); + bond->priv = priv; + bond->is_bonded = is_bonded; + bond->port_map.port1 = v2p_p1; + bond->port_map.port2 = v2p_p2; + dev_hold(priv->dev); + queue_work(priv->mdev->workqueue, &bond->work); + return 0; +} + +int mlx4_en_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + u8 port = 0; + struct mlx4_en_dev *mdev; + struct mlx4_dev *dev; + int i, 
num_eth_ports = 0; + bool do_bond = true; + struct mlx4_en_priv *priv; + u8 v2p_port1 = 0; + u8 v2p_port2 = 0; + + if (!net_eq(dev_net(ndev), &init_net)) + return NOTIFY_DONE; + + mdev = container_of(this, struct mlx4_en_dev, nb); + dev = mdev->dev; + + /* Go into this mode only when two network devices set on two ports + * of the same mlx4 device are slaves of the same bonding master + */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + ++num_eth_ports; + if (!port && (mdev->pndev[i] == ndev)) + port = i; + mdev->upper[i] = mdev->pndev[i] ? + netdev_master_upper_dev_get(mdev->pndev[i]) : NULL; + /* condition not met: network device is a slave */ + if (!mdev->upper[i]) + do_bond = false; + if (num_eth_ports < 2) + continue; + /* condition not met: same master */ + if (mdev->upper[i] != mdev->upper[i-1]) + do_bond = false; + } + /* condition not met: 2 salves */ + do_bond = (num_eth_ports == 2) ? do_bond : false; + + /* handle only events that come with enough info */ + if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port) + return NOTIFY_DONE; + + priv = netdev_priv(ndev); + if (do_bond) { + struct netdev_notifier_bonding_info *notifier_info = ptr; + struct netdev_bonding_info *bonding_info = + &notifier_info->bonding_info; + + /* required mode 1, 2 or 4 */ + if ((bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) && + (bonding_info->master.bond_mode != BOND_MODE_XOR) && + (bonding_info->master.bond_mode != BOND_MODE_8023AD)) + do_bond = false; + + /* require exactly 2 slaves */ + if (bonding_info->master.num_slaves != 2) + do_bond = false; + + /* calc v2p */ + if (do_bond) { + if (bonding_info->master.bond_mode == + BOND_MODE_ACTIVEBACKUP) { + /* in active-backup mode virtual ports are + * mapped to the physical port of the active + * slave */ + if (bonding_info->slave.state == + BOND_STATE_BACKUP) { + if (port == 1) { + v2p_port1 = 2; + v2p_port2 = 2; + } else { + v2p_port1 = 1; + v2p_port2 = 1; + } + } else { /* BOND_STATE_ACTIVE */ + if (port == 1) { 
+ v2p_port1 = 1; + v2p_port2 = 1; + } else { + v2p_port1 = 2; + v2p_port2 = 2; + } + } + } else { /* Active-Active */ + /* in active-active mode a virtual port is + * mapped to the native physical port if and only + * if the physical port is up */ + __s8 link = bonding_info->slave.link; + + if (port == 1) + v2p_port2 = 2; + else + v2p_port1 = 1; + if ((link == BOND_LINK_UP) || + (link == BOND_LINK_FAIL)) { + if (port == 1) + v2p_port1 = 1; + else + v2p_port2 = 2; + } else { /* BOND_LINK_DOWN || BOND_LINK_BACK */ + if (port == 1) + v2p_port1 = 2; + else + v2p_port2 = 1; + } + } + } + } + + mlx4_en_queue_bond_work(priv, do_bond, + v2p_port1, v2p_port2); + + return NOTIFY_DONE; +} + int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { @@ -2623,6 +2798,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } mdev->pndev[port] = dev; + mdev->upper[port] = NULL; netif_carrier_off(dev); mlx4_en_set_default_moderation(priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 944a112dff37..2a8268e6be15 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -390,6 +390,7 @@ struct mlx4_en_dev { struct pci_dev *pdev; struct mutex state_lock; struct net_device *pndev[MLX4_MAX_PORTS + 1]; + struct net_device *upper[MLX4_MAX_PORTS + 1]; u32 port_cnt; bool device_up; struct mlx4_en_profile profile; @@ -410,6 +411,7 @@ struct mlx4_en_dev { unsigned long overflow_period; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; + struct notifier_block nb; }; @@ -845,6 +847,9 @@ int mlx4_en_reset_config(struct net_device *dev, struct hwtstamp_config ts_config, netdev_features_t new_features); +int mlx4_en_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr); + /* * Functions for time stamping */ -- cgit v1.2.3-59-g8ed1b From 2f48485d1cea5b6c1ce04969ab0228aa7b4659e5 Mon Sep 17 
00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:36 +0200 Subject: IB/mlx4: Reuse mlx4_mac_to_u64() This function is implemented twice... get rid of one copy. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/qp.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index cf000b7ad64f..2f85fc762865 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -40,6 +40,7 @@ #include #include +#include #include #include "mlx4_ib.h" @@ -93,17 +94,6 @@ enum { #ifndef ETH_ALEN #define ETH_ALEN 6 #endif -static inline u64 mlx4_mac_to_u64(u8 *addr) -{ - u64 mac = 0; - int i; - - for (i = 0; i < ETH_ALEN; i++) { - mac <<= 8; - mac |= addr[i]; - } - return mac; -} static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), -- cgit v1.2.3-59-g8ed1b From a575009030931cd8a35c88ec81eb26b9e9f73539 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:37 +0200 Subject: IB/mlx4: Add port aggregation support Register the interface with the mlx4 core driver with port aggregation support and check for port aggregation mode when the 'add' function is called. In this mode, only one physical port is reported to the upper layer (RoCE/IB core stack and ULPs). Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx4/main.c | 76 +++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 9db258f7c804..ed21ae68a977 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -351,6 +351,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, enum ib_mtu tmp; struct mlx4_cmd_mailbox *mailbox; int err = 0; + int is_bonded = mlx4_is_bonded(mdev->dev); mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) @@ -374,8 +375,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; + if (is_bonded) + rtnl_lock(); /* required to get upper dev */ spin_lock_bh(&iboe->lock); ndev = iboe->netdevs[port - 1]; + if (ndev && is_bonded) + ndev = netdev_master_upper_dev_get(ndev); if (!ndev) goto out_unlock; @@ -387,6 +392,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->phys_state = state_to_phys_state(props->state); out_unlock: spin_unlock_bh(&iboe->lock); + if (is_bonded) + rtnl_unlock(); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; @@ -1440,6 +1447,7 @@ static void update_gids_task(struct work_struct *work) union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; + int is_bonded = mlx4_is_bonded(dev); if (!gw->dev->ib_active) return; @@ -1459,7 +1467,10 @@ static void update_gids_task(struct work_struct *work) if (err) pr_warn("set port command failed\n"); else - mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); + if ((gw->port == 1) || !is_bonded) + mlx4_ib_dispatch_event(gw->dev, + is_bonded ? 
1 : gw->port, + IB_EVENT_GID_CHANGE); mlx4_free_cmd_mailbox(dev, mailbox); kfree(gw); @@ -1875,7 +1886,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, * don't want the bond IP based gids in the table since * flows that select port by gid may get the down port. */ - if (port_state == IB_PORT_DOWN) { + if (port_state == IB_PORT_DOWN && + !mlx4_is_bonded(ibdev->dev)) { reset_gid_table(ibdev, port); mlx4_ib_set_default_gid(ibdev, curr_netdev, @@ -2047,6 +2059,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int err; struct mlx4_ib_iboe *iboe; int ib_num_ports = 0; + int num_req_counters; pr_info_once("%s", mlx4_ib_version); @@ -2086,7 +2099,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->num_ports = num_ports; - ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; + ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ? + 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; @@ -2207,7 +2221,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; - for (i = 0; i < ibdev->num_ports; ++i) { + num_req_counters = mlx4_is_bonded(dev) ? 
1 : ibdev->num_ports; + for (i = 0; i < num_req_counters; ++i) { mutex_init(&ibdev->qp1_proxy_lock[i]); if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { @@ -2218,6 +2233,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->counters[i] = -1; } } + if (mlx4_is_bonded(dev)) + for (i = 1; i < ibdev->num_ports ; ++i) + ibdev->counters[i] = ibdev->counters[0]; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ib_num_ports++; @@ -2538,6 +2557,38 @@ out: return; } +static void handle_bonded_port_state_event(struct work_struct *work) +{ + struct ib_event_work *ew = + container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *ibdev = ew->ib_dev; + enum ib_port_state bonded_port_state = IB_PORT_NOP; + int i; + struct ib_event ibev; + + kfree(ew); + spin_lock_bh(&ibdev->iboe.lock); + for (i = 0; i < MLX4_MAX_PORTS; ++i) { + struct net_device *curr_netdev = ibdev->iboe.netdevs[i]; + + enum ib_port_state curr_port_state = + (netif_running(curr_netdev) && + netif_carrier_ok(curr_netdev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + + bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ? + curr_port_state : IB_PORT_ACTIVE; + } + spin_unlock_bh(&ibdev->iboe.lock); + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = 1; + ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ? 
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + + ib_dispatch_event(&ibev); +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -2547,6 +2598,18 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, struct ib_event_work *ew; int p = 0; + if (mlx4_is_bonded(dev) && + ((event == MLX4_DEV_EVENT_PORT_UP) || + (event == MLX4_DEV_EVENT_PORT_DOWN))) { + ew = kmalloc(sizeof(*ew), GFP_ATOMIC); + if (!ew) + return; + INIT_WORK(&ew->work, handle_bonded_port_state_event); + ew->ib_dev = ibdev; + queue_work(wq, &ew->work); + return; + } + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else @@ -2607,7 +2670,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, } ibev.device = ibdev_ptr; - ibev.element.port_num = (u8) p; + ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p; ib_dispatch_event(&ibev); } @@ -2616,7 +2679,8 @@ static struct mlx4_interface mlx4_ib_interface = { .add = mlx4_ib_add, .remove = mlx4_ib_remove, .event = mlx4_ib_event, - .protocol = MLX4_PROT_IB_IPV6 + .protocol = MLX4_PROT_IB_IPV6, + .flags = MLX4_INTFF_BONDING }; static int __init mlx4_ib_init(void) -- cgit v1.2.3-59-g8ed1b From 146d6e19832a72136089afca51e5229d1fd72dcd Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:38 +0200 Subject: IB/mlx4: Create mirror flows in port aggregation mode In port aggregation mode flows for port #1 (the only port) should be mirrored on port #2. This is because packets can arrive from either physical ports. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx4/main.c | 84 ++++++++++++++++++++++++++++++------ drivers/infiniband/hw/mlx4/mlx4_ib.h | 9 +++- 2 files changed, 80 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ed21ae68a977..ca522382dedc 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -851,7 +851,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, struct mlx4_ib_steering { struct list_head list; - u64 reg_id; + struct mlx4_flow_reg_id reg_id; union ib_gid gid; }; @@ -1142,9 +1142,11 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { - int err = 0, i = 0; + int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; enum mlx4_net_trans_promisc_mode type[2]; + struct mlx4_dev *dev = (to_mdev(qp->device))->dev; + int is_bonded = mlx4_is_bonded(dev); memset(type, 0, sizeof(type)); @@ -1179,26 +1181,55 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, while (i < ARRAY_SIZE(type) && type[i]) { err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], - &mflow->reg_id[i]); + &mflow->reg_id[i].id); if (err) goto err_create_flow; i++; + if (is_bonded) { + flow_attr->port = 2; + err = __mlx4_ib_create_flow(qp, flow_attr, + domain, type[j], + &mflow->reg_id[j].mirror); + flow_attr->port = 1; + if (err) + goto err_create_flow; + j++; + } + } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { - err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]); + err = mlx4_ib_tunnel_steer_add(qp, flow_attr, + &mflow->reg_id[i].id); if (err) goto err_create_flow; i++; + if (is_bonded) { + flow_attr->port = 2; + err = mlx4_ib_tunnel_steer_add(qp, flow_attr, + &mflow->reg_id[j].mirror); + flow_attr->port = 1; + if (err) + goto err_create_flow; + j++; + } + /* function to create mirror rule */ } return &mflow->ibflow; err_create_flow: while (i) { - 
(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]); + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, + mflow->reg_id[i].id); i--; } + + while (j) { + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, + mflow->reg_id[j].mirror); + j--; + } err_free: kfree(mflow); return ERR_PTR(err); @@ -1211,10 +1242,16 @@ static int mlx4_ib_destroy_flow(struct ib_flow *flow_id) struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device); struct mlx4_ib_flow *mflow = to_mflow(flow_id); - while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) { - err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]); + while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) { + err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id); if (err) ret = err; + if (mflow->reg_id[i].mirror) { + err = __mlx4_ib_destroy_flow(mdev->dev, + mflow->reg_id[i].mirror); + if (err) + ret = err; + } i++; } @@ -1226,11 +1263,12 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_dev *dev = mdev->dev; struct mlx4_ib_qp *mqp = to_mqp(ibqp); - u64 reg_id; struct mlx4_ib_steering *ib_steering = NULL; enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? 
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; + struct mlx4_flow_reg_id reg_id; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { @@ -1242,10 +1280,20 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - prot, &reg_id); + prot, &reg_id.id); if (err) goto err_malloc; + reg_id.mirror = 0; + if (mlx4_is_bonded(dev)) { + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, 2, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + prot, &reg_id.mirror); + if (err) + goto err_add; + } + err = add_gid_entry(ibqp, gid); if (err) goto err_add; @@ -1261,7 +1309,10 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err_add: mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - prot, reg_id); + prot, reg_id.id); + if (reg_id.mirror) + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + prot, reg_id.mirror); err_malloc: kfree(ib_steering); @@ -1288,10 +1339,12 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_dev *dev = mdev->dev; struct mlx4_ib_qp *mqp = to_mqp(ibqp); struct net_device *ndev; struct mlx4_ib_gid_entry *ge; - u64 reg_id = 0; + struct mlx4_flow_reg_id reg_id = {0, 0}; + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? 
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; @@ -1316,10 +1369,17 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - prot, reg_id); + prot, reg_id.id); if (err) return err; + if (mlx4_is_bonded(dev)) { + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + prot, reg_id.mirror); + if (err) + return err; + } + mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); if (ge) { diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6eb743f65f6f..2b49f9de2556 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -134,10 +134,17 @@ struct mlx4_ib_fmr { struct mlx4_fmr mfmr; }; +#define MAX_REGS_PER_FLOW 2 + +struct mlx4_flow_reg_id { + u64 id; + u64 mirror; +}; + struct mlx4_ib_flow { struct ib_flow ibflow; /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */ - u64 reg_id[2]; + struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW]; }; struct mlx4_ib_wq { -- cgit v1.2.3-59-g8ed1b From c6215745b66a7fbeeda1a826f94dd864a2ccf654 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:39 +0200 Subject: IB/mlx4: Load balance ports in port aggregation mode When the mlx4 IB (RoCE) device works in link aggregation mode, it exposes a single port to upper layers. Therefore, applications always set '1' in port_num attribute when modifying a QP or creating an address handle. To make sure that a node uses all available ports the mlx4 driver will override the port_num attribute with a round robin policy. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx4/ah.c | 1 + drivers/infiniband/hw/mlx4/main.c | 1 + drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 ++++++++ drivers/infiniband/hw/mlx4/qp.c | 19 +++++++++++++++++++ 4 files changed, 29 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 2d8c3397774f..f50a546224ad 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "mlx4_ib.h" diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ca522382dedc..2ed5b996b2f4 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2153,6 +2153,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); ibdev->dev = dev; + ibdev->bond_next_port = 0; strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 2b49f9de2556..721540c9163d 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -534,6 +534,7 @@ struct mlx4_ib_dev { struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; /* lock when destroying qp1_proxy and getting netdev events */ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; + u8 bond_next_port; }; struct ib_event_work { @@ -629,6 +630,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } +static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev) +{ + dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports; + + return dev->bond_next_port + 1; +} + int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2f85fc762865..792f9dc86ada 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ 
b/drivers/infiniband/hw/mlx4/qp.c @@ -1905,6 +1905,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { + if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { + if ((ibqp->qp_type == IB_QPT_RC) || + (ibqp->qp_type == IB_QPT_UD) || + (ibqp->qp_type == IB_QPT_UC) || + (ibqp->qp_type == IB_QPT_RAW_PACKET) || + (ibqp->qp_type == IB_QPT_XRC_INI)) { + attr->port_num = mlx4_ib_bond_next_port(dev); + } + } else { + /* no sense in changing port_num + * when ports are bonded */ + attr_mask &= ~IB_QP_PORT; + } + } + if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->num_ports)) { pr_debug("qpn 0x%x: invalid port number (%d) specified " @@ -1955,6 +1971,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) + attr->port_num = 1; + out: mutex_unlock(&qp->mutex); return err; -- cgit v1.2.3-59-g8ed1b