From 61bd3857ff2c7daf756d49b41e6277bbdaa8f789 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:29 +0200 Subject: net/core: Add event for a change in slave state Add event which provides an indication on a change in the state of a bonding slave. The event handler should cast the pointer to the appropriate type (struct netdev_bonding_info) in order to get the full info about the slave. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1347ac50d2af..ce784d5018e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats { #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 +#define NETDEV_BONDING_INFO 0x0019 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +struct netdev_bonding_info { + ifslave slave; + ifbond master; +}; + +struct netdev_notifier_bonding_info { + struct netdev_notifier_info info; /* must be first */ + struct netdev_bonding_info bonding_info; +}; + +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info); + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { -- cgit v1.3-14-g43fede From 69a2338e05995b10225b2a131f7540d1305980e4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:30 +0200 Subject: net/bonding: Move slave state changes to a helper 
function Move slave state changes to a helper function, this is a pre-step for adding functionality of dispatching an event when this helper is called. This commit doesn't add new functionality. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 64 ++++++++++++++++++++++++----------------- include/net/bonding.h | 5 ++++ 2 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c9e519cb9214..92fe3a1bf52b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -790,7 +790,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } new_active->delay = 0; - new_active->link = BOND_LINK_UP; + bond_set_slave_link_state(new_active, BOND_LINK_UP); if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1181,6 +1181,21 @@ static void bond_free_slave(struct slave *slave) kfree(slave); } +static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info) +{ + info->bond_mode = BOND_MODE(bond); + info->miimon = bond->params.miimon; + info->num_slaves = bond->slave_cnt; +} + +static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) +{ + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = bond_slave_state(slave); + info->link_failure_count = slave->link_failure_count; +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1444,19 +1459,22 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond->params.miimon) { if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { - new_slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(new_slave, + BOND_LINK_BACK); new_slave->delay = bond->params.updelay; } else { - new_slave->link 
= BOND_LINK_UP; + bond_set_slave_link_state(new_slave, + BOND_LINK_UP); } } else { - new_slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(new_slave, BOND_LINK_DOWN); } } else if (bond->params.arp_interval) { - new_slave->link = (netif_carrier_ok(slave_dev) ? - BOND_LINK_UP : BOND_LINK_DOWN); + bond_set_slave_link_state(new_slave, + (netif_carrier_ok(slave_dev) ? + BOND_LINK_UP : BOND_LINK_DOWN)); } else { - new_slave->link = BOND_LINK_UP; + bond_set_slave_link_state(new_slave, BOND_LINK_UP); } if (new_slave->link != BOND_LINK_DOWN) @@ -1821,11 +1839,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) { struct bonding *bond = netdev_priv(bond_dev); - - info->bond_mode = BOND_MODE(bond); - info->miimon = bond->params.miimon; - info->num_slaves = bond->slave_cnt; - + bond_fill_ifbond(bond, info); return 0; } @@ -1839,10 +1853,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in bond_for_each_slave(bond, slave, iter) { if (i++ == (int)info->slave_id) { res = 0; - strcpy(info->slave_name, slave->dev->name); - info->link = slave->link; - info->state = bond_slave_state(slave); - info->link_failure_count = slave->link_failure_count; + bond_fill_ifslave(slave, info); break; } } @@ -1872,7 +1883,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - slave->link = BOND_LINK_FAIL; + bond_set_slave_link_state(slave, BOND_LINK_FAIL); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -1887,7 +1898,7 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", 
(bond->params.downdelay - slave->delay) * @@ -1909,7 +1920,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(slave, BOND_LINK_BACK); slave->delay = bond->params.updelay; if (slave->delay) { @@ -1922,7 +1933,8 @@ static int bond_miimon_inspect(struct bonding *bond) /*FALLTHRU*/ case BOND_LINK_BACK: if (!link_state) { - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, + BOND_LINK_DOWN); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -1960,7 +1972,7 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; primary = rtnl_dereference(bond->primary_slave); @@ -2000,7 +2012,7 @@ static void bond_miimon_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_8023AD) @@ -2583,7 +2595,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *current_arp_slave; current_arp_slave = rtnl_dereference(bond->current_arp_slave); - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); if (current_arp_slave) { bond_set_slave_inactive_flags( current_arp_slave, @@ -2606,7 +2618,7 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); @@ -2685,7 +2697,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) * up when it is actually down */ if (!bond_slave_is_up(slave) && slave->link == 
BOND_LINK_UP) { - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2705,7 +2717,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave) goto check_state; - new_slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(new_slave, BOND_LINK_BACK); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->last_link_up = jiffies; diff --git a/include/net/bonding.h b/include/net/bonding.h index 29f53eacac0a..d1367ec74933 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -490,6 +490,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } +static inline void bond_set_slave_link_state(struct slave *slave, int state) +{ + slave->link = state; +} + static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) { struct in_device *in_dev; -- cgit v1.3-14-g43fede From 69e6113343cfe983511904ffca0d7a1466460b67 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:31 +0200 Subject: net/bonding: Notify state change on slaves Use notifier chain to dispatch an event upon a change in slave state. Event is dispatched with slave specific info. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 42 +++++++++++++++++++++++++++++++++++++++++ include/net/bonding.h | 12 ++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 92fe3a1bf52b..679ef00d6b16 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1196,6 +1196,47 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } +static void bond_netdev_notify(struct slave *slave, struct net_device *dev) +{ + struct bonding *bond = slave->bond; + struct netdev_bonding_info bonding_info; + + rtnl_lock(); + /* make sure that slave is still valid */ + if (dev->priv_flags & IFF_BONDING) { + bond_fill_ifslave(slave, &bonding_info.slave); + bond_fill_ifbond(bond, &bonding_info.master); + netdev_bonding_info_change(slave->dev, &bonding_info); + } + rtnl_unlock(); +} + +static void bond_netdev_notify_work(struct work_struct *_work) +{ + struct netdev_notify_work *w = + container_of(_work, struct netdev_notify_work, work.work); + + bond_netdev_notify(w->slave, w->dev); + dev_put(w->dev); +} + +void bond_queue_slave_event(struct slave *slave) +{ + struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); + + if (!nnw) + return; + + INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); + nnw->slave = slave; + nnw->dev = slave->dev; + + if (queue_delayed_work(slave->bond->wq, &nnw->work, 0)) + dev_hold(slave->dev); + else + kfree(nnw); +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1590,6 +1631,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->link != BOND_LINK_DOWN ? 
"an up" : "a down"); /* enslave is successful */ + bond_queue_slave_event(new_slave); return 0; /* Undo stages on error */ diff --git a/include/net/bonding.h b/include/net/bonding.h index d1367ec74933..4e17095ad46a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -150,6 +150,12 @@ struct bond_parm_tbl { int mode; }; +struct netdev_notify_work { + struct delayed_work work; + struct slave *slave; + struct net_device *dev; +}; + struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -243,6 +249,8 @@ struct bonding { #define bond_slave_get_rtnl(dev) \ ((struct slave *) rtnl_dereference(dev->rx_handler_data)) +void bond_queue_slave_event(struct slave *slave); + struct bond_vlan_tag { __be16 vlan_proto; unsigned short vlan_id; @@ -315,6 +323,7 @@ static inline void bond_set_active_slave(struct slave *slave) { if (slave->backup) { slave->backup = 0; + bond_queue_slave_event(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -323,6 +332,7 @@ static inline void bond_set_backup_slave(struct slave *slave) { if (!slave->backup) { slave->backup = 1; + bond_queue_slave_event(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -336,6 +346,7 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); + bond_queue_slave_event(slave); slave->should_notify = 0; } else { if (slave->should_notify) @@ -493,6 +504,7 @@ static inline bool bond_is_slave_inactive(struct slave *slave) static inline void bond_set_slave_link_state(struct slave *slave, int state) { slave->link = state; + bond_queue_slave_event(slave); } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) -- cgit v1.3-14-g43fede From 59e14e325066be49b49b6c2503337c69a9ee29fc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:32 +0200 Subject: net/mlx4_core: Port 
aggregation low level interface Implement the hardware interface required for port aggregation. 1. Disable RX port check on receive - don't verify that the QP's port matches the port on which the packet was received. 2. Virtual to physical port remap - configure the virtual to physical port mapping. The port remap capability is not exposed to virtual functions (guests). Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 56 +++++++++++++++++++++++++++++--- include/linux/mlx4/cmd.h | 7 ++++ include/linux/mlx4/device.h | 10 +++++- include/linux/mlx4/qp.h | 1 + 5 files changed, 77 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 154effbfd8be..a681d7c0bb9f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1583,6 +1583,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_CMD_EPERM_wrapper }, + { + .opcode = MLX4_CMD_VIRT_PORT_MAP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index dbabfae3a3de..4b08a393ebcb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [17] = "Asymmetric EQs support", [18] = "More than 80 VFs support", [19] = "Performance optimized for limited rule configuration flow steering support", - [20] = "Recoverable error events support" + [20] = "Recoverable error events support", + [21] = "Port Remap support" }; int i; @@ -863,6 +864,8 @@ int
mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; @@ -1120,9 +1123,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); - /* For guests, disable mw type 2 */ + /* For guests, disable mw type 2 and port remap*/ MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + bmme_flags &= ~MLX4_FLAG_PORT_REMAP; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); /* turn off device-managed steering capability if not enabled */ @@ -2100,13 +2104,16 @@ struct mlx4_config_dev { __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; - __be32 rsvd3[27]; - __be16 rsvd4; - u8 rsvd5; + __be32 rsvd3; + __be32 roce_flags; + __be32 rsvd4[25]; + __be16 rsvd5; + u8 rsvd6; u8 rx_checksum_val; }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) +#define MLX4_DISABLE_RX_PORT BIT(18) static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { @@ -2209,6 +2216,45 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) } EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); +#define CONFIG_DISABLE_RX_PORT BIT(15) +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT); + if (dis) + config_dev.roce_flags = + cpu_to_be32(CONFIG_DISABLE_RX_PORT); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} + +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) +{ + struct mlx4_cmd_mailbox 
*mailbox; + struct { + __be32 v_port1; + __be32 v_port2; + } *v2p; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + v2p = mailbox->buf; + v2p->v_port1 = cpu_to_be32(port1); + v2p->v_port2 = cpu_to_be32(port2); + + err = mlx4_cmd(dev, mailbox->dma, 0, + MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index ae95adc78509..7b6d4e9ff603 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -71,6 +71,7 @@ enum { /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, + MLX4_CMD_VIRT_PORT_MAP = 0x5c, MLX4_CMD_GET_OP_REQ = 0x59, /* TPT commands */ @@ -170,6 +171,12 @@ enum { MLX4_CMD_TIME_CLASS_C = 60000, }; +enum { + /* virtual to physical port mapping opcode modifiers */ + MLX4_GET_PORT_VIRT2PHY = 0x0, + MLX4_SET_PORT_VIRT2PHY = 0x1, +}; + enum { MLX4_MAILBOX_SIZE = 4096, MLX4_ACCESS_MEM_ALIGN = 256, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c95d659a39f2..d9afd99dde39 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -201,7 +201,8 @@ enum { MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, - MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20 + MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 }; enum { @@ -253,9 +254,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; +enum { + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP +}; + enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 
0x01, @@ -1378,6 +1384,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 467ccdf94c98..2bbc62aa818a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -96,6 +96,7 @@ enum { MLX4_QP_BIT_RRE = 1 << 15, MLX4_QP_BIT_RWE = 1 << 14, MLX4_QP_BIT_RAE = 1 << 13, + MLX4_QP_BIT_FPP = 1 << 3, MLX4_QP_BIT_RIC = 1 << 4, }; -- cgit v1.3-14-g43fede From 53f33ae295a5098f12218da1400f55ad7df7447c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:33 +0200 Subject: net/mlx4_core: Port aggregation upper layer interface Supply interface functions to bond and unbond the ports of mlx4 internal interfaces. An example of such an interface is the one registered by the mlx4 IB driver under RoCE. There are: 1. Functions to enter and leave bonded mode 2. A function to remap virtual ports to physical ports The bond_mutex prevents simultaneous access to the data that keeps the status of the device in bonded mode. An upper mlx4 interface tells the mlx4 core module that it wants to be subject to such bonding by setting the MLX4_INTFF_BONDING flag. An interface with this flag set is re-created when the device enters or leaves bonded mode. The mlx4 Ethernet driver does not set this flag when registering its interface; the IB driver does. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 8 +- drivers/net/ethernet/mellanox/mlx4/intf.c | 54 +++++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 89 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 + drivers/net/ethernet/mellanox/mlx4/qp.c | 2 + .../net/ethernet/mellanox/mlx4/resource_tracker.c | 3 + include/linux/mlx4/device.h | 1 + include/linux/mlx4/driver.h | 19 +++++ 8 files changed, 177 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index f1a5500ff72d..34f2fdf4fe5d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -50,10 +50,14 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->mtu_msgmax = 0xff; if (!is_tx && !rss) context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - if (is_tx) + if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - else + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) + context->params2 |= MLX4_QP_BIT_FPP; + + } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; + } context->usr_page = cpu_to_be32(mdev->priv_uar.index); context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 68d2bad325d5..6fce58718837 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -33,11 +33,13 @@ #include #include +#include #include "mlx4.h" struct mlx4_device_context { struct list_head list; + struct list_head bond_list; struct mlx4_interface *intf; void *context; }; @@ -115,6 +117,58 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable) +{ + struct 
mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx; + unsigned long flags; + int ret; + LIST_HEAD(bond_list); + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + ret = mlx4_disable_rx_port_check(dev, enable); + if (ret) { + mlx4_err(dev, "Fail to %s rx port check\n", + enable ? "enable" : "disable"); + return ret; + } + if (enable) { + dev->flags |= MLX4_FLAG_BONDED; + } else { + ret = mlx4_virt2phy_port_map(dev, 1, 2); + if (ret) { + mlx4_err(dev, "Fail to reset port map\n"); + return ret; + } + dev->flags &= ~MLX4_FLAG_BONDED; + } + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) { + if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) { + list_add_tail(&dev_ctx->bond_list, &bond_list); + list_del(&dev_ctx->list); + } + } + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &bond_list, bond_list) { + dev_ctx->intf->remove(dev, dev_ctx->context); + dev_ctx->context = dev_ctx->intf->add(dev); + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n", + dev_ctx->intf->protocol, enable ? + "enabled" : "disabled"); + } + return 0; +} + void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, unsigned long param) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index cc9f48439244..f3245fe0f442 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1160,6 +1160,91 @@ err_set_port: return err ? 
err : count; } +int mlx4_bond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (!mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, true); + else + ret = 0; + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is bonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_bond); + +int mlx4_unbond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, false); + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is unbonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_unbond); + + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) +{ + u8 port1 = v2p->port1; + u8 port2 = v2p->port2; + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + mutex_lock(&priv->bond_mutex); + + /* zero means keep current mapping for this port */ + if (port1 == 0) + port1 = priv->v2p.port1; + if (port2 == 0) + port2 = priv->v2p.port2; + + if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) || + (port2 < 1) || (port2 > MLX4_MAX_PORTS) || + (port1 == 2 && port2 == 1)) { + /* besides boundary checks cross mapping makes + * no sense and therefore not allowed */ + err = -EINVAL; + } else if ((port1 == priv->v2p.port1) && + (port2 == priv->v2p.port2)) { + err = 0; + } else { + err = mlx4_virt2phy_port_map(dev, port1, port2); + if (!err) { + mlx4_dbg(dev, "port map changed: [%d][%d]\n", + port1, port2); + priv->v2p.port1 = port1; + priv->v2p.port2 = port2; + } else { + mlx4_err(dev, "Failed to change port mape: %d\n", err); + } + } + + mutex_unlock(&priv->bond_mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_port_map_set); + static int mlx4_load_fw(struct mlx4_dev 
*dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2638,6 +2723,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); + mutex_init(&priv->bond_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); @@ -2934,6 +3020,9 @@ slave_start: goto err_port; } + priv->v2p.port1 = 1; + priv->v2p.port2 = 2; + err = mlx4_register_device(dev); if (err) goto err_port; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 148dc0945aab..803f17653da7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -885,6 +885,8 @@ struct mlx4_priv { int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct mlx4_port_map v2p; /* cached port mapping configuration */ + struct mutex bond_mutex; /* for bond mode */ __be64 slave_node_guids[MLX4_MFUNC_MAX]; atomic_t opreq_count; @@ -1364,6 +1366,7 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 1586ecce13c7..2bb8553bd905 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -882,6 +882,8 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); + if (states[i + 1] != MLX4_QP_STATE_RTR) + context->params2 &= ~MLX4_QP_BIT_FPP; err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if 
(err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 79feeb6b0d87..c5f3dfca226b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2944,6 +2944,9 @@ static int verify_qp_parameters(struct mlx4_dev *dev, qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); + if (slave != mlx4_master_func_num(dev)) + qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + switch (qp_type) { case MLX4_QP_ST_RC: case MLX4_QP_ST_XRC: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d9afd99dde39..977b0b164431 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,6 +70,7 @@ enum { MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, + MLX4_FLAG_BONDED = 1 << 7 }; enum { diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 022055c8fb26..9553a73d2049 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -49,6 +49,10 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; +enum { + MLX4_INTFF_BONDING = 1 << 0 +}; + struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); @@ -57,11 +61,26 @@ struct mlx4_interface { void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; + int flags; }; int mlx4_register_interface(struct mlx4_interface *intf); void mlx4_unregister_interface(struct mlx4_interface *intf); +int mlx4_bond(struct mlx4_dev *dev); +int mlx4_unbond(struct mlx4_dev *dev); +static inline int mlx4_is_bonded(struct mlx4_dev *dev) +{ + return !!(dev->flags & MLX4_FLAG_BONDED); +} + +struct mlx4_port_map { + u8 port1; + u8 port2; +}; + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); + void *mlx4_get_protocol_dev(struct 
mlx4_dev *dev, enum mlx4_protocol proto, int port); static inline u64 mlx4_mac_to_u64(u8 *addr) -- cgit v1.3-14-g43fede