From 5a0fd09428e47fb08d5a887515d92bb2447f4b65 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 7 Oct 2010 16:24:16 +0200 Subject: IB/mlx4: Limit size of fast registration WRs Fix the limit on the size of max fast registration WRs that can be posted to match hardware capabilities. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx4/main.c') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4e94e360e43b..62e8cd6f0371 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -135,7 +135,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; - props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); + props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; -- cgit v1.2.3-59-g8ed1b From d0d68b8693bd16bfbbc93b89f1d9f3351723307c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 4 Oct 2010 12:11:34 +0000 Subject: IB/mlx4: Signal node desc changes to SM by using FW to generate trap 144 The Node Description cannot be changed via MADs (it is read-only). Until now, it was changed in the driver via sysfs, and the new Node Description was simply inserted by the driver into MAD responses (replacing the description returned by FW). System startup scripts use the sysfs interface to change the node description at driver startup to show the hostname, etc. However, this has a race condition: the SM could discover the original FW node description rather than the system-specific description if it queried the port before the startup scripts finish running. For mlx4, we fix this with a new FW command (SET_NODE) that allows passing the new node description to FW. When this command is invoked, FW sends a trap 144 to the SM. When it gets this trap, the SM can query the node to obtain the new node description -- thus eliminating the effects of the race. This patch simply calls SET_NODE command when a new node description is entered via sysfs (thus causing trap 144 to be issued by the FW). We ignore all failures of the SET_NODE command (including those caused by using a device FW that predates the SET_NODE command), since in that case things work just as before. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 28 +++++++++++++++++++++++----- include/linux/mlx4/cmd.h | 1 + 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw/mlx4/main.c') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 62e8cd6f0371..ac6951d99336 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -272,14 +272,32 @@ out: static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { + struct mlx4_cmd_mailbox *mailbox; + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; - if (mask & IB_DEVICE_MODIFY_NODE_DESC) { - spin_lock(&to_mdev(ibdev)->sm_lock); - memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock(&to_mdev(ibdev)->sm_lock); - } + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + spin_lock(&to_mdev(ibdev)->sm_lock); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock(&to_mdev(ibdev)->sm_lock); + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); + if (IS_ERR(mailbox)) + return 0; + + memset(mailbox->buf, 0, 256); + memcpy(mailbox->buf, props->node_desc, 64); + mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..2731266e73a7 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -57,6 +57,7 @@ enum { MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, MLX4_CMD_SET_PORT = 0xc, + MLX4_CMD_SET_NODE = 0x5a, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, MLX4_CMD_UNMAP_ICM = 0xff9, -- cgit v1.2.3-59-g8ed1b From fa417f7b520ee60b39f7e23528d2030af30a07d1 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 24 Oct 2010 21:08:52 -0700 Subject: IB/mlx4: Add support for IBoE Add support for IBoE to mlx4_ib. The bulk of the code is handling the new address vector fields; mlx4 needs the MAC address of a remote node to include it in a WQE (for datagrams) or in the QP context (for connected QPs). Address resolution is done by assuming all unicast GIDs are either link-local IPv6 addresses. Multicast group attach/detach needs to update the NIC's multicast filters; but since attaching a QP to a multicast group can be done before the QP is bound to a port, for IBoE we need to keep track of all multicast groups that a QP is attached too before it transitions from INIT to RTR (since it does not have a port in the INIT state). Signed-off-by: Eli Cohen [ Many things cleaned up and otherwise monkeyed with; hope I didn't introduce too many bugs. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/ah.c | 153 +++++++++--- drivers/infiniband/hw/mlx4/mad.c | 32 ++- drivers/infiniband/hw/mlx4/main.c | 448 ++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 32 ++- drivers/infiniband/hw/mlx4/qp.c | 130 ++++++++-- include/linux/mlx4/device.h | 27 +++ 6 files changed, 714 insertions(+), 108 deletions(-) (limited to 'drivers/infiniband/hw/mlx4/main.c') diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 11a236f8d884..3bf3544c0aa0 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -30,66 +30,153 @@ * SOFTWARE. */ +#include + #include +#include +#include #include "mlx4_ib.h" -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port) { - struct mlx4_dev *dev = to_mdev(pd->device)->dev; - struct mlx4_ib_ah *ah; + struct in6_addr in6; - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + *is_mcast = 0; - memset(&ah->av, 0, sizeof ah->av); + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); + if (rdma_link_local_addr(&in6)) + rdma_get_ll_mac(&in6, mac); + else if (rdma_is_multicast_addr(&in6)) { + rdma_get_mcast_mac(&in6, mac); + *is_mcast = 1; + } else + return -EINVAL; - ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.g_slid = ah_attr->src_path_bits; - ah->av.dlid = cpu_to_be16(ah_attr->dlid); - if (ah_attr->static_rate) { - ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; - while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && - !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) - --ah->av.stat_rate; - } - ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + return 0; +} + +static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_dev *dev = to_mdev(pd->device)->dev; + + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = ah_attr->src_path_bits; if (ah_attr->ah_flags & IB_AH_GRH) { - ah->av.g_slid |= 0x80; - ah->av.gid_index = ah_attr->grh.sgid_index; - ah->av.hop_limit = ah_attr->grh.hop_limit; - ah->av.sl_tclass_flowlabel |= + ah->av.ib.g_slid |= 0x80; + ah->av.ib.gid_index = ah_attr->grh.sgid_index; + ah->av.ib.hop_limit = ah_attr->grh.hop_limit; + ah->av.ib.sl_tclass_flowlabel |= cpu_to_be32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); - memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); + } + + ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; } + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } +static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_dev *dev = ibdev->dev; + u8 mac[6]; + int err; + int is_mcast; + + err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); + if (err) + return ERR_PTR(err); + + memcpy(ah->av.eth.mac, mac, 6); + ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + if (ah_attr->static_rate) { + ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) + --ah->av.eth.stat_rate; + } + + /* + * HW requires multicast LID so we just choose one. + */ + if (is_mcast) + ah->av.ib.dlid = cpu_to_be16(0xc000); + + memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); + ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + + return &ah->ibah; +} + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah; + struct ib_ah *ret; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { + if (!(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + } else { + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + ret = create_iboe_ah(pd, ah_attr, ah); + } + + if (IS_ERR(ret)) + kfree(ah); + + return ret; + } else + return create_ib_ah(pd, ah_attr, ah); /* never fails */ +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); + enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(ah->av.dlid); - ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24; - if (ah->av.stat_rate) - ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; - ah_attr->src_path_bits = ah->av.g_slid & 0x7F; + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; + ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; + if (ah->av.ib.stat_rate) + ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; if (mlx4_ib_ah_grh_present(ah)) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.traffic_class = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20; + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20; ah_attr->grh.flow_label = - be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; - ah_attr->grh.hop_limit = ah->av.hop_limit; - ah_attr->grh.sgid_index = ah->av.gid_index; - memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.ib.hop_limit; + ah_attr->grh.sgid_index = ah->av.ib.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f38d5b118927..c9a8dd63b9e2 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -311,19 +311,25 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) struct ib_mad_agent *agent; int p, q; int ret; + enum rdma_link_layer ll; - for (p = 0; p < dev->num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) { + ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1); for (q = 0; q <= 1; ++q) { - agent = ib_register_mad_agent(&dev->ib_dev, p + 1, - q ? IB_QPT_GSI : IB_QPT_SMI, - NULL, 0, send_handler, - NULL, NULL); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - goto err; - } - dev->send_agent[p][q] = agent; + if (ll == IB_LINK_LAYER_INFINIBAND) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + dev->send_agent[p][q] = agent; + } else + dev->send_agent[p][q] = NULL; } + } return 0; @@ -344,8 +350,10 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) for (p = 0; p < dev->num_ports; ++p) { for (q = 0; q <= 1; ++q) { agent = dev->send_agent[p][q]; - dev->send_agent[p][q] = NULL; - ib_unregister_mad_agent(agent); + if (agent) { + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } } if (dev->sm_ah[p]) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4e94e360e43b..e65db73fc277 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -35,9 +35,13 @@ #include #include #include +#include +#include +#include #include #include +#include #include #include @@ -58,6 +62,15 @@ static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; +struct update_gid_work { + struct work_struct work; + union ib_gid gids[128]; + struct mlx4_ib_dev *dev; + int port; +}; + +static struct workqueue_struct *wq; + static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -154,28 +167,19 @@ out: return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +static enum rdma_link_layer +mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct mlx4_dev *dev = to_mdev(device)->dev; - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - memset(props, 0, sizeof *props); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); - - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; + return dev->caps.port_mask & (1 << (port_num - 1)) ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; +} +static int ib_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -196,6 +200,80 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + return 0; +} + +static u8 state_to_phys_state(enum ib_port_state state) +{ + return state == IB_PORT_ACTIVE ? 5 : 3; +} + +static int eth_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ + struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe; + struct net_device *ndev; + enum ib_mtu tmp; + + props->active_width = IB_WIDTH_4X; + props->active_speed = 4; + props->port_cap_flags = IB_PORT_CM_SUP; + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = 1; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->max_mtu = IB_MTU_2048; + props->subnet_timeout = 0; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = 0; + props->state = IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + props->active_mtu = IB_MTU_256; + spin_lock(&iboe->lock); + ndev = iboe->netdevs[port - 1]; + if (!ndev) + goto out; + + tmp = iboe_get_mtu(ndev->mtu); + props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; + + props->state = netif_running(ndev) && netif_oper_up(ndev) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + +out: + spin_unlock(&iboe->lock); + return 0; +} + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? + ib_link_query_port(ibdev, port, props, out_mad) : + eth_link_query_port(ibdev, port, props, out_mad); + out: kfree(in_mad); kfree(out_mad); @@ -203,8 +281,8 @@ out: return err; } -static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -241,6 +319,25 @@ out: return err; } +static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + + *gid = dev->iboe.gid_table[port - 1][index]; + + return 0; +} + +static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + return __mlx4_ib_query_gid(ibdev, port, index, gid); + else + return iboe_query_gid(ibdev, port, index, gid); +} + static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { @@ -289,6 +386,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, { struct mlx4_cmd_mailbox *mailbox; int err; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) @@ -304,7 +402,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); mlx4_free_cmd_mailbox(dev->dev, mailbox); @@ -447,18 +545,132 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) +{ + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_gid_entry *ge; + + ge = kzalloc(sizeof *ge, GFP_KERNEL); + if (!ge) + return -ENOMEM; + + ge->gid = *gid; + if (mlx4_ib_add_mc(mdev, mqp, gid)) { + ge->port = mqp->port; + ge->added = 1; + } + + mutex_lock(&mqp->mutex); + list_add_tail(&ge->list, &mqp->gid_list); + mutex_unlock(&mqp->mutex); + + return 0; +} + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid) +{ + u8 mac[6]; + struct net_device *ndev; + int ret = 0; + + if (!mqp->port) + return 0; + + spin_lock(&mdev->iboe.lock); + ndev = mdev->iboe.netdevs[mqp->port - 1]; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + + if (ndev) { + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + rtnl_lock(); + dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac); + ret = 1; + rtnl_unlock(); + dev_put(ndev); + } + + return ret; +} + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw, - !!(to_mqp(ibqp)->flags & - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + if (err) + return err; + + err = add_gid_entry(ibqp, gid); + if (err) + goto err_add; + + return 0; + +err_add: + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw); + return err; +} + +static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) +{ + struct mlx4_ib_gid_entry *ge; + struct mlx4_ib_gid_entry *tmp; + struct mlx4_ib_gid_entry *ret = NULL; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!memcmp(raw, ge->gid.raw, 16)) { + ret = ge; + break; + } + } + + return ret; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u8 mac[6]; + struct net_device *ndev; + struct mlx4_ib_gid_entry *ge; + + err = mlx4_multicast_detach(mdev->dev, + &mqp->mqp, gid->raw); + if (err) + return err; + + mutex_lock(&mqp->mutex); + ge = find_gid_entry(mqp, gid->raw); + if (ge) { + spin_lock(&mdev->iboe.lock); + ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + if (ndev) { + rtnl_lock(); + dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac); + rtnl_unlock(); + dev_put(ndev); + } + list_del(&ge->list); + kfree(ge); + } else + printk(KERN_WARNING "could not find mgid entry\n"); + + mutex_unlock(&mqp->mutex); + + return 0; } static int init_node_data(struct mlx4_ib_dev *dev) @@ -543,15 +755,143 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; +static void mlx4_addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; +} + +static void update_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + struct ib_event event; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox)); + return; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof gw->gids); + + err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) + printk(KERN_WARNING "set port command failed\n"); + else { + memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); + event.device = &gw->dev->ib_dev; + event.element.port_num = gw->port; + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + + mlx4_free_cmd_mailbox(dev, mailbox); + kfree(gw); +} + +static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) +{ + struct net_device *ndev = dev->iboe.netdevs[port - 1]; + struct update_gid_work *work; + + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + if (!clear) { + mlx4_addrconf_ifid_eui48(&work->gids[0].raw[8], ndev); + work->gids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + } + + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + + return 0; +} + +static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) +{ + switch (event) { + case NETDEV_UP: + update_ipv6_gids(dev, port, 0); + break; + + case NETDEV_DOWN: + update_ipv6_gids(dev, port, 1); + dev->iboe.netdevs[port - 1] = NULL; + } +} + +static void netdev_added(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 0); +} + +static void netdev_removed(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 1); +} + +static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct mlx4_ib_dev *ibdev; + struct net_device *oldnd; + struct mlx4_ib_iboe *iboe; + int port; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + iboe = &ibdev->iboe; + + spin_lock(&iboe->lock); + mlx4_foreach_ib_transport_port(port, ibdev->dev) { + oldnd = iboe->netdevs[port - 1]; + iboe->netdevs[port - 1] = + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port); + if (oldnd != iboe->netdevs[port - 1]) { + if (iboe->netdevs[port - 1]) + netdev_added(ibdev, port); + else + netdev_removed(ibdev, port); + } + } + + if (dev == iboe->netdevs[0]) + handle_en_event(ibdev, 1, event); + else if (dev == iboe->netdevs[1]) + handle_en_event(ibdev, 2, event); + + spin_unlock(&iboe->lock); + + return NOTIFY_DONE; +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; int i; + int err; + struct mlx4_ib_iboe *iboe; printk_once(KERN_INFO "%s", mlx4_ib_version); - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + mlx4_foreach_ib_transport_port(i, dev) num_ports++; /* No point in registering a device with no ports... */ @@ -564,6 +904,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) return NULL; } + iboe = &ibdev->iboe; + if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; @@ -612,6 +954,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; @@ -656,6 +999,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + spin_lock_init(&iboe->lock); + if (init_node_data(ibdev)) goto err_map; @@ -668,16 +1013,28 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_mad_init(ibdev)) goto err_reg; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { + iboe->nb.notifier_call = mlx4_ib_netdev_event; + err = register_netdevice_notifier(&iboe->nb); + if (err) + goto err_reg; + } + for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[i])) - goto err_reg; + goto err_notif; } ibdev->ib_active = true; return ibdev; +err_notif: + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + flush_workqueue(wq); + err_reg: ib_unregister_device(&ibdev->ib_dev); @@ -703,11 +1060,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + iounmap(ibdev->uar_map); - for (p = 1; p <= ibdev->num_ports; ++p) + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); - iounmap(ibdev->uar_map); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); @@ -747,19 +1109,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, } static struct mlx4_interface mlx4_ib_interface = { - .add = mlx4_ib_add, - .remove = mlx4_ib_remove, - .event = mlx4_ib_event + .add = mlx4_ib_add, + .remove = mlx4_ib_remove, + .event = mlx4_ib_event, + .protocol = MLX4_PROTOCOL_IB }; static int __init mlx4_ib_init(void) { - return mlx4_register_interface(&mlx4_ib_interface); + int err; + + wq = create_singlethread_workqueue("mlx4_ib"); + if (!wq) + return -ENOMEM; + + err = mlx4_register_interface(&mlx4_ib_interface); + if (err) { + destroy_workqueue(wq); + return err; + } + + return 0; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); + destroy_workqueue(wq); } module_init(mlx4_ib_init); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 3486d7675e56..2a322f21049f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -112,6 +112,13 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; +struct mlx4_ib_gid_entry { + struct list_head list; + union ib_gid gid; + int added; + u8 port; +}; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -138,6 +145,8 @@ struct mlx4_ib_qp { u8 resp_depth; u8 sq_no_prefetch; u8 state; + int mlx_type; + struct list_head gid_list; }; struct mlx4_ib_srq { @@ -157,7 +166,14 @@ struct mlx4_ib_srq { struct mlx4_ib_ah { struct ib_ah ibah; - struct mlx4_av av; + union mlx4_ext_av av; +}; + +struct mlx4_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[MLX4_MAX_PORTS]; + struct notifier_block nb; + union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; struct mlx4_ib_dev { @@ -176,6 +192,7 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; + struct mlx4_ib_iboe iboe; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) @@ -314,9 +331,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port); + static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { - return !!(ah->av.g_slid & 0x80); + u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; + + if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) + return 1; + + return !!(ah->av.ib.g_slid & 0x80); } +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bb1277c8fbf0..17f60fe6e5b6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -48,17 +49,25 @@ enum { enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, - MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX4_IB_LINK_TYPE_IB = 0, + MLX4_IB_LINK_TYPE_ETH = 1 }; enum { /* - * Largest possible UD header: send with GRH and immediate data. + * Largest possible UD header: send with GRH and immediate + * data plus 14 bytes for an Ethernet header. (LRH would only + * use 8 bytes, so Ethernet is the biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 72, + MLX4_IB_UD_HEADER_SIZE = 78, MLX4_IB_LSO_HEADER_SPARE = 128, }; +enum { + MLX4_IB_IBOE_ETHERTYPE = 0x8915 +}; + struct mlx4_ib_sqp { struct mlx4_ib_qp qp; int pkey_index; @@ -462,6 +471,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + INIT_LIST_HEAD(&qp->gid_list); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -649,6 +659,16 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re } } +static void del_gid_entries(struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + list_del(&ge->list); + kfree(ge); + } +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { @@ -695,6 +715,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (!qp->ibqp.srq) mlx4_db_free(dev->dev, &qp->db); } + + del_gid_entries(qp); } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, @@ -852,6 +874,12 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx4_qp_path *path, u8 port) { + int err; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET; + u8 mac[6]; + int is_mcast; + path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { @@ -882,9 +910,35 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + if (is_eth) { + if (!(ah->ah_flags & IB_AH_GRH)) + return -1; + + err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); + if (err) + return err; + + memcpy(path->dmac, mac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* use index 0 into MAC table for IBoE */ + path->grh_mylmc &= 0x80; + } + return 0; } +static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { + ge->added = 1; + ge->port = qp->port; + } + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -980,7 +1034,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - context->pri_path.ackto = attr->timeout << 3; + context->pri_path.ackto |= attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } @@ -1118,8 +1172,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; - if (attr_mask & IB_QP_PORT) + if (attr_mask & IB_QP_PORT) { qp->port = attr->port_num; + update_mcg_macs(dev, qp); + } if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; @@ -1226,35 +1282,45 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int header_size; int spc; int i; + int is_eth; + int is_grh; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; - ib_ud_header_init(send_size, 1, 0, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); + is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_grh = mlx4_ib_ah_grh_present(ah); + ib_ud_header_init(send_size, !is_eth, is_eth, is_grh, 0, &sqp->ud_header); + + if (!is_eth) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; + sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } - sqp->ud_header.lrh.service_level = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - sqp->ud_header.lrh.destination_lid = ah->av.dlid; - sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); - if (mlx4_ib_ah_grh_present(ah)) { + if (is_grh) { sqp->ud_header.grh.traffic_class = - (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = - ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); - sqp->ud_header.grh.hop_limit = ah->av.hop_limit; - ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, - ah->av.gid_index, &sqp->ud_header.grh.source_gid); + ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, - ah->av.dgid, 16); + ah->av.ib.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); - mlx->rlid = sqp->ud_header.lrh.destination_lid; + + if (!is_eth) { + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + } switch (wr->opcode) { case IB_WR_SEND: @@ -1270,9 +1336,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) - sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + if (is_eth) { + u8 *smac; + + memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); + /* FIXME: cache smac value? */ + smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; + memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) + mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + } sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); @@ -1434,6 +1512,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); } static void set_mlx_icrc_seg(void *dseg) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 47e163ad3d11..ca5645c43f61 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -374,6 +374,27 @@ struct mlx4_av { u8 dgid[16]; }; +struct mlx4_eth_av { + __be32 port_pd; + u8 reserved1; + u8 smac_idx; + u16 reserved2; + u8 reserved3; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 dgid[16]; + u32 reserved4[2]; + __be16 vlan; + u8 mac[6]; +}; + +union mlx4_ext_av { + struct mlx4_av ib; + struct mlx4_eth_av eth; +}; + struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; @@ -402,6 +423,12 @@ struct mlx4_init_port_param { if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \ ~(dev)->caps.port_mask) & 1 << ((port) - 1)) +#define mlx4_foreach_ib_transport_port(port, dev) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((dev)->caps.port_mask & 1 << ((port) - 1)) || \ + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + + int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); -- cgit v1.2.3-59-g8ed1b From 4c3eb3ca13966508bcb64f39dcdef48be22f1731 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:19:22 +0300 Subject: IB/mlx4: Add VLAN support for IBoE This patch allows IBoE traffic to be encapsulated in 802.1Q tagged VLAN frames. The VLAN tag is encoded in the GID and derived from it by a simple computation. The netdev notifier callback is modified to catch VLAN device addition/removal and the port's GID table is updated to reflect the change, so that for each netdevice there is an entry in the GID table. When the port's GID table is exhausted, GID entries will not be added. Only children of the main interfaces can add to the GID table; if a VLAN interface is added on another VLAN interface (e.g. "vconfig add eth2.6 8"), then that interfaces will not add an entry to the GID table. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/ah.c | 10 ++++ drivers/infiniband/hw/mlx4/main.c | 99 ++++++++++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx4/qp.c | 79 +++++++++++++++++++++++++------ drivers/net/mlx4/en_netdev.c | 10 ++++ drivers/net/mlx4/mlx4_en.h | 1 + drivers/net/mlx4/port.c | 19 ++++++++ include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 2 +- 8 files changed, 193 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw/mlx4/main.c') diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 3bf3544c0aa0..4b8f9c49397e 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -31,6 +31,7 @@ */ #include +#include #include #include @@ -91,17 +92,26 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; + union ib_gid sgid; u8 mac[6]; int err; int is_mcast; + u16 vlan_tag; err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); if (err) return ERR_PTR(err); memcpy(ah->av.eth.mac, mac, 6); + err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); + if (err) + return ERR_PTR(err); + vlan_tag = rdma_get_vlan_id(&sgid); + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.vlan = cpu_to_be16(vlan_tag); if (ah_attr->static_rate) { ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e65db73fc277..8736bd836dc0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,8 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } +static union ib_gid zgid; + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -755,12 +758,17 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; -static void mlx4_addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) { memcpy(eui, dev->dev_addr, 3); memcpy(eui + 5, dev->dev_addr + 3, 3); - eui[3] = 0xFF; - eui[4] = 0xFE; + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } eui[0] ^= 2; } @@ -802,28 +810,93 @@ static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) { struct net_device *ndev = dev->iboe.netdevs[port - 1]; struct update_gid_work *work; + struct net_device *tmp; + int i; + u8 *hits; + int ret; + union ib_gid gid; + int free; + int found; + int need_update = 0; + u16 vid; work = kzalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; - if (!clear) { - mlx4_addrconf_ifid_eui48(&work->gids[0].raw[8], ndev); - work->gids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + hits = kzalloc(128, GFP_ATOMIC); + if (!hits) { + ret = -ENOMEM; + goto out; + } + + read_lock(&dev_base_lock); + for_each_netdev(&init_net, tmp) { + if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { + gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + vid = rdma_vlan_dev_vlan_id(tmp); + mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); + found = 0; + free = -1; + for (i = 0; i < 128; ++i) { + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + free = i; + if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { + hits[i] = 1; + found = 1; + break; + } + } + + if (!found) { + if (tmp == ndev && + (memcmp(&dev->iboe.gid_table[port - 1][0], + &gid, sizeof gid) || + !memcmp(&dev->iboe.gid_table[port - 1][0], + &zgid, sizeof gid))) { + dev->iboe.gid_table[port - 1][0] = gid; + ++need_update; + hits[0] = 1; + } else if (free >= 0) { + dev->iboe.gid_table[port - 1][free] = gid; + hits[free] = 1; + ++need_update; + } + } + } } + read_unlock(&dev_base_lock); + + for (i = 0; i < 128; ++i) + if (!hits[i]) { + if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + ++need_update; + dev->iboe.gid_table[port - 1][i] = zgid; + } - INIT_WORK(&work->work, update_gids_task); - work->port = port; - work->dev = dev; - queue_work(wq, &work->work); + if (need_update) { + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + } else + kfree(work); + kfree(hits); return 0; + +out: + kfree(work); + return ret; } static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) { switch (event) { case NETDEV_UP: + case NETDEV_CHANGEADDR: update_ipv6_gids(dev, port, 0); break; @@ -871,9 +944,11 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event } } - if (dev == iboe->netdevs[0]) + if (dev == iboe->netdevs[0] || + (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) handle_en_event(ibdev, 1, event); - else if (dev == iboe->netdevs[1]) + else if (dev == iboe->netdevs[1] + || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) handle_en_event(ibdev, 2, event); spin_unlock(&iboe->lock); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 269648445113..9a7794ac34c1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -37,6 +37,7 @@ #include #include +#include #include @@ -57,10 +58,11 @@ enum { enum { /* * Largest possible UD header: send with GRH and immediate - * data plus 14 bytes for an Ethernet header. (LRH would only - * use 8 bytes, so Ethernet is the biggest case) + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 78, + MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; @@ -879,6 +881,8 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, IB_LINK_LAYER_ETHERNET; u8 mac[6]; int is_mcast; + u16 vlan_tag; + int vidx; path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); @@ -907,10 +911,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, memcpy(path->rgid, ah->grh.dgid.raw, 16); } - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 0xf) << 2); - if (is_eth) { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1); + if (!(ah->ah_flags & IB_AH_GRH)) return -1; @@ -922,7 +926,18 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->ackto = MLX4_IB_LINK_TYPE_ETH; /* use index 0 into MAC table for IBoE */ path->grh_mylmc &= 0x80; - } + + vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + if (vlan_tag < 0x1000) { + if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) + return -ENOENT; + + path->vlan_index = vidx; + path->fl = 1 << 6; + } + } else + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); return 0; } @@ -1277,13 +1292,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; int is_eth; + int is_vlan = 0; int is_grh; + u16 vlan; send_size = 0; for (i = 0; i < wr->num_sge; ++i) @@ -1291,7 +1309,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); - ib_ud_header_init(send_size, !is_eth, is_eth, 0, is_grh, 0, &sqp->ud_header); + if (is_eth) { + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + vlan = rdma_get_vlan_id(&sgid); + is_vlan = vlan < 0x1000; + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); if (!is_eth) { sqp->ud_header.lrh.service_level = @@ -1345,7 +1369,15 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); - sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + u16 pcp; + + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13; + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } } else { sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) @@ -1507,13 +1539,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_send_wr *wr, __be16 *vlan) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1616,6 +1649,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1719,7 +1753,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, wr, &vlan); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1797,6 +1831,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + if (be16_to_cpu(vlan) < 0x1000) { + ctrl->ins_vlan = 1 << 6; + ctrl->vlan_tag = vlan; + } + stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -1946,17 +1985,27 @@ static int to_ib_qp_access_flags(int mlx4_flags) return ib_flags; } -static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); - ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; @@ -2009,8 +2058,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index a0d8a26f5a02..9a87c4f3bbbd 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -69,6 +69,7 @@ static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; + int idx; if (!priv->vlgrp) return; @@ -83,7 +84,10 @@ static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) if (err) en_err(priv, "Failed configuring VLAN filter\n"); } + if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) + en_err(priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); + } static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) @@ -91,6 +95,7 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; + int idx; if (!priv->vlgrp) return; @@ -101,6 +106,11 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); + if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx)) + mlx4_unregister_vlan(mdev->dev, priv->port, idx); + else + en_err(priv, "could not find vid %d in cache\n", vid); + if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp); if (err) diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 449210994ee9..dab5eafb8946 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -463,6 +463,7 @@ struct mlx4_en_priv { char *mc_addrs; int mc_addrs_cnt; struct mlx4_en_stat_out_mbox hw_stats; + int vids[128]; }; diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 606aa58afdea..56371ef328ef 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -182,6 +182,25 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, return err; } +int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int i; + + for (i = 0; i < MLX4_MAX_VLAN_NUM; ++i) { + if (table->refs[i] && + (vid == (MLX4_VLAN_MASK & + be32_to_cpu(table->entries[i])))) { + /* VLAN already registered, increase reference count */ + *idx = i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan); + int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ca5645c43f61..ff9893a33e90 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -496,6 +496,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index); +int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 97cfdc8d7e2f..0eeb2a1a867c 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -109,7 +109,7 @@ struct mlx4_qp_path { __be32 tclass_flowlabel; u8 rgid[16]; u8 sched_queue; - u8 snooper_flags; + u8 vlan_index; u8 reserved3[2]; u8 counter_index; u8 reserved4; -- cgit v1.2.3-59-g8ed1b