aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx4/main.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 16:49:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 16:49:49 -0700
commite0456717e483bb8a9431b80a5bdc99a928b9b003 (patch)
tree5eb5add2bafd1f20326d70f5cb3b711d00a40b10 /drivers/net/ethernet/mellanox/mlx4/main.c
parentMerge branch 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentbridge: vlan: flush the dynamically learned entries on port vlan delete (diff)
downloadlinux-dev-e0456717e483bb8a9431b80a5bdc99a928b9b003.tar.xz
linux-dev-e0456717e483bb8a9431b80a5bdc99a928b9b003.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add TX fast path in mac80211, from Johannes Berg. 2) Add TSO/GRO support to ibmveth, from Thomas Falcon 3) Move away from cached routes in ipv6, just like ipv4, from Martin KaFai Lau. 4) Lots of new rhashtable tests, from Thomas Graf. 5) Run ingress qdisc lockless, from Alexei Starovoitov. 6) Allow servers to fetch TCP packet headers for SYN packets of new connections, for fingerprinting. From Eric Dumazet. 7) Add mode parameter to pktgen, for testing receive. From Alexei Starovoitov. 8) Cache access optimizations via simplifications of build_skb(), from Alexander Duyck. 9) Move page frag allocator under mm/, also from Alexander. 10) Add xmit_more support to hv_netvsc, from KY Srinivasan. 11) Add a counter guard in case we try to perform endless reclassify loops in the packet scheduler. 12) Extern flow dissector to be programmable and use it in new "Flower" classifier. From Jiri Pirko. 13) AF_PACKET fanout rollover fixes, performance improvements, and new statistics. From Willem de Bruijn. 14) Add netdev driver for GENEVE tunnels, from John W Linville. 15) Add ingress netfilter hooks and filtering, from Pablo Neira Ayuso. 16) Fix handling of epoll edge triggers in TCP, from Eric Dumazet. 17) Add an ECN retry fallback for the initial TCP handshake, from Daniel Borkmann. 18) Add tail call support to BPF, from Alexei Starovoitov. 19) Add several pktgen helper scripts, from Jesper Dangaard Brouer. 20) Add zerocopy support to AF_UNIX, from Hannes Frederic Sowa. 21) Favor even port numbers for allocation to connect() requests, and odd port numbers for bind(0), in an effort to help avoid ip_local_port_range exhaustion. From Eric Dumazet. 22) Add Cavium ThunderX driver, from Sunil Goutham. 23) Allow bpf programs to access skb_iif and dev->ifindex SKB metadata, from Alexei Starovoitov. 24) Add support for T6 chips in cxgb4vf driver, from Hariprasad Shenai. 25) Double TCP Small Queues default to 256K to accomodate situations like the XEN driver and wireless aggregation. From Wei Liu. 26) Add more entropy inputs to flow dissector, from Tom Herbert. 27) Add CDG congestion control algorithm to TCP, from Kenneth Klette Jonassen. 28) Convert ipset over to RCU locking, from Jozsef Kadlecsik. 29) Track and act upon link status of ipv4 route nexthops, from Andy Gospodarek. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1670 commits) bridge: vlan: flush the dynamically learned entries on port vlan delete bridge: multicast: add a comment to br_port_state_selection about blocking state net: inet_diag: export IPV6_V6ONLY sockopt stmmac: troubleshoot unexpected bits in des0 & des1 net: ipv4 sysctl option to ignore routes when nexthop link is down net: track link-status of ipv4 nexthops net: switchdev: ignore unsupported bridge flags net: Cavium: Fix MAC address setting in shutdown state drivers: net: xgene: fix for ACPI support without ACPI ip: report the original address of ICMP messages net/mlx5e: Prefetch skb data on RX net/mlx5e: Pop cq outside mlx5e_get_cqe net/mlx5e: Remove mlx5e_cq.sqrq back-pointer net/mlx5e: Remove extra spaces net/mlx5e: Avoid TX CQE generation if more xmit packets expected net/mlx5e: Avoid redundant dev_kfree_skb() upon NOP completion net/mlx5e: Remove re-assignment of wq type in mlx5e_enable_rq() net/mlx5e: Use skb_shinfo(skb)->gso_segs rather than counting them net/mlx5e: Static mapping of netdev priv resources to/from netdev TX queues net/mlx4_en: Use HW counters for rx/tx bytes/packets in PF device ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/main.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c266
1 files changed, 222 insertions, 44 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 70de39c6a397..12fbfcb44d8a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -479,7 +479,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
}
}
- dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);
+ if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
+ (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
+ (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
+ mlx4_warn(dev,
+ "Granular QoS per VF not supported with IB/Eth configuration\n");
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
+ }
+
+ dev->caps.max_counters = dev_cap->max_counters;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
@@ -2212,20 +2220,73 @@ err_free_icm:
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- int nent;
+ int nent_pow2;
if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
return -ENOENT;
- nent = dev->caps.max_counters;
- return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
+ if (!dev->caps.max_counters)
+ return -ENOSPC;
+
+ nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
+ /* reserve last counter index for sink counter */
+ return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
+ nent_pow2 - 1, 0,
+ nent_pow2 - dev->caps.max_counters + 1);
}
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return;
+
+ if (!dev->caps.max_counters)
+ return;
+
mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
}
+static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port;
+
+ for (port = 0; port < dev->caps.num_ports; port++)
+ if (priv->def_counter[port] != -1)
+ mlx4_counter_free(dev, priv->def_counter[port]);
+}
+
+static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port, err = 0;
+ u32 idx;
+
+ for (port = 0; port < dev->caps.num_ports; port++)
+ priv->def_counter[port] = -1;
+
+ for (port = 0; port < dev->caps.num_ports; port++) {
+ err = mlx4_counter_alloc(dev, &idx);
+
+ if (!err || err == -ENOSPC) {
+ priv->def_counter[port] = idx;
+ } else if (err == -ENOENT) {
+ err = 0;
+ continue;
+ } else {
+ mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
+ __func__, port + 1, err);
+ mlx4_cleanup_default_counters(dev);
+ return err;
+ }
+
+ mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
+ __func__, priv->def_counter[port], port + 1);
+ }
+
+ return err;
+}
+
int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2234,8 +2295,10 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
return -ENOENT;
*idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
- if (*idx == -1)
- return -ENOMEM;
+ if (*idx == -1) {
+ *idx = MLX4_SINK_COUNTER_INDEX(dev);
+ return -ENOSPC;
+ }
return 0;
}
@@ -2258,8 +2321,35 @@ int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
+static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
+ u8 counter_index)
+{
+ struct mlx4_cmd_mailbox *if_stat_mailbox;
+ int err;
+ u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
+
+ if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(if_stat_mailbox))
+ return PTR_ERR(if_stat_mailbox);
+
+ err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
+ return err;
+}
+
void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return;
+
+ if (idx == MLX4_SINK_COUNTER_INDEX(dev))
+ return;
+
+ __mlx4_clear_if_stat(dev, idx);
+
mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
return;
}
@@ -2279,6 +2369,14 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
+int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return priv->def_counter[port - 1];
+}
+EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
+
void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2383,11 +2481,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
if (err) {
if (dev->flags & MLX4_FLAG_MSI_X) {
mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
- priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
mlx4_warn(dev, "Trying again without MSI-X\n");
} else {
mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
- priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
}
@@ -2414,10 +2512,18 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
goto err_srq_table_free;
}
- err = mlx4_init_counters_table(dev);
- if (err && err != -ENOENT) {
- mlx4_err(dev, "Failed to initialize counters table, aborting\n");
- goto err_qp_table_free;
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_counters_table(dev);
+ if (err && err != -ENOENT) {
+ mlx4_err(dev, "Failed to initialize counters table, aborting\n");
+ goto err_qp_table_free;
+ }
+ }
+
+ err = mlx4_allocate_default_counters(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to allocate default counters, aborting\n");
+ goto err_counters_table_free;
}
if (!mlx4_is_slave(dev)) {
@@ -2451,15 +2557,19 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
if (err) {
mlx4_err(dev, "Failed to set port %d, aborting\n",
port);
- goto err_counters_table_free;
+ goto err_default_countes_free;
}
}
}
return 0;
+err_default_countes_free:
+ mlx4_cleanup_default_counters(dev);
+
err_counters_table_free:
- mlx4_cleanup_counters_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_counters_table(dev);
err_qp_table_free:
mlx4_cleanup_qp_table(dev);
@@ -2500,14 +2610,45 @@ err_uar_table_free:
return err;
}
+static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
+{
+ int requested_cpu = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_eq *eq;
+ int off = 0;
+ int i;
+
+ if (eqn > dev->caps.num_comp_vectors)
+ return -EINVAL;
+
+ for (i = 1; i < port; i++)
+ off += mlx4_get_eqs_per_port(dev, i);
+
+ requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
+
+ /* Meaning EQs are shared, and this call comes from the second port */
+ if (requested_cpu < 0)
+ return 0;
+
+ eq = &priv->eq_table.eq[eqn];
+
+ if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_set_cpu(requested_cpu, eq->affinity_mask);
+
+ return 0;
+}
+
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct msix_entry *entries;
int i;
+ int port = 0;
if (msi_x) {
- int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;
+ int nreq = dev->caps.num_ports * num_online_cpus() + 1;
nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
nreq);
@@ -2522,20 +2663,55 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
nreq);
- if (nreq < 0) {
+ if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
kfree(entries);
goto no_msi;
- } else if (nreq < MSIX_LEGACY_SZ +
- dev->caps.num_ports * MIN_MSIX_P_PORT) {
- /*Working in legacy mode , all EQ's shared*/
- dev->caps.comp_pool = 0;
- dev->caps.num_comp_vectors = nreq - 1;
- } else {
- dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
- dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
}
- for (i = 0; i < nreq; ++i)
- priv->eq_table.eq[i].irq = entries[i].vector;
+ /* 1 is reserved for events (asyncrounous EQ) */
+ dev->caps.num_comp_vectors = nreq - 1;
+
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
+ bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
+ dev->caps.num_ports);
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
+ if (i == MLX4_EQ_ASYNC)
+ continue;
+
+ priv->eq_table.eq[i].irq =
+ entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
+
+ if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
+ bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
+ dev->caps.num_ports);
+ /* We don't set affinity hint when there
+ * aren't enough EQs
+ */
+ } else {
+ set_bit(port,
+ priv->eq_table.eq[i].actv_ports.ports);
+ if (mlx4_init_affinity_hint(dev, port + 1, i))
+ mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
+ i);
+ }
+ /* We divide the Eqs evenly between the two ports.
+ * (dev->caps.num_comp_vectors / dev->caps.num_ports)
+ * refers to the number of Eqs per port
+ * (i.e eqs_per_port). Theoretically, we would like to
+ * write something like (i + 1) % eqs_per_port == 0.
+ * However, since there's an asynchronous Eq, we have
+ * to skip over it by comparing this condition to
+ * !!((i + 1) > MLX4_EQ_ASYNC).
+ */
+ if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
+ ((i + 1) %
+ (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
+ !!((i + 1) > MLX4_EQ_ASYNC))
+ /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
+ * everything is shared anyway.
+ */
+ port++;
+ }
dev->flags |= MLX4_FLAG_MSI_X;
@@ -2545,10 +2721,15 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
no_msi:
dev->caps.num_comp_vectors = 1;
- dev->caps.comp_pool = 0;
- for (i = 0; i < 2; ++i)
+ BUG_ON(MLX4_EQ_ASYNC >= 2);
+ for (i = 0; i < 2; ++i) {
priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
+ if (i != MLX4_EQ_ASYNC) {
+ bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
+ dev->caps.num_ports);
+ }
+ }
}
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
@@ -2613,6 +2794,10 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
device_remove_file(&info->dev->persist->pdev->dev,
&info->port_mtu_attr);
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(info->rmap);
+ info->rmap = NULL;
+#endif
}
static int mlx4_init_steering(struct mlx4_dev *dev)
@@ -2768,6 +2953,7 @@ disable_sriov:
free_mem:
dev->persist->num_vfs = 0;
kfree(dev->dev_vfs);
+ dev->dev_vfs = NULL;
return dev_flags & ~MLX4_FLAG_MASTER;
}
@@ -2919,6 +3105,7 @@ slave_start:
existing_vfs,
reset_flow);
+ mlx4_close_fw(dev);
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
dev->flags = dev_flags;
if (!SRIOV_VALID_STATE(dev->flags)) {
@@ -3007,18 +3194,6 @@ slave_start:
/* In master functions, the communication channel must be initialized
* after obtaining its address from fw */
if (mlx4_is_master(dev)) {
- int ib_ports = 0;
-
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
- ib_ports++;
-
- if (ib_ports &&
- (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
- mlx4_err(dev,
- "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
- err = -EINVAL;
- goto err_close;
- }
if (dev->caps.num_ports < 2 &&
num_vfs_argc > 1) {
err = -EINVAL;
@@ -3055,7 +3230,7 @@ slave_start:
if (err)
goto err_master_mfunc;
- priv->msix_ctl.pool_bm = 0;
+ bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
mutex_init(&priv->msix_ctl.pool_lock);
mlx4_enable_msi_x(dev);
@@ -3077,7 +3252,6 @@ slave_start:
!mlx4_is_mfunc(dev)) {
dev->flags &= ~MLX4_FLAG_MSI_X;
dev->caps.num_comp_vectors = 1;
- dev->caps.comp_pool = 0;
pci_disable_msix(pdev);
err = mlx4_setup_hca(dev);
}
@@ -3128,7 +3302,9 @@ err_port:
for (--port; port >= 1; --port)
mlx4_cleanup_port_info(&priv->port[port]);
- mlx4_cleanup_counters_table(dev);
+ mlx4_cleanup_default_counters(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_counters_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
mlx4_cleanup_cq_table(dev);
@@ -3426,7 +3602,9 @@ static void mlx4_unload_one(struct pci_dev *pdev)
mlx4_free_resource_tracker(dev,
RES_TR_FREE_SLAVES_ONLY);
- mlx4_cleanup_counters_table(dev);
+ mlx4_cleanup_default_counters(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_counters_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
mlx4_cleanup_cq_table(dev);