aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/igb/igb_main.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 11:56:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 11:56:19 -0800
commit5bbcc0f595fadb4cac0eddc4401035ec0bd95b09 (patch)
tree3b65e490cc36a6c6fecac1fa24d9e0ac9ced4455 /drivers/net/ethernet/intel/igb/igb_main.c
parentMerge tag 'mips_4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/mips (diff)
parenttcp: highest_sack fix (diff)
downloadlinux-dev-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.tar.xz
linux-dev-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Maintain the TCP retransmit queue using an rbtree, with 1GB windows at 100Gb this really has become necessary. From Eric Dumazet. 2) Multi-program support for cgroup+bpf, from Alexei Starovoitov. 3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew Lunn. 4) Add meter action support to openvswitch, from Andy Zhou. 5) Add a data meta pointer for BPF accessible packets, from Daniel Borkmann. 6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet. 7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli. 8) More work to move the RTNL mutex down, from Florian Westphal. 9) Add 'bpftool' utility, to help with bpf program introspection. From Jakub Kicinski. 10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper Dangaard Brouer. 11) Support 'blocks' of transformations in the packet scheduler which can span multiple network devices, from Jiri Pirko. 12) TC flower offload support in cxgb4, from Kumar Sanghvi. 13) Priority based stream scheduler for SCTP, from Marcelo Ricardo Leitner. 14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg. 15) Add RED qdisc offloadability, and use it in mlxsw driver. From Nogah Frankel. 16) eBPF based device controller for cgroup v2, from Roman Gushchin. 17) Add some fundamental tracepoints for TCP, from Song Liu. 18) Remove garbage collection from ipv6 route layer, this is a significant accomplishment. From Wei Wang. 19) Add multicast route offload support to mlxsw, from Yotam Gigi" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits) tcp: highest_sack fix geneve: fix fill_info when link down bpf: fix lockdep splat net: cdc_ncm: GetNtbFormat endian fix openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start netem: remove unnecessary 64 bit modulus netem: use 64 bit divide by rate tcp: Namespace-ify sysctl_tcp_default_congestion_control net: Protect iterations over net::fib_notifier_ops in fib_seq_sum() ipv6: set all.accept_dad to 0 by default uapi: fix linux/tls.h userspace compilation error usbnet: ipheth: prevent TX queue timeouts when device not ready vhost_net: conditionally enable tx polling uapi: fix linux/rxrpc.h userspace compilation errors net: stmmac: fix LPI transitioning for dwmac4 atm: horizon: Fix irq release error net-sysfs: trigger netlink notification on ifalias change via sysfs openvswitch: Using kfree_rcu() to simplify the code openvswitch: Make local function ovs_nsh_key_attr_size() static openvswitch: Fix return value check in ovs_meter_cmd_features() ...
Diffstat (limited to 'drivers/net/ethernet/intel/igb/igb_main.c')
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c367
1 files changed, 357 insertions, 10 deletions
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 18b6c25d4705..e94d3c256667 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
+#include <net/pkt_sched.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
@@ -62,6 +63,17 @@
#define BUILD 0
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
+
+enum queue_mode {
+ QUEUE_MODE_STRICT_PRIORITY,
+ QUEUE_MODE_STREAM_RESERVATION,
+};
+
+enum tx_queue_prio {
+ TX_QUEUE_PRIO_HIGH,
+ TX_QUEUE_PRIO_LOW,
+};
+
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
@@ -133,8 +145,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
-static void igb_update_phy_info(unsigned long);
-static void igb_watchdog(unsigned long);
+static void igb_update_phy_info(struct timer_list *);
+static void igb_watchdog(struct timer_list *);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static void igb_get_stats64(struct net_device *dev,
@@ -1271,6 +1283,12 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
ring->count = adapter->tx_ring_count;
ring->queue_index = txr_idx;
+ ring->cbs_enable = false;
+ ring->idleslope = 0;
+ ring->sendslope = 0;
+ ring->hicredit = 0;
+ ring->locredit = 0;
+
u64_stats_init(&ring->tx_syncp);
u64_stats_init(&ring->tx_syncp2);
@@ -1598,6 +1616,284 @@ static void igb_get_hw_control(struct igb_adapter *adapter)
ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
+static void enable_fqtss(struct igb_adapter *adapter, bool enable)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+
+ if (enable)
+ adapter->flags |= IGB_FLAG_FQTSS;
+ else
+ adapter->flags &= ~IGB_FLAG_FQTSS;
+
+ if (netif_running(netdev))
+ schedule_work(&adapter->reset_task);
+}
+
+static bool is_fqtss_enabled(struct igb_adapter *adapter)
+{
+ return (adapter->flags & IGB_FLAG_FQTSS) ? true : false;
+}
+
+static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue,
+ enum tx_queue_prio prio)
+{
+ u32 val;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+ WARN_ON(queue < 0 || queue > 4);
+
+ val = rd32(E1000_I210_TXDCTL(queue));
+
+ if (prio == TX_QUEUE_PRIO_HIGH)
+ val |= E1000_TXDCTL_PRIORITY;
+ else
+ val &= ~E1000_TXDCTL_PRIORITY;
+
+ wr32(E1000_I210_TXDCTL(queue), val);
+}
+
+static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
+{
+ u32 val;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+ WARN_ON(queue < 0 || queue > 1);
+
+ val = rd32(E1000_I210_TQAVCC(queue));
+
+ if (mode == QUEUE_MODE_STREAM_RESERVATION)
+ val |= E1000_TQAVCC_QUEUEMODE;
+ else
+ val &= ~E1000_TQAVCC_QUEUEMODE;
+
+ wr32(E1000_I210_TQAVCC(queue), val);
+}
+
+/**
+ * igb_configure_cbs - Configure Credit-Based Shaper (CBS)
+ * @adapter: pointer to adapter struct
+ * @queue: queue number
+ * @enable: true = enable CBS, false = disable CBS
+ * @idleslope: idleSlope in kbps
+ * @sendslope: sendSlope in kbps
+ * @hicredit: hiCredit in bytes
+ * @locredit: loCredit in bytes
+ *
+ * Configure CBS for a given hardware queue. When disabling, idleslope,
+ * sendslope, hicredit, locredit arguments are ignored. Returns 0 if
+ * success. Negative otherwise.
+ **/
+static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
+ bool enable, int idleslope, int sendslope,
+ int hicredit, int locredit)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 tqavcc;
+ u16 value;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+ WARN_ON(queue < 0 || queue > 1);
+
+ if (enable) {
+ set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+ set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+
+ /* According to i210 datasheet section 7.2.7.7, we should set
+ * the 'idleSlope' field from TQAVCC register following the
+ * equation:
+ *
+ * For 100 Mbps link speed:
+ *
+ * value = BW * 0x7735 * 0.2 (E1)
+ *
+ * For 1000Mbps link speed:
+ *
+ * value = BW * 0x7735 * 2 (E2)
+ *
+ * E1 and E2 can be merged into one equation as shown below.
+ * Note that 'link-speed' is in Mbps.
+ *
+ * value = BW * 0x7735 * 2 * link-speed
+ * -------------- (E3)
+ * 1000
+ *
+ * 'BW' is the percentage bandwidth out of full link speed
+ * which can be found with the following equation. Note that
+ * idleSlope here is the parameter from this function which
+ * is in kbps.
+ *
+ * BW = idleSlope
+ * ----------------- (E4)
+ * link-speed * 1000
+ *
+ * That said, we can come up with a generic equation to
+ * calculate the value we should set it TQAVCC register by
+ * replacing 'BW' in E3 by E4. The resulting equation is:
+ *
+ * value = idleSlope * 0x7735 * 2 * link-speed
+ * ----------------- -------------- (E5)
+ * link-speed * 1000 1000
+ *
+ * 'link-speed' is present in both sides of the fraction so
+ * it is canceled out. The final equation is the following:
+ *
+ * value = idleSlope * 61034
+ * ----------------- (E6)
+ * 1000000
+ */
+ value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
+
+ tqavcc = rd32(E1000_I210_TQAVCC(queue));
+ tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+ tqavcc |= value;
+ wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+ wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735);
+ } else {
+ set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+ set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+
+ /* Set idleSlope to zero. */
+ tqavcc = rd32(E1000_I210_TQAVCC(queue));
+ tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+ wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+ /* Set hiCredit to zero. */
+ wr32(E1000_I210_TQAVHC(queue), 0);
+ }
+
+ /* XXX: In i210 controller the sendSlope and loCredit parameters from
+ * CBS are not configurable by software so we don't do any 'controller
+ * configuration' in respect to these parameters.
+ */
+
+ netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+ (enable) ? "enabled" : "disabled", queue,
+ idleslope, sendslope, hicredit, locredit);
+}
+
+static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
+ bool enable, int idleslope, int sendslope,
+ int hicredit, int locredit)
+{
+ struct igb_ring *ring;
+
+ if (queue < 0 || queue > adapter->num_tx_queues)
+ return -EINVAL;
+
+ ring = adapter->tx_ring[queue];
+
+ ring->cbs_enable = enable;
+ ring->idleslope = idleslope;
+ ring->sendslope = sendslope;
+ ring->hicredit = hicredit;
+ ring->locredit = locredit;
+
+ return 0;
+}
+
+static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+{
+ struct igb_ring *ring;
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ ring = adapter->tx_ring[i];
+
+ if (ring->cbs_enable)
+ return true;
+ }
+
+ return false;
+}
+
+static void igb_setup_tx_mode(struct igb_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 val;
+
+ /* Only i210 controller supports changing the transmission mode. */
+ if (hw->mac.type != e1000_i210)
+ return;
+
+ if (is_fqtss_enabled(adapter)) {
+ int i, max_queue;
+
+ /* Configure TQAVCTRL register: set transmit mode to 'Qav',
+ * set data fetch arbitration to 'round robin' and set data
+ * transfer arbitration to 'credit shaper algorithm.
+ */
+ val = rd32(E1000_I210_TQAVCTRL);
+ val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB;
+ val &= ~E1000_TQAVCTRL_DATAFETCHARB;
+ wr32(E1000_I210_TQAVCTRL, val);
+
+ /* Configure Tx and Rx packet buffers sizes as described in
+ * i210 datasheet section 7.2.7.7.
+ */
+ val = rd32(E1000_TXPBS);
+ val &= ~I210_TXPBSIZE_MASK;
+ val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB |
+ I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB;
+ wr32(E1000_TXPBS, val);
+
+ val = rd32(E1000_RXPBS);
+ val &= ~I210_RXPBSIZE_MASK;
+ val |= I210_RXPBSIZE_PB_32KB;
+ wr32(E1000_RXPBS, val);
+
+ /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
+ * register should not exceed the buffer size programmed in
+ * TXPBS. The smallest buffer size programmed in TXPBS is 4kB
+ * so according to the datasheet we should set MAX_TPKT_SIZE to
+ * 4kB / 64.
+ *
+ * However, when we do so, no frame from queue 2 and 3 are
+ * transmitted. It seems the MAX_TPKT_SIZE should not be great
+ * or _equal_ to the buffer size programmed in TXPBS. For this
+ * reason, we set set MAX_ TPKT_SIZE to (4kB - 1) / 64.
+ */
+ val = (4096 - 1) / 64;
+ wr32(E1000_I210_DTXMXPKTSZ, val);
+
+ /* Since FQTSS mode is enabled, apply any CBS configuration
+ * previously set. If no previous CBS configuration has been
+ * done, then the initial configuration is applied, which means
+ * CBS is disabled.
+ */
+ max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ?
+ adapter->num_tx_queues : I210_SR_QUEUES_NUM;
+
+ for (i = 0; i < max_queue; i++) {
+ struct igb_ring *ring = adapter->tx_ring[i];
+
+ igb_configure_cbs(adapter, i, ring->cbs_enable,
+ ring->idleslope, ring->sendslope,
+ ring->hicredit, ring->locredit);
+ }
+ } else {
+ wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+ wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+ wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT);
+
+ val = rd32(E1000_I210_TQAVCTRL);
+ /* According to Section 8.12.21, the other flags we've set when
+ * enabling FQTSS are not relevant when disabling FQTSS so we
+ * don't set they here.
+ */
+ val &= ~E1000_TQAVCTRL_XMIT_MODE;
+ wr32(E1000_I210_TQAVCTRL, val);
+ }
+
+ netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ?
+ "enabled" : "disabled");
+}
+
/**
* igb_configure - configure the hardware for RX and TX
* @adapter: private board structure
@@ -1609,6 +1905,7 @@ static void igb_configure(struct igb_adapter *adapter)
igb_get_hw_control(adapter);
igb_set_rx_mode(netdev);
+ igb_setup_tx_mode(adapter);
igb_restore_vlan(adapter);
@@ -2150,6 +2447,55 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
return features;
}
+static int igb_offload_cbs(struct igb_adapter *adapter,
+ struct tc_cbs_qopt_offload *qopt)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int err;
+
+ /* CBS offloading is only supported by i210 controller. */
+ if (hw->mac.type != e1000_i210)
+ return -EOPNOTSUPP;
+
+ /* CBS offloading is only supported by queue 0 and queue 1. */
+ if (qopt->queue < 0 || qopt->queue > 1)
+ return -EINVAL;
+
+ err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable,
+ qopt->idleslope, qopt->sendslope,
+ qopt->hicredit, qopt->locredit);
+ if (err)
+ return err;
+
+ if (is_fqtss_enabled(adapter)) {
+ igb_configure_cbs(adapter, qopt->queue, qopt->enable,
+ qopt->idleslope, qopt->sendslope,
+ qopt->hicredit, qopt->locredit);
+
+ if (!is_any_cbs_enabled(adapter))
+ enable_fqtss(adapter, false);
+
+ } else {
+ enable_fqtss(adapter, true);
+ }
+
+ return 0;
+}
+
+static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+
+ switch (type) {
+ case TC_SETUP_QDISC_CBS:
+ return igb_offload_cbs(adapter, type_data);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops igb_netdev_ops = {
.ndo_open = igb_open,
.ndo_stop = igb_close,
@@ -2175,6 +2521,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_set_features = igb_set_features,
.ndo_fdb_add = igb_ndo_fdb_add,
.ndo_features_check = igb_features_check,
+ .ndo_setup_tc = igb_setup_tc,
};
/**
@@ -2538,10 +2885,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
}
- setup_timer(&adapter->watchdog_timer, igb_watchdog,
- (unsigned long) adapter);
- setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
- (unsigned long) adapter);
+ timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+ timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
INIT_WORK(&adapter->reset_task, igb_reset_task);
INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@ -3162,6 +3507,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
/* Setup and initialize a copy of the hw vlan table array */
adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
GFP_ATOMIC);
+ if (!adapter->shadow_vfta)
+ return -ENOMEM;
/* This call may decrease the number of queues */
if (igb_init_interrupt_scheme(adapter, true)) {
@@ -4423,9 +4770,9 @@ static void igb_spoof_check(struct igb_adapter *adapter)
/* Need to wait a few seconds after link up to get diagnostic information from
* the phy
*/
-static void igb_update_phy_info(unsigned long data)
+static void igb_update_phy_info(struct timer_list *t)
{
- struct igb_adapter *adapter = (struct igb_adapter *) data;
+ struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
igb_get_phy_info(&adapter->hw);
}
@@ -4512,9 +4859,9 @@ static void igb_check_lvmmc(struct igb_adapter *adapter)
* igb_watchdog - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
-static void igb_watchdog(unsigned long data)
+static void igb_watchdog(struct timer_list *t)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
/* Do the rest outside of interrupt context */
schedule_work(&adapter->watchdog_task);
}