525 files changed, 31758 insertions, 8760 deletions
diff --git a/Documentation/devicetree/bindings/net/anarion-gmac.txt b/Documentation/devicetree/bindings/net/anarion-gmac.txt
new file mode 100644
index 000000000000..fe678965ae69
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/anarion-gmac.txt
@@ -0,0 +1,25 @@
+*  Adaptrum Anarion ethernet controller
+
+This device is a platform glue layer for stmmac.
+Please see stmmac.txt for the other unchanged properties.
+
+Required properties:
+ - compatible:  Should be "adaptrum,anarion-gmac", "snps,dwmac"
+ - phy-mode:    Should be "rgmii". Other modes are not currently supported.
+
+
+Examples:
+
+	gmac1: ethernet@f2014000 {
+		compatible = "adaptrum,anarion-gmac", "snps,dwmac";
+		reg = <0xf2014000 0x4000>, <0xf2018100 8>;
+
+		interrupt-parent = <&core_intc>;
+		interrupts = <21>;
+		interrupt-names = "macirq";
+
+		clocks = <&core_clk>;
+		clock-names = "stmmaceth";
+
+		phy-mode = "rgmii";
+	};
diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt
index 6b4956beff8c..8918ad3ccf14 100644
--- a/Documentation/devicetree/bindings/net/marvell-pp2.txt
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -41,6 +41,10 @@ Optional properties (port):
 - marvell,loopback: port is loopback mode
 - phy: a phandle to a phy node defining the PHY address (as the reg
   property, a single integer).
+- interrupt-names: if more than a single interrupt for rx is given, must
+                   be the name associated to the interrupts listed. Valid
+                   names are: "tx-cpu0", "tx-cpu1", "tx-cpu2", "tx-cpu3",
+		   "rx-shared".
 
 Example for marvell,armada-375-pp2:
 
@@ -80,19 +84,37 @@ cpm_ethernet: ethernet@0 {
 	clock-names = "pp_clk", "gop_clk", "gp_clk";
 
 	eth0: eth0 {
-		interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <ICU_GRP_NSR 39 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 43 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 47 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 51 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 55 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "tx-cpu0", "tx-cpu1", "tx-cpu2",
+				  "tx-cpu3", "rx-shared";
 		port-id = <0>;
 		gop-port-id = <0>;
 	};
 
 	eth1: eth1 {
-		interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <ICU_GRP_NSR 40 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 44 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 48 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 52 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 56 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "tx-cpu0", "tx-cpu1", "tx-cpu2",
+				  "tx-cpu3", "rx-shared";
 		port-id = <1>;
 		gop-port-id = <2>;
 	};
 
 	eth2: eth2 {
-		interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <ICU_GRP_NSR 41 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 45 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 49 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 53 IRQ_TYPE_LEVEL_HIGH>,
+			     <ICU_GRP_NSR 57 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "tx-cpu0", "tx-cpu1", "tx-cpu2",
+				  "tx-cpu3", "rx-shared";
 		port-id = <2>;
 		gop-port-id = <3>;
 	};
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index c7194e87d5f4..1d1168b805cc 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -7,24 +7,30 @@ have dual GMAC each represented by a child node..
 * Ethernet controller node
 
 Required properties:
-- compatible: Should be "mediatek,mt2701-eth"
+- compatible: Should be
+		"mediatek,mt2701-eth": for MT2701 SoC
+		"mediatek,mt7623-eth", "mediatek,mt2701-eth": for MT7623 SoC
+		"mediatek,mt7622-eth": for MT7622 SoC
 - reg: Address and length of the register set for the device
 - interrupts: Should contain the three frame engines interrupts in numeric
 	order. These are fe_int0, fe_int1 and fe_int2.
 - clocks: the clock used by the core
 - clock-names: the names of the clock listed in the clocks property. These are
-	"ethif", "esw", "gp2", "gp1"
+	"ethif", "esw", "gp2", "gp1" : For MT2701 and MT7623 SoC
+        "ethif", "esw", "gp0", "gp1", "gp2", "sgmii_tx250m", "sgmii_rx250m",
+	"sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll" : For MT7622 SoC
 - power-domains: phandle to the power domain that the ethernet is part of
 - resets: Should contain a phandle to the ethsys reset signal
 - reset-names: Should contain the reset signal name "eth"
 - mediatek,ethsys: phandle to the syscon node that handles the port setup
+- mediatek,sgmiisys: phandle to the syscon node that handles the SGMII setup
+	which is required for those SoCs equipped with SGMII such as MT7622 SoC.
 - mediatek,pctl: phandle to the syscon node that handles the ports slew rate
 	and driver current
 
 Optional properties:
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
-
 * Ethernet MAC node
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index b519503be51a..4717bc24eada 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -4,19 +4,24 @@ This file provides information on what the device node for the Ethernet AVB
 interface contains.
 
 Required properties:
-- compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC.
-	      "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC.
-	      "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC.
-	      "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC.
-	      "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC.
-	      "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC.
-	      "renesas,etheravb-r8a7796" if the device is a part of R8A7796 SoC.
-	      "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface.
-	      "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface.
+- compatible: Must contain one or more of the following:
+      - "renesas,etheravb-r8a7743" for the R8A7743 SoC.
+      - "renesas,etheravb-r8a7790" for the R8A7790 SoC.
+      - "renesas,etheravb-r8a7791" for the R8A7791 SoC.
+      - "renesas,etheravb-r8a7792" for the R8A7792 SoC.
+      - "renesas,etheravb-r8a7793" for the R8A7793 SoC.
+      - "renesas,etheravb-r8a7794" for the R8A7794 SoC.
+      - "renesas,etheravb-rcar-gen2" as a fallback for the above
+		R-Car Gen2 and RZ/G1 devices.
 
-	      When compatible with the generic version, nodes must list the
-	      SoC-specific version corresponding to the platform first
-	      followed by the generic version.
+      - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
+      - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+      - "renesas,etheravb-rcar-gen3" as a fallback for the above
+		R-Car Gen3 devices.
+
+	When compatible with the generic version, nodes must list the
+	SoC-specific version corresponding to the platform first followed by
+	the generic version.
 
 - reg: offset and length of (1) the register block and (2) the stream buffer.
 - interrupts: A list of interrupt-specifiers, one for each entry in
diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
new file mode 100644
index 000000000000..38f9ec076743
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -0,0 +1,55 @@
+XILINX AXI ETHERNET Device Tree Bindings
+--------------------------------------------------------
+
+Also called  AXI 1G/2.5G Ethernet Subsystem, the xilinx axi ethernet IP core
+provides connectivity to an external ethernet PHY supporting different
+interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two
+segments of memory for buffering TX and RX, as well as the capability of
+offloading TX/RX checksum calculation off the processor.
+
+Management configuration is done through the AXI interface, while payload is
+sent and received through means of an AXI DMA controller. This driver
+includes the DMA driver code, so this driver is incompatible with AXI DMA
+driver.
+
+For more details about mdio please refer phy.txt file in the same directory.
+
+Required properties:
+- compatible	: Must be one of "xlnx,axi-ethernet-1.00.a",
+		  "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a"
+- reg		: Address and length of the IO space.
+- interrupts	: Should be a list of two interrupt, TX and RX.
+- phy-handle	: Should point to the external phy device.
+		  See ethernet.txt file in the same directory.
+- xlnx,rxmem	: Set to allocated memory buffer for Rx/Tx in the hardware
+
+Optional properties:
+- phy-mode	: See ethernet.txt
+- xlnx,phy-type	: Deprecated, do not use, but still accepted in preference
+		  to phy-mode.
+- xlnx,txcsum	: 0 or empty for disabling TX checksum offload,
+		  1 to enable partial TX checksum offload,
+		  2 to enable full TX checksum offload
+- xlnx,rxcsum	: Same values as xlnx,txcsum but for RX checksum offload
+
+Example:
+	axi_ethernet_eth: ethernet@40c00000 {
+		compatible = "xlnx,axi-ethernet-1.00.a";
+		device_type = "network";
+		interrupt-parent = <&microblaze_0_axi_intc>;
+		interrupts = <2 0>;
+		phy-mode = "mii";
+		reg = <0x40c00000 0x40000>;
+		xlnx,rxcsum = <0x2>;
+		xlnx,rxmem = <0x800>;
+		xlnx,txcsum = <0x2>;
+		phy-handle = <&phy0>;
+		axi_ethernetlite_0_mdio: mdio {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			phy0: phy@0 {
+				device_type = "ethernet-phy";
+				reg = <1>;
+			};
+		};
+	};
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index c6beb5f1637f..7a79b3587dd3 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -30,8 +30,6 @@ atm.txt
 	- info on where to get ATM programs and support for Linux.
 ax25.txt
 	- info on using AX.25 and NET/ROM code for Linux
-batman-adv.txt
-	- B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
 baycom.txt
 	- info on the driver for Baycom style amateur radio modems
 bonding.txt
diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst
new file mode 100644
index 000000000000..a342b2cc3dc6
--- /dev/null
+++ b/Documentation/networking/batman-adv.rst
@@ -0,0 +1,220 @@
+==========
+batman-adv
+==========
+
+Batman advanced is a new approach to wireless networking which does no longer
+operate on the IP basis. Unlike the batman daemon, which exchanges information
+using UDP packets and sets routing tables, batman-advanced operates on ISO/OSI
+Layer 2 only and uses and routes (or better: bridges) Ethernet Frames. It
+emulates a virtual network switch of all nodes participating. Therefore all
+nodes appear to be link local, thus all higher operating protocols won't be
+affected by any changes within the network. You can run almost any protocol
+above batman advanced, prominent examples are: IPv4, IPv6, DHCP, IPX.
+
+Batman advanced was implemented as a Linux kernel driver to reduce the overhead
+to a minimum. It does not depend on any (other) network driver, and can be used
+on wifi as well as ethernet lan, vpn, etc ... (anything with ethernet-style
+layer 2).
+
+
+Configuration
+=============
+
+Load the batman-adv module into your kernel::
+
+  $ insmod batman-adv.ko
+
+The module is now waiting for activation. You must add some interfaces on which
+batman can operate. After loading the module batman advanced will scan your
+systems interfaces to search for compatible interfaces. Once found, it will
+create subfolders in the ``/sys`` directories of each supported interface,
+e.g.::
+
+  $ ls /sys/class/net/eth0/batman_adv/
+  elp_interval iface_status mesh_iface throughput_override
+
+If an interface does not have the ``batman_adv`` subfolder, it probably is not
+supported. Not supported interfaces are: loopback, non-ethernet and batman's
+own interfaces.
+
+Note: After the module was loaded it will continuously watch for new
+interfaces to verify the compatibility. There is no need to reload the module
+if you plug your USB wifi adapter into your machine after batman advanced was
+initially loaded.
+
+The batman-adv soft-interface can be created using the iproute2 tool ``ip``::
+
+  $ ip link add name bat0 type batadv
+
+To activate a given interface simply attach it to the ``bat0`` interface::
+
+  $ ip link set dev eth0 master bat0
+
+Repeat this step for all interfaces you wish to add. Now batman starts
+using/broadcasting on this/these interface(s).
+
+By reading the "iface_status" file you can check its status::
+
+  $ cat /sys/class/net/eth0/batman_adv/iface_status
+  active
+
+To deactivate an interface you have to detach it from the "bat0" interface::
+
+  $ ip link set dev eth0 nomaster
+
+
+All mesh wide settings can be found in batman's own interface folder::
+
+  $ ls /sys/class/net/bat0/mesh/
+  aggregated_ogms       fragmentation isolation_mark routing_algo
+  ap_isolation          gw_bandwidth  log_level      vlan0
+  bonding               gw_mode       multicast_mode
+  bridge_loop_avoidance gw_sel_class  network_coding
+  distributed_arp_table hop_penalty   orig_interval
+
+There is a special folder for debugging information::
+
+  $ ls /sys/kernel/debug/batman_adv/bat0/
+  bla_backbone_table log         neighbors         transtable_local
+  bla_claim_table    mcast_flags originators
+  dat_cache          nc          socket
+  gateways           nc_nodes    transtable_global
+
+Some of the files contain all sort of status information regarding the mesh
+network. For example, you can view the table of originators (mesh
+participants) with::
+
+  $ cat /sys/kernel/debug/batman_adv/bat0/originators
+
+Other files allow to change batman's behaviour to better fit your requirements.
+For instance, you can check the current originator interval (value in
+milliseconds which determines how often batman sends its broadcast packets)::
+
+  $ cat /sys/class/net/bat0/mesh/orig_interval
+  1000
+
+and also change its value::
+
+  $ echo 3000 > /sys/class/net/bat0/mesh/orig_interval
+
+In very mobile scenarios, you might want to adjust the originator interval to a
+lower value. This will make the mesh more responsive to topology changes, but
+will also increase the overhead.
+
+
+Usage
+=====
+
+To make use of your newly created mesh, batman advanced provides a new
+interface "bat0" which you should use from this point on. All interfaces added
+to batman advanced are not relevant any longer because batman handles them for
+you. Basically, one "hands over" the data by using the batman interface and
+batman will make sure it reaches its destination.
+
+The "bat0" interface can be used like any other regular interface. It needs an
+IP address which can be either statically configured or dynamically (by using
+DHCP or similar services)::
+
+  NodeA: ip link set up dev bat0
+  NodeA: ip addr add 192.168.0.1/24 dev bat0
+
+  NodeB: ip link set up dev bat0
+  NodeB: ip addr add 192.168.0.2/24 dev bat0
+  NodeB: ping 192.168.0.1
+
+Note: In order to avoid problems remove all IP addresses previously assigned to
+interfaces now used by batman advanced, e.g.::
+
+  $ ip addr flush dev eth0
+
+
+Logging/Debugging
+=================
+
+All error messages, warnings and information messages are sent to the kernel
+log. Depending on your operating system distribution this can be read in one of
+a number of ways. Try using the commands: ``dmesg``, ``logread``, or looking in
+the files ``/var/log/kern.log`` or ``/var/log/syslog``. All batman-adv messages
+are prefixed with "batman-adv:" So to see just these messages try::
+
+  $ dmesg | grep batman-adv
+
+When investigating problems with your mesh network, it is sometimes necessary to
+see more detail debug messages. This must be enabled when compiling the
+batman-adv module. When building batman-adv as part of kernel, use "make
+menuconfig" and enable the option ``B.A.T.M.A.N. debugging``
+(``CONFIG_BATMAN_ADV_DEBUG=y``).
+
+Those additional debug messages can be accessed using a special file in
+debugfs::
+
+  $ cat /sys/kernel/debug/batman_adv/bat0/log
+
+The additional debug output is by default disabled. It can be enabled during
+run time. Following log_levels are defined:
+
+.. flat-table::
+
+   * - 0
+     - All debug output disabled
+   * - 1
+     - Enable messages related to routing / flooding / broadcasting
+   * - 2
+     - Enable messages related to route added / changed / deleted
+   * - 4
+     - Enable messages related to translation table operations
+   * - 8
+     - Enable messages related to bridge loop avoidance
+   * - 16
+     - Enable messages related to DAT, ARP snooping and parsing
+   * - 32
+     - Enable messages related to network coding
+   * - 64
+     - Enable messages related to multicast
+   * - 128
+     - Enable messages related to throughput meter
+   * - 255
+     - Enable all messages
+
+The debug output can be changed at runtime using the file
+``/sys/class/net/bat0/mesh/log_level``. e.g.::
+
+  $ echo 6 > /sys/class/net/bat0/mesh/log_level
+
+will enable debug messages for when routes change.
+
+Counters for different types of packets entering and leaving the batman-adv
+module are available through ethtool::
+
+  $ ethtool --statistics bat0
+
+
+batctl
+======
+
+As batman advanced operates on layer 2, all hosts participating in the virtual
+switch are completely transparent for all protocols above layer 2. Therefore
+the common diagnosis tools do not work as expected. To overcome these problems,
+batctl was created. At the moment the batctl contains ping, traceroute, tcpdump
+and interfaces to the kernel module settings.
+
+For more information, please see the manpage (``man batctl``).
+
+batctl is available on https://www.open-mesh.org/
+
+
+Contact
+=======
+
+Please send us comments, experiences, questions, anything :)
+
+IRC:
+  #batman on irc.freenode.org
+Mailing-list:
+  b.a.t.m.a.n@open-mesh.org (optional subscription at
+  https://lists.open-mesh.org/mm/listinfo/b.a.t.m.a.n)
+
+You can also contact the Authors:
+
+* Marek Lindner <mareklindner@neomailbox.ch>
+* Simon Wunderlich <sw@simonwunderlich.de>
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
deleted file mode 100644
index ccf94677b240..000000000000
--- a/Documentation/networking/batman-adv.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-BATMAN-ADV
-----------
-
-Batman  advanced  is  a new approach to wireless networking which
-does no longer operate on the IP basis. Unlike the batman daemon,
-which  exchanges  information  using UDP packets and sets routing
-tables, batman-advanced operates on ISO/OSI Layer 2 only and uses
-and  routes  (or  better: bridges) Ethernet Frames. It emulates a
-virtual network switch of all nodes participating.  Therefore all
-nodes  appear  to be link local, thus all higher operating proto-
-cols won't be affected by any changes within the network. You can
-run almost any protocol above batman advanced, prominent examples
-are: IPv4, IPv6, DHCP, IPX.
-
-Batman advanced was implemented as a Linux kernel driver  to  re-
-duce the overhead to a minimum. It does not depend on any (other)
-network driver, and can be used on wifi as well as ethernet  lan,
-vpn,  etc ... (anything with ethernet-style layer 2).
-
-
-CONFIGURATION
--------------
-
-Load the batman-adv module into your kernel:
-
-# insmod batman-adv.ko
-
-The  module  is now waiting for activation. You must add some in-
-terfaces on which batman can operate. After  loading  the  module
-batman  advanced  will scan your systems interfaces to search for
-compatible interfaces. Once found, it will create  subfolders  in
-the /sys directories of each supported interface, e.g.
-
-# ls /sys/class/net/eth0/batman_adv/
-# elp_interval  iface_status  mesh_iface  throughput_override
-
-If an interface does not have the "batman_adv" subfolder it prob-
-ably is not supported. Not supported  interfaces  are:  loopback,
-non-ethernet and batman's own interfaces.
-
-Note:  After the module was loaded it will continuously watch for
-new interfaces to verify the compatibility. There is no  need  to
-reload the module if you plug your USB wifi adapter into your ma-
-chine after batman advanced was initially loaded.
-
-The batman-adv soft-interface can be created using  the  iproute2
-tool "ip"
-
-# ip link add name bat0 type batadv
-
-To  activate a  given  interface  simply  attach it to the "bat0"
-interface
-
-# ip link set dev eth0 master bat0
-
-Repeat  this step for all interfaces you wish to add.  Now batman
-starts using/broadcasting on this/these interface(s).
-
-By reading the "iface_status" file you can check its status:
-
-# cat /sys/class/net/eth0/batman_adv/iface_status
-# active
-
-To  deactivate  an  interface  you  have   to  detach it from the
-"bat0" interface:
-
-# ip link set dev eth0 nomaster
-
-
-All  mesh  wide  settings  can be found in batman's own interface
-folder:
-
-# ls /sys/class/net/bat0/mesh/
-# aggregated_ogms        fragmentation  isolation_mark  routing_algo
-# ap_isolation           gw_bandwidth   log_level       vlan0
-# bonding                gw_mode        multicast_mode
-# bridge_loop_avoidance  gw_sel_class   network_coding
-# distributed_arp_table  hop_penalty    orig_interval
-
-There is a special folder for debugging information:
-
-# ls /sys/kernel/debug/batman_adv/bat0/
-# bla_backbone_table  log          neighbors          transtable_local
-# bla_claim_table     mcast_flags  originators
-# dat_cache           nc           socket
-# gateways            nc_nodes     transtable_global
-
-Some of the files contain all sort of status information  regard-
-ing  the  mesh  network.  For  example, you can view the table of
-originators (mesh participants) with:
-
-# cat /sys/kernel/debug/batman_adv/bat0/originators
-
-Other files allow to change batman's behaviour to better fit your
-requirements.  For instance, you can check the current originator
-interval (value in milliseconds which determines how often batman
-sends its broadcast packets):
-
-# cat /sys/class/net/bat0/mesh/orig_interval
-# 1000
-
-and also change its value:
-
-# echo 3000 > /sys/class/net/bat0/mesh/orig_interval
-
-In very mobile scenarios, you might want to adjust the originator
-interval to a lower value. This will make the mesh  more  respon-
-sive to topology changes, but will also increase the overhead.
-
-
-USAGE
------
-
-To  make use of your newly created mesh, batman advanced provides
-a new interface "bat0" which you should use from this  point  on.
-All  interfaces  added  to  batman  advanced are not relevant any
-longer because batman handles them for you. Basically, one "hands
-over" the data by using the batman interface and batman will make
-sure it reaches its destination.
-
-The "bat0" interface can be used like any  other  regular  inter-
-face.  It needs an IP address which can be either statically con-
-figured or dynamically (by using DHCP or similar services):
-
-# NodeA: ip link set up dev bat0
-# NodeA: ip addr add 192.168.0.1/24 dev bat0
-
-# NodeB: ip link set up dev bat0
-# NodeB: ip addr add 192.168.0.2/24 dev bat0
-# NodeB: ping 192.168.0.1
-
-Note:  In  order to avoid problems remove all IP addresses previ-
-ously assigned to interfaces now used by batman advanced, e.g.
-
-# ip addr flush dev eth0
-
-
-LOGGING/DEBUGGING
------------------
-
-All error messages, warnings and information messages are sent to
-the kernel log. Depending on your operating  system  distribution
-this  can  be read in one of a number of ways. Try using the com-
-mands: dmesg, logread, or looking in the files  /var/log/kern.log
-or  /var/log/syslog.  All  batman-adv  messages are prefixed with
-"batman-adv:" So to see just these messages try
-
-# dmesg | grep batman-adv
-
-When investigating problems with your mesh network  it  is  some-
-times  necessary  to see more detail debug messages. This must be
-enabled when compiling the batman-adv module. When building  bat-
-man-adv  as  part of kernel, use "make menuconfig" and enable the
-option "B.A.T.M.A.N. debugging".
-
-Those additional  debug messages can be accessed  using a special
-file in debugfs
-
-# cat /sys/kernel/debug/batman_adv/bat0/log
-
-The additional debug output is by default disabled. It can be en-
-abled  during run time. Following log_levels are defined:
-
-  0 - All  debug  output  disabled
-  1 - Enable messages related to routing / flooding / broadcasting
-  2 - Enable messages related to route added / changed / deleted
-  4 - Enable messages related to translation table operations
-  8 - Enable messages related to bridge loop avoidance
- 16 - Enable messages related to DAT, ARP snooping and parsing
- 32 - Enable messages related to network coding
- 64 - Enable messages related to multicast
-128 - Enable messages related to throughput meter
-255 - Enable all messages
-
-The debug output can be changed at runtime  using  the  file
-/sys/class/net/bat0/mesh/log_level. e.g.
-
-# echo 6 > /sys/class/net/bat0/mesh/log_level
-
-will enable debug messages for when routes change.
-
-Counters for different types of packets entering and leaving the
-batman-adv module are available through ethtool:
-
-# ethtool --statistics bat0
-
-
-BATCTL
-------
-
-As batman advanced operates on layer 2 all hosts participating in
-the  virtual switch are completely transparent for all  protocols
-above layer 2. Therefore the common diagnosis tools do  not  work
-as  expected.  To  overcome these problems batctl was created. At
-the  moment the  batctl contains ping,  traceroute,  tcpdump  and
-interfaces to the kernel module settings.
-
-For more information, please see the manpage (man batctl).
-
-batctl is available on https://www.open-mesh.org/
-
-
-CONTACT
--------
-
-Please send us comments, experiences, questions, anything :)
-
-IRC:            #batman   on   irc.freenode.org
-Mailing-list:   b.a.t.m.a.n@open-mesh.org (optional  subscription
-          at https://lists.open-mesh.org/mm/listinfo/b.a.t.m.a.n)
-
-You can also contact the Authors:
-
-Marek  Lindner  <mareklindner@neomailbox.ch>
-Simon  Wunderlich  <sw@simonwunderlich.de>
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b5bd87e01f52..66e620866245 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -6,6 +6,7 @@ Contents:
 .. toctree::
    :maxdepth: 2
 
+   batman-adv
    kapi
    z8530book
 
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 974ab47ae53a..84c9b8cee780 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -353,12 +353,7 @@ tcp_l3mdev_accept - BOOLEAN
 	compiled with CONFIG_NET_L3_MASTER_DEV.
 
 tcp_low_latency - BOOLEAN
-	If set, the TCP stack makes decisions that prefer lower
-	latency as opposed to higher throughput.  By default, this
-	option is not set meaning that higher throughput is preferred.
-	An example of an application where this default should be
-	changed would be a Beowulf compute cluster.
-	Default: 0
+	This is a legacy option, it has no effect anymore.
 
 tcp_max_orphans - INTEGER
 	Maximal number of TCP sockets not attached to any user file handle,
@@ -1291,8 +1286,7 @@ tag - INTEGER
 xfrm4_gc_thresh - INTEGER
 	The threshold at which we will start garbage collecting for IPv4
 	destination cache entries.  At twice this value the system will
-	refuse new allocations. The value must be set below the flowcache
-	limit (4096 * number of online cpus) to take effect.
+	refuse new allocations.
 
 igmp_link_local_mcast_reports - BOOLEAN
 	Enable IGMP reports for link local multicast groups in the
@@ -1778,8 +1772,7 @@ ratelimit - INTEGER
 xfrm6_gc_thresh - INTEGER
 	The threshold at which we will start garbage collecting for IPv6
 	destination cache entries.  At twice this value the system will
-	refuse new allocations. The value must be set below the flowcache
-	limit (4096 * number of online cpus) to take effect.
+	refuse new allocations.
 
 
 IPv6 Update by:
diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt
new file mode 100644
index 000000000000..4ddb4e4b0426
--- /dev/null
+++ b/Documentation/networking/netvsc.txt
@@ -0,0 +1,63 @@
+Hyper-V network driver
+======================
+
+Compatibility
+=============
+
+This driver is compatible with Windows Server 2012 R2, 2016 and
+Windows 10.
+
+Features
+========
+
+  Checksum offload
+  ----------------
+  The netvsc driver supports checksum offload as long as the
+  Hyper-V host version does. Windows Server 2016 and Azure
+  support checksum offload for TCP and UDP for both IPv4 and
+  IPv6. Windows Server 2012 only supports checksum offload for TCP.
+
+  Receive Side Scaling
+  --------------------
+  Hyper-V supports receive side scaling. For TCP, packets are
+  distributed among available queues based on IP address and port
+  number. Current versions of Hyper-V host, only distribute UDP
+  packets based on the IP source and destination address.
+  The port number is not used as part of the hash value for UDP.
+  Fragmented IP packets are not distributed between queues;
+  all fragmented packets arrive on the first channel.
+
+  Generic Receive Offload, aka GRO
+  --------------------------------
+  The driver supports GRO and it is enabled by default. GRO coalesces
+  like packets and significantly reduces CPU usage under heavy Rx
+  load.
+
+  SR-IOV support
+  --------------
+  Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
+  is enabled in both the vSwitch and the guest configuration, then the
+  Virtual Function (VF) device is passed to the guest as a PCI
+  device. In this case, both a synthetic (netvsc) and VF device are
+  visible in the guest OS and both NIC's have the same MAC address.
+
+  The VF is enslaved by netvsc device.  The netvsc driver will transparently
+  switch the data path to the VF when it is available and up.
+  Network state (addresses, firewall, etc) should be applied only to the
+  netvsc device; the slave device should not be accessed directly in
+  most cases.  The exceptions are if some special queue discipline or
+  flow direction is desired, these should be applied directly to the
+  VF slave device.
+
+  Receive Buffer
+  --------------
+  Packets are received into a receive area which is created when device
+  is probed. The receive area is broken into MTU sized chunks and each may
+  contain one or more packets. The number of receive sections may be changed
+  via ethtool Rx ring parameters.
+
+  There is a similar send buffer which is used to aggregate packets for sending.
+  The send area is broken into chunks of 6144 bytes, each of section may
+  contain one or more packets. The send buffer is an optimization, the driver
+  will use slower method to handle very large packets or if the send buffer
+  area is exhausted.
diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
index a0bf573dfa61..fe01302471ae 100644
--- a/Documentation/networking/strparser.txt
+++ b/Documentation/networking/strparser.txt
@@ -1,45 +1,107 @@
-Stream Parser
--------------
+Stream Parser (strparser)
+
+Introduction
+============
 
 The stream parser (strparser) is a utility that parses messages of an
-application layer protocol running over a TCP connection. The stream
+application layer protocol running over a data stream. The stream
 parser works in conjunction with an upper layer in the kernel to provide
 kernel support for application layer messages. For instance, Kernel
 Connection Multiplexor (KCM) uses the Stream Parser to parse messages
 using a BPF program.
 
+The strparser works in one of two modes: receive callback or general
+mode.
+
+In receive callback mode, the strparser is called from the data_ready
+callback of a TCP socket. Messages are parsed and delivered as they are
+received on the socket.
+
+In general mode, a sequence of skbs are fed to strparser from an
+outside source. Message are parsed and delivered as the sequence is
+processed. This modes allows strparser to be applied to arbitrary
+streams of data.
+
 Interface
----------
+=========
 
 The API includes a context structure, a set of callbacks, utility
-functions, and a data_ready function. The callbacks include
-a parse_msg function that is called to perform parsing (e.g.
-BPF parsing in case of KCM), and a rcv_msg function that is called
-when a full message has been completed.
+functions, and a data_ready function for receive callback mode. The
+callbacks include a parse_msg function that is called to perform
+parsing (e.g.  BPF parsing in case of KCM), and a rcv_msg function
+that is called when a full message has been completed.
 
-A stream parser can be instantiated for a TCP connection. This is done
-by:
+Functions
+=========
 
-strp_init(struct strparser *strp, struct sock *csk,
+strp_init(struct strparser *strp, struct sock *sk,
 	  struct strp_callbacks *cb)
 
-strp is a struct of type strparser that is allocated by the upper layer.
-csk is the TCP socket associated with the stream parser. Callbacks are
-called by the stream parser.
+     Called to initialize a stream parser. strp is a struct of type
+     strparser that is allocated by the upper layer. sk is the TCP
+     socket associated with the stream parser for use with receive
+     callback mode; in general mode this is set to NULL. Callbacks
+     are called by the stream parser (the callbacks are listed below).
+
+void strp_pause(struct strparser *strp)
+
+     Temporarily pause a stream parser. Message parsing is suspended
+     and no new messages are delivered to the upper layer.
+
+void strp_pause(struct strparser *strp)
+
+     Unpause a paused stream parser.
+
+void strp_stop(struct strparser *strp);
+
+     strp_stop is called to completely stop stream parser operations.
+     This is called internally when the stream parser encounters an
+     error, and it is called from the upper layer to stop parsing
+     operations.
+
+void strp_done(struct strparser *strp);
+
+     strp_done is called to release any resources held by the stream
+     parser instance. This must be called after the stream processor
+     has been stopped.
+
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo)
+
+    strp_process is called in general mode for a stream parser to
+    parse an sk_buff. The number of bytes processed or a negative
+    error number is returned. Note that strp_process does not
+    consume the sk_buff. max_msg_size is maximum size the stream
+    parser will parse. timeo is timeout for completing a message.
+
+void strp_data_ready(struct strparser *strp);
+
+    The upper layer calls strp_tcp_data_ready when data is ready on
+    the lower socket for strparser to process. This should be called
+    from a data_ready callback that is set on the socket. Note that
+    maximum messages size is the limit of the receive socket
+    buffer and message timeout is the receive timeout for the socket.
+
+void strp_check_rcv(struct strparser *strp);
+
+    strp_check_rcv is called to check for new messages on the socket.
+    This is normally called at initialization of a stream parser
+    instance or after strp_unpause.
 
 Callbacks
----------
+=========
 
-There are four callbacks:
+There are six callbacks:
 
 int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
 
     parse_msg is called to determine the length of the next message
     in the stream. The upper layer must implement this function. It
     should parse the sk_buff as containing the headers for the
-    next application layer messages in the stream.
+    next application layer message in the stream.
 
-    The skb->cb in the input skb is a struct strp_rx_msg. Only
+    The skb->cb in the input skb is a struct strp_msg. Only
     the offset field is relevant in parse_msg and gives the offset
     where the message starts in the skb.
 
@@ -50,26 +112,41 @@ int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
     -ESTRPIPE : current message should not be processed by the
           kernel, return control of the socket to userspace which
           can proceed to read the messages itself
-    other < 0 : Error is parsing, give control back to userspace
+    other < 0 : Error in parsing, give control back to userspace
           assuming that synchronization is lost and the stream
           is unrecoverable (application expected to close TCP socket)
 
     In the case that an error is returned (return value is less than
-    zero) the stream parser will set the error on TCP socket and wake
-    it up. If parse_msg returned -ESTRPIPE and the stream parser had
-    previously read some bytes for the current message, then the error
-    set on the attached socket is ENODATA since the stream is
-    unrecoverable in that case.
+    zero) and the parser is in receive callback mode, then it will set
+    the error on TCP socket and wake it up. If parse_msg returned
+    -ESTRPIPE and the stream parser had previously read some bytes for
+    the current message, then the error set on the attached socket is
+    ENODATA since the stream is unrecoverable in that case.
+
+void (*lock)(struct strparser *strp)
+
+    The lock callback is called to lock the strp structure when
+    the strparser is performing an asynchronous operation (such as
+    processing a timeout). In receive callback mode the default
+    function is to lock_sock for the associated socket. In general
+    mode the callback must be set appropriately.
+
+void (*unlock)(struct strparser *strp)
+
+    The unlock callback is called to release the lock obtained
+    by the lock callback. In receive callback mode the default
+    function is release_sock for the associated socket. In general
+    mode the callback must be set appropriately.
 
 void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
 
     rcv_msg is called when a full message has been received and
     is queued. The callee must consume the sk_buff; it can
     call strp_pause to prevent any further messages from being
-    received in rcv_msg (see strp_pause below). This callback
+    received in rcv_msg (see strp_pause above). This callback
     must be set.
 
-    The skb->cb in the input skb is a struct strp_rx_msg. This
+    The skb->cb in the input skb is a struct strp_msg. This
     struct contains two fields: offset and full_len. Offset is
     where the message starts in the skb, and full_len is the
     the length of the message. skb->len - offset may be greater
@@ -78,59 +155,53 @@ void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
 int (*read_sock_done)(struct strparser *strp, int err);
 
      read_sock_done is called when the stream parser is done reading
-     the TCP socket. The stream parser may read multiple messages
-     in a loop and this function allows cleanup to occur when existing
-     the loop. If the callback is not set (NULL in strp_init) a
-     default function is used.
+     the TCP socket in receive callback mode. The stream parser may
+     read multiple messages in a loop and this function allows cleanup
+     to occur when exiting the loop. If the callback is not set (NULL
+     in strp_init) a default function is used.
 
 void (*abort_parser)(struct strparser *strp, int err);
 
      This function is called when stream parser encounters an error
-     in parsing. The default function stops the stream parser for the
-     TCP socket and sets the error in the socket. The default function
-     can be changed by setting the callback to non-NULL in strp_init.
+     in parsing. The default function stops the stream parser and
+     sets the error in the socket if the parser is in receive callback
+     mode. The default function can be changed by setting the callback
+     to non-NULL in strp_init.
 
-Functions
----------
+Statistics
+==========
 
-The upper layer calls strp_tcp_data_ready when data is ready on the lower
-socket for strparser to process. This should be called from a data_ready
-callback that is set on the socket.
+Various counters are kept for each stream parser instance. These are in
+the strp_stats structure. strp_aggr_stats is a convenience structure for
+accumulating statistics for multiple stream parser instances.
+save_strp_stats and aggregate_strp_stats are helper functions to save
+and aggregate statistics.
 
-strp_stop is called to completely stop stream parser operations. This
-is called internally when the stream parser encounters an error, and
-it is called from the upper layer when unattaching a TCP socket.
+Message assembly limits
+=======================
 
-strp_done is called to unattach the stream parser from the TCP socket.
-This must be called after the stream processor has be stopped.
+The stream parser provide mechanisms to limit the resources consumed by
+message assembly.
 
-strp_check_rcv is called to check for new messages on the socket. This
-is normally called at initialization of the a stream parser instance
-of after strp_unpause.
+A timer is set when assembly starts for a new message. In receive
+callback mode the message timeout is taken from rcvtime for the
+associated TCP socket. In general mode, the timeout is passed as an
+argument in strp_process. If the timer fires before assembly completes
+the stream parser is aborted and the ETIMEDOUT error is set on the TCP
+socket if in receive callback mode.
 
-Statistics
-----------
+In receive callback mode, message length is limited to the receive
+buffer size of the associated TCP socket. If the length returned by
+parse_msg is greater than the socket buffer size then the stream parser
+is aborted with EMSGSIZE error set on the TCP socket. Note that this
+makes the maximum size of receive skbuffs for a socket with a stream
+parser to be 2*sk_rcvbuf of the TCP socket.
 
-Various counters are kept for each stream parser for a TCP socket.
-These are in the strp_stats structure. strp_aggr_stats is a convenience
-structure for accumulating statistics for multiple stream parser
-instances. save_strp_stats and aggregate_strp_stats are helper functions
-to save and aggregate statistics.
+In general mode the message length limit is passed in as an argument
+to strp_process.
 
-Message assembly limits
------------------------
+Author
+======
 
-The stream parser provide mechanisms to limit the resources consumed by
-message assembly.
+Tom Herbert (tom@quantonium.net)
 
-A timer is set when assembly starts for a new message. The message
-timeout is taken from rcvtime for the associated TCP socket. If the
-timer fires before assembly completes the stream parser is aborted
-and the ETIMEDOUT error is set on the TCP socket.
-
-Message length is limited to the receive buffer size of the associated
-TCP socket. If the length returned by parse_msg is greater than
-the socket buffer size then the stream parser is aborted with
-EMSGSIZE error set on the TCP socket. Note that this makes the
-maximum size of receive skbuffs for a socket with a stream parser
-to be 2*sk_rcvbuf of the TCP socket.
diff --git a/MAINTAINERS b/MAINTAINERS
index 44cb004c765d..b3a8ca6aa3ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2477,7 +2477,7 @@ Q:	https://patchwork.open-mesh.org/project/batman/list/
 S:	Maintained
 F:	Documentation/ABI/testing/sysfs-class-net-batman-adv
 F:	Documentation/ABI/testing/sysfs-class-net-mesh
-F:	Documentation/networking/batman-adv.txt
+F:	Documentation/networking/batman-adv.rst
 F:	include/uapi/linux/batman_adv.h
 F:	net/batman-adv/
 
@@ -5101,6 +5101,7 @@ F:	include/linux/of_net.h
 F:	include/linux/phy.h
 F:	include/linux/phy_fixed.h
 F:	include/linux/platform_data/mdio-gpio.h
+F:	include/linux/platform_data/mdio-bcm-unimac.h
 F:	include/trace/events/mdio.h
 F:	include/uapi/linux/mdio.h
 F:	include/uapi/linux/mii.h
@@ -6147,6 +6148,14 @@ S:	Maintained
 F:	drivers/net/ethernet/hisilicon/
 F:	Documentation/devicetree/bindings/net/hisilicon*.txt
 
+HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
+M:	Yisen Zhuang <yisen.zhuang@huawei.com>
+M:	Salil Mehta <salil.mehta@huawei.com>
+L:	netdev@vger.kernel.org
+W:	http://www.hisilicon.com
+S:	Maintained
+F:	drivers/net/ethernet/hisilicon/hns3/
+
 HISILICON ROCE DRIVER
 M:	Lijun Ou <oulijun@huawei.com>
 M:	Wei Hu(Xavier) <xavier.huwei@huawei.com>
@@ -6257,6 +6266,7 @@ M:	Haiyang Zhang <haiyangz@microsoft.com>
 M:	Stephen Hemminger <sthemmin@microsoft.com>
 L:	devel@linuxdriverproject.org
 S:	Maintained
+F:	Documentation/networking/netvsc.txt
 F:	arch/x86/include/asm/mshyperv.h
 F:	arch/x86/include/uapi/asm/hyperv.h
 F:	arch/x86/kernel/cpu/mshyperv.c
@@ -8424,7 +8434,9 @@ F:	include/uapi/linux/uvcvideo.h
 
 MEDIATEK ETHERNET DRIVER
 M:	Felix Fietkau <nbd@openwrt.org>
-M:	John Crispin <blogic@openwrt.org>
+M:	John Crispin <john@phrozen.org>
+M:	Sean Wang <sean.wang@mediatek.com>
+M:	Nelson Chang <nelson.chang@mediatek.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/mediatek/
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 7b285dd4fe05..c6133a045352 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -109,4 +109,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index f1e3b20dce9f..9abf02d6855a 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -102,5 +102,7 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 5dd5c5d0d642..002eb85a6941 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -111,4 +111,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index f8f7b47e247f..e268e51a38d1 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 882823bec153..6c755bc07975 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -120,4 +120,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index c710db354ff2..ac82a3f26dbf 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a0d4dc9f4eb2..3b2bf7ae703b 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
 
 #define SO_PEERGROUPS		0x4034
 
+#define SO_ZEROCOPY		0x4035
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 52a63f4175cb..a56916c83565 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 186fd8199f54..b2f5c50d0947 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -98,6 +98,8 @@
 
 #define SO_PEERGROUPS		0x003d
 
+#define SO_ZEROCOPY		0x003e
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 3eed2761c149..220059999e74 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -113,4 +113,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c
index 1fd25e872ece..da27ddfa75a7 100644
--- a/drivers/atm/adummy.c
+++ b/drivers/atm/adummy.c
@@ -71,7 +71,7 @@ static struct attribute *adummy_attrs[] = {
 	NULL
 };
 
-static struct attribute_group adummy_group_attrs = {
+static const struct attribute_group adummy_group_attrs = {
 	.name = NULL, /* We want them in dev's root folder */
 	.attrs = adummy_attrs
 };
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 906705e5f776..acf16c323e38 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -2374,7 +2374,7 @@ MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
 
 /********** module entry **********/
 
-static struct pci_device_id amb_pci_tbl[] = {
+static const struct pci_device_id amb_pci_tbl[] = {
 	{ PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
 	{ PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
 	{ 0, }
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index b042ec458544..ce47eb17901d 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -2292,7 +2292,7 @@ err_disable:
 }
 
 
-static struct pci_device_id eni_pci_tbl[] = {
+static const struct pci_device_id eni_pci_tbl[] = {
 	{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_FPGA), 0 /* FPGA */ },
 	{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_ASIC), 1 /* ASIC */ },
 	{ 0, }
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 22dcab952a24..6b6368a56526 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -2030,7 +2030,7 @@ static void firestream_remove_one(struct pci_dev *pdev)
 	func_exit ();
 }
 
-static struct pci_device_id firestream_pci_tbl[] = {
+static const struct pci_device_id firestream_pci_tbl[] = {
 	{ PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS50), FS_IS50},
 	{ PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS155), FS_IS155},
 	{ 0, }
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index f0433adcd8fc..f8b7e86907cc 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2757,7 +2757,7 @@ static void fore200e_pca_remove_one(struct pci_dev *pci_dev)
 }
 
 
-static struct pci_device_id fore200e_pca_tbl[] = {
+static const struct pci_device_id fore200e_pca_tbl[] = {
     { PCI_VENDOR_ID_FORE, PCI_DEVICE_ID_FORE_PCA200E, PCI_ANY_ID, PCI_ANY_ID,
       0, 0, (unsigned long) &fore200e_bus[0] },
     { 0, }
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 37ee21c5a5ca..8f6156d475d1 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -2851,7 +2851,7 @@ MODULE_PARM_DESC(irq_coalesce, "use interrupt coalescing (default 1)");
 module_param(sdh, bool, 0);
 MODULE_PARM_DESC(sdh, "use SDH framing (default 0)");
 
-static struct pci_device_id he_pci_tbl[] = {
+static const struct pci_device_id he_pci_tbl[] = {
 	{ PCI_VDEVICE(FORE, PCI_DEVICE_ID_FORE_HE), 0 },
 	{ 0, }
 };
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 0f18480b33b5..7e76b35f422c 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2867,7 +2867,7 @@ MODULE_PARM_DESC(max_tx_size, "maximum size of TX AAL5 frames");
 MODULE_PARM_DESC(max_rx_size, "maximum size of RX AAL5 frames");
 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
 
-static struct pci_device_id hrz_pci_tbl[] = {
+static const struct pci_device_id hrz_pci_tbl[] = {
 	{ PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_HORIZON, PCI_ANY_ID, PCI_ANY_ID,
 	  0, 0, 0 },
 	{ 0, }
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 60bacba03d17..b7a168c46692 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3725,7 +3725,7 @@ err_out_disable_pdev:
 	return err;
 }
 
-static struct pci_device_id idt77252_pci_tbl[] =
+static const struct pci_device_id idt77252_pci_tbl[] =
 {
 	{ PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77252), 0 },
 	{ 0, }
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index a4fa6c82261e..fc72b763fdd7 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -3266,7 +3266,7 @@ static void ia_remove_one(struct pci_dev *pdev)
       	kfree(iadev);
 }
 
-static struct pci_device_id ia_pci_tbl[] = {
+static const struct pci_device_id ia_pci_tbl[] = {
 	{ PCI_VENDOR_ID_IPHASE, 0x0008, PCI_ANY_ID, PCI_ANY_ID, },
 	{ PCI_VENDOR_ID_IPHASE, 0x0009, PCI_ANY_ID, PCI_ANY_ID, },
 	{ 0,}
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 1a9bc51284b0..2351dad78ff5 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -2589,7 +2589,7 @@ static int lanai_init_one(struct pci_dev *pci,
 	return result;
 }
 
-static struct pci_device_id lanai_pci_tbl[] = {
+static const struct pci_device_id lanai_pci_tbl[] = {
 	{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAI2) },
 	{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAIHB) },
 	{ 0, }	/* terminal entry */
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index d879f3bca107..9588d80f318e 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -253,7 +253,7 @@ static void nicstar_remove_one(struct pci_dev *pcidev)
 	kfree(card);
 }
 
-static struct pci_device_id nicstar_pci_tbl[] = {
+static const struct pci_device_id nicstar_pci_tbl[] = {
 	{ PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77201), 0 },
 	{0,}			/* terminate list */
 };
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index c8f2ca6d8b29..8754793223cd 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -611,7 +611,7 @@ static struct attribute *solos_attrs[] = {
 	NULL
 };
 
-static struct attribute_group solos_attr_group = {
+static const struct attribute_group solos_attr_group = {
 	.attrs = solos_attrs,
 	.name = "parameters",
 };
@@ -628,7 +628,7 @@ static struct attribute *gpio_attrs[] = {
 	NULL
 };
 
-static struct attribute_group gpio_attr_group = {
+static const struct attribute_group gpio_attr_group = {
 	.attrs = gpio_attrs,
 	.name = "gpio",
 };
@@ -1476,7 +1476,7 @@ static void fpga_remove(struct pci_dev *dev)
 	kfree(card);
 }
 
-static struct pci_device_id fpga_pci_tbl[] = {
+static const struct pci_device_id fpga_pci_tbl[] = {
 	{ 0x10ee, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
 	{ 0, }
 };
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 07bdd51b3b9a..1ef67db03c8e 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1642,7 +1642,7 @@ out_free:
 
 MODULE_LICENSE("GPL");
 
-static struct pci_device_id zatm_pci_tbl[] = {
+static const struct pci_device_id zatm_pci_tbl[] = {
 	{ PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1221), ZATM_COPPER },
 	{ PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1225), 0 },
 	{ 0, }
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 35952a94875e..3a6ead603e49 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -98,6 +98,7 @@ config BT_HCIUART_NOKIA
 	depends on BT_HCIUART_SERDEV
 	depends on PM
 	select BT_HCIUART_H4
+	select BT_BCM
 	help
 	  Nokia H4+ is serial protocol for communication between Bluetooth
 	  device and host. This protocol is required for Bluetooth devices
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index b793853ff05f..204afe66de92 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -140,7 +140,8 @@ MODULE_DEVICE_TABLE(usb, ath3k_table);
 
 #define BTUSB_ATH3012		0x80
 /* This table is to load patch and sysconfig files
- * for AR3012 */
+ * for AR3012
+ */
 static const struct usb_device_id ath3k_blist_tbl[] = {
 
 	/* Atheros AR3012 with sflash firmware*/
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 32dcac017395..194788739a83 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -684,14 +684,16 @@ static int bt3c_config(struct pcmcia_device *link)
 	unsigned long try;
 
 	/* First pass: look for a config entry that looks normal.
-	   Two tries: without IO aliases, then with aliases */
+	 * Two tries: without IO aliases, then with aliases
+	 */
 	for (try = 0; try < 2; try++)
 		if (!pcmcia_loop_config(link, bt3c_check_config, (void *) try))
 			goto found_port;
 
 	/* Second pass: try to find an entry that isn't picky about
-	   its base address, then try to grab any standard serial port
-	   address, and finally try to get any free port. */
+	 * its base address, then try to grab any standard serial port
+	 * address, and finally try to get any free port.
+	 */
 	if (!pcmcia_loop_config(link, bt3c_check_config_notpicky, NULL))
 		goto found_port;
 
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index eb794f08b238..03341ce98c32 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -1455,7 +1455,8 @@ done:
 	fw_dump_ptr = fw_dump_data;
 
 	/* Dump all the memory data into single file, a userspace script will
-	   be used to split all the memory data to multiple files*/
+	 * be used to split all the memory data to multiple files
+	 */
 	BT_INFO("== btmrvl firmware dump to /sys/class/devcoredump start");
 	for (idx = 0; idx < dump_num; idx++) {
 		struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx];
@@ -1482,7 +1483,8 @@ done:
 	}
 
 	/* fw_dump_data will be free in device coredump release function
-	   after 5 min*/
+	 * after 5 min
+	 */
 	dev_coredumpv(&card->func->dev, fw_dump_data, fw_dump_len, GFP_KERNEL);
 	BT_INFO("== btmrvl firmware dump to /sys/class/devcoredump end");
 }
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 28afd5d585f9..0bbdfcef2aa8 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -81,7 +81,7 @@ static int rome_patch_ver_req(struct hci_dev *hdev, u32 *rome_version)
 	 * and lower 2 bytes from patch will be used.
 	 */
 	*rome_version = (le32_to_cpu(ver->soc_id) << 16) |
-		        (le16_to_cpu(ver->rome_ver) & 0x0000ffff);
+			(le16_to_cpu(ver->rome_ver) & 0x0000ffff);
 
 out:
 	kfree_skb(skb);
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 8279094dd713..d9a99b4302ea 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -279,6 +279,8 @@ static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff)
 		return ret;
 	ret = fw->size;
 	*buff = kmemdup(fw->data, ret, GFP_KERNEL);
+	if (!*buff)
+		ret = -ENOMEM;
 
 	release_firmware(fw);
 
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 1cb958e199eb..c8e945d19ffe 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -144,7 +144,8 @@ static int btsdio_rx_packet(struct btsdio_data *data)
 	if (!skb) {
 		/* Out of memory. Prepare a read retry and just
 		 * return with the expectation that the next time
-		 * we're called we'll have more memory. */
+		 * we're called we'll have more memory.
+		 */
 		return -ENOMEM;
 	}
 
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index 7df79bb12350..310e9c2e09b6 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -614,14 +614,16 @@ static int btuart_config(struct pcmcia_device *link)
 	int try;
 
 	/* First pass: look for a config entry that looks normal.
-	   Two tries: without IO aliases, then with aliases */
+	 * Two tries: without IO aliases, then with aliases
+	 */
 	for (try = 0; try < 2; try++)
 		if (!pcmcia_loop_config(link, btuart_check_config, &try))
 			goto found_port;
 
 	/* Second pass: try to find an entry that isn't picky about
-	   its base address, then try to grab any standard serial port
-	   address, and finally try to get any free port. */
+	 * its base address, then try to grab any standard serial port
+	 * address, and finally try to get any free port.
+	 */
 	if (!pcmcia_loop_config(link, btuart_check_config_notpicky, NULL))
 		goto found_port;
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index fa24d693af24..24cc8383fdd4 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -131,7 +131,8 @@ static const struct usb_device_id btusb_table[] = {
 	{ USB_DEVICE(0x19ff, 0x0239), .driver_info = BTUSB_BCM_PATCHRAM },
 
 	/* Broadcom BCM43142A0 (Foxconn/Lenovo) */
-	{ USB_DEVICE(0x105b, 0xe065), .driver_info = BTUSB_BCM_PATCHRAM },
+	{ USB_VENDOR_AND_INTERFACE_INFO(0x105b, 0xff, 0x01, 0x01),
+	  .driver_info = BTUSB_BCM_PATCHRAM },
 
 	/* Broadcom BCM920703 (HTC Vive) */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0bb4, 0xff, 0x01, 0x01),
@@ -268,6 +269,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0489, 0xe0a2), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME },
+	{ USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME },
 
 	/* Broadcom BCM2035 */
 	{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -656,7 +658,8 @@ static void btusb_intr_complete(struct urb *urb)
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (err < 0) {
 		/* -EPERM: urb is being killed;
-		 * -ENODEV: device got disconnected */
+		 * -ENODEV: device got disconnected
+		 */
 		if (err != -EPERM && err != -ENODEV)
 			BT_ERR("%s urb %p failed to resubmit (%d)",
 			       hdev->name, urb, -err);
@@ -745,7 +748,8 @@ static void btusb_bulk_complete(struct urb *urb)
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (err < 0) {
 		/* -EPERM: urb is being killed;
-		 * -ENODEV: device got disconnected */
+		 * -ENODEV: device got disconnected
+		 */
 		if (err != -EPERM && err != -ENODEV)
 			BT_ERR("%s urb %p failed to resubmit (%d)",
 			       hdev->name, urb, -err);
@@ -840,7 +844,8 @@ static void btusb_isoc_complete(struct urb *urb)
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (err < 0) {
 		/* -EPERM: urb is being killed;
-		 * -ENODEV: device got disconnected */
+		 * -ENODEV: device got disconnected
+		 */
 		if (err != -EPERM && err != -ENODEV)
 			BT_ERR("%s urb %p failed to resubmit (%d)",
 			       hdev->name, urb, -err);
@@ -952,7 +957,8 @@ static void btusb_diag_complete(struct urb *urb)
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (err < 0) {
 		/* -EPERM: urb is being killed;
-		 * -ENODEV: device got disconnected */
+		 * -ENODEV: device got disconnected
+		 */
 		if (err != -EPERM && err != -ENODEV)
 			BT_ERR("%s urb %p failed to resubmit (%d)",
 			       hdev->name, urb, -err);
@@ -2896,7 +2902,8 @@ static int btusb_probe(struct usb_interface *intf,
 		struct usb_device *udev = interface_to_usbdev(intf);
 
 		/* Old firmware would otherwise let ath3k driver load
-		 * patch and sysconfig files */
+		 * patch and sysconfig files
+		 */
 		if (le16_to_cpu(udev->descriptor.bcdDevice) <= 0x0001)
 			return -ENODEV;
 	}
@@ -3067,6 +3074,12 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_QCA_ROME) {
 		data->setup_on_usb = btusb_setup_qca;
 		hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
+
+		/* QCA Rome devices lose their updated firmware over suspend,
+		 * but the USB hub doesn't notice any status change.
+		 * Explicitly request a device reset on resume.
+		 */
+		set_bit(BTUSB_RESET_RESUME, &data->flags);
 	}
 
 #ifdef CONFIG_BT_HCIBTUSB_RTL
@@ -3259,13 +3272,28 @@ static void play_deferred(struct btusb_data *data)
 	int err;
 
 	while ((urb = usb_get_from_anchor(&data->deferred))) {
+		usb_anchor_urb(urb, &data->tx_anchor);
+
 		err = usb_submit_urb(urb, GFP_ATOMIC);
-		if (err < 0)
+		if (err < 0) {
+			if (err != -EPERM && err != -ENODEV)
+				BT_ERR("%s urb %p submission failed (%d)",
+				       data->hdev->name, urb, -err);
+			kfree(urb->setup_packet);
+			usb_unanchor_urb(urb);
+			usb_free_urb(urb);
 			break;
+		}
 
 		data->tx_in_flight++;
+		usb_free_urb(urb);
+	}
+
+	/* Cleanup the rest deferred urbs. */
+	while ((urb = usb_get_from_anchor(&data->deferred))) {
+		kfree(urb->setup_packet);
+		usb_free_urb(urb);
 	}
-	usb_scuttle_anchored_urbs(&data->deferred);
 }
 
 static int btusb_resume(struct usb_interface *intf)
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
index 85a3978b064f..5ef8000f90a9 100644
--- a/drivers/bluetooth/btwilink.c
+++ b/drivers/bluetooth/btwilink.c
@@ -93,8 +93,7 @@ static void st_reg_completion_cb(void *priv_data, int data)
 	complete(&lhst->wait_reg_completion);
 }
 
-/* Called by Shared Transport layer when receive data is
- * available */
+/* Called by Shared Transport layer when receive data is available */
 static long st_receive(void *priv_data, struct sk_buff *skb)
 {
 	struct ti_st *lhst = priv_data;
@@ -198,7 +197,8 @@ static int ti_st_open(struct hci_dev *hdev)
 		}
 
 		/* Is ST registration callback
-		 * called with ERROR status? */
+		 * called with ERROR status?
+		 */
 		if (hst->reg_status != 0) {
 			BT_ERR("ST registration completed with invalid "
 					"status %d", hst->reg_status);
@@ -276,7 +276,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 
 static int bt_ti_probe(struct platform_device *pdev)
 {
-	static struct ti_st *hst;
+	struct ti_st *hst;
 	struct hci_dev *hdev;
 	int err;
 
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 6a662d0161b4..6b42372c53ef 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -176,7 +176,7 @@ static irqreturn_t bcm_host_wake(int irq, void *data)
 static int bcm_request_irq(struct bcm_data *bcm)
 {
 	struct bcm_device *bdev = bcm->dev;
-	int err = 0;
+	int err;
 
 	/* If this is not a platform device, do not enable PM functionalities */
 	mutex_lock(&bcm_device_lock);
@@ -185,21 +185,23 @@ static int bcm_request_irq(struct bcm_data *bcm)
 		goto unlock;
 	}
 
-	if (bdev->irq > 0) {
-		err = devm_request_irq(&bdev->pdev->dev, bdev->irq,
-				       bcm_host_wake, IRQF_TRIGGER_RISING,
-				       "host_wake", bdev);
-		if (err)
-			goto unlock;
+	if (bdev->irq <= 0) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 
-		device_init_wakeup(&bdev->pdev->dev, true);
+	err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake,
+			       IRQF_TRIGGER_RISING, "host_wake", bdev);
+	if (err)
+		goto unlock;
 
-		pm_runtime_set_autosuspend_delay(&bdev->pdev->dev,
-						 BCM_AUTOSUSPEND_DELAY);
-		pm_runtime_use_autosuspend(&bdev->pdev->dev);
-		pm_runtime_set_active(&bdev->pdev->dev);
-		pm_runtime_enable(&bdev->pdev->dev);
-	}
+	device_init_wakeup(&bdev->pdev->dev, true);
+
+	pm_runtime_set_autosuspend_delay(&bdev->pdev->dev,
+					 BCM_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(&bdev->pdev->dev);
+	pm_runtime_set_active(&bdev->pdev->dev);
+	pm_runtime_enable(&bdev->pdev->dev);
 
 unlock:
 	mutex_unlock(&bcm_device_lock);
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 4e328d7d47bb..3b82a87224a9 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -172,7 +172,7 @@ struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb,
 			    const struct h4_recv_pkt *pkts, int pkts_count)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
-	u8 alignment = hu->alignment;
+	u8 alignment = hu->alignment ? hu->alignment : 1;
 
 	while (count) {
 		int i, len;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 8397b716fa65..a746627e784e 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -457,7 +457,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
 	BT_DBG("tty %p", tty);
 
 	/* Error if the tty has no write op instead of leaving an exploitable
-	   hole */
+	 * hole
+	 */
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
 
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index c982943f0747..424c15aa7bb7 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -622,7 +622,8 @@ static int download_firmware(struct ll_device *lldev)
 			cmd = (struct hci_command *)action_ptr;
 			if (cmd->opcode == 0xff36) {
 				/* ignore remote change
-				 * baud rate HCI VS command */
+				 * baud rate HCI VS command
+				 */
 				bt_dev_warn(lldev->hu.hdev, "change remote baud rate command in firmware");
 				break;
 			}
@@ -742,14 +743,8 @@ static int hci_ti_probe(struct serdev_device *serdev)
 static void hci_ti_remove(struct serdev_device *serdev)
 {
 	struct ll_device *lldev = serdev_device_get_drvdata(serdev);
-	struct hci_uart *hu = &lldev->hu;
-	struct hci_dev *hdev = hu->hdev;
 
-	cancel_work_sync(&hu->write_work);
-
-	hci_unregister_dev(hdev);
-	hci_free_dev(hdev);
-	hu->proto->close(hu);
+	hci_uart_unregister_device(&lldev->hu);
 }
 
 static const struct of_device_id hci_ti_of_match[] = {
diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index 181a15b549e5..3539fd03f47e 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -767,16 +767,8 @@ static int nokia_bluetooth_serdev_probe(struct serdev_device *serdev)
 static void nokia_bluetooth_serdev_remove(struct serdev_device *serdev)
 {
 	struct nokia_bt_dev *btdev = serdev_device_get_drvdata(serdev);
-	struct hci_uart *hu = &btdev->hu;
-	struct hci_dev *hdev = hu->hdev;
 
-	cancel_work_sync(&hu->write_work);
-
-	hci_unregister_dev(hdev);
-	hci_free_dev(hdev);
-	hu->proto->close(hu);
-
-	pm_runtime_disable(&btdev->serdev->dev);
+	hci_uart_unregister_device(&btdev->hu);
 }
 
 static int nokia_bluetooth_runtime_suspend(struct device *dev)
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index aea930101dd2..b725ac4f7ff6 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -354,3 +354,16 @@ err_alloc:
 	return err;
 }
 EXPORT_SYMBOL_GPL(hci_uart_register_device);
+
+void hci_uart_unregister_device(struct hci_uart *hu)
+{
+	struct hci_dev *hdev = hu->hdev;
+
+	hci_unregister_dev(hdev);
+	hci_free_dev(hdev);
+
+	cancel_work_sync(&hu->write_work);
+
+	hu->proto->close(hu);
+}
+EXPORT_SYMBOL_GPL(hci_uart_unregister_device);
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index c6e9e1cf63f8..d9cd95d81149 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -112,6 +112,7 @@ struct hci_uart {
 int hci_uart_register_proto(const struct hci_uart_proto *p);
 int hci_uart_unregister_proto(const struct hci_uart_proto *p);
 int hci_uart_register_device(struct hci_uart *hu, const struct hci_uart_proto *p);
+void hci_uart_unregister_device(struct hci_uart *hu);
 
 int hci_uart_tx_wakeup(struct hci_uart *hu);
 int hci_uart_init_ready(struct hci_uart *hu);
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index 19982a4a9bba..18f5ed082f41 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_BNXT_RE
     tristate "Broadcom Netxtreme HCA support"
     depends on ETHERNET && NETDEVICES && PCI && INET && DCB
+    depends on MAY_USE_DEVLINK
     select NET_VENDOR_BROADCOM
     select BNXT
     ---help---
diff --git a/drivers/isdn/hardware/eicon/divacapi.h b/drivers/isdn/hardware/eicon/divacapi.h
index a315a2914d70..c4868a0d82f4 100644
--- a/drivers/isdn/hardware/eicon/divacapi.h
+++ b/drivers/isdn/hardware/eicon/divacapi.h
@@ -26,15 +26,7 @@
 
 /*#define DEBUG */
 
-
-
-
-
-
-
-
-
-
+#include <linux/types.h>
 
 #define IMPLEMENT_DTMF 1
 #define IMPLEMENT_LINE_INTERCONNECT2 1
@@ -82,8 +74,6 @@
 #define CODEC_PERMANENT    0x02
 #define ADV_VOICE          0x03
 #define MAX_CIP_TYPES      5  /* kind of CIP types for group optimization */
-#define C_IND_MASK_DWORDS  ((MAX_APPL + 32) >> 5)
-
 
 #define FAX_CONNECT_INFO_BUFFER_SIZE  256
 #define NCPI_BUFFER_SIZE              256
@@ -265,8 +255,8 @@ struct _PLCI {
 	word          ncci_ring_list;
 	byte          inc_dis_ncci_table[MAX_CHANNELS_PER_PLCI];
 	t_std_internal_command internal_command_queue[MAX_INTERNAL_COMMAND_LEVELS];
-	dword         c_ind_mask_table[C_IND_MASK_DWORDS];
-	dword         group_optimization_mask_table[C_IND_MASK_DWORDS];
+	DECLARE_BITMAP(c_ind_mask_table, MAX_APPL);
+	DECLARE_BITMAP(group_optimization_mask_table, MAX_APPL);
 	byte          RBuffer[200];
 	dword         msg_in_queue[MSG_IN_QUEUE_SIZE/sizeof(dword)];
 	API_SAVE      saved_msg;
diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c
index 3b11422b1cce..eadd1ed1e014 100644
--- a/drivers/isdn/hardware/eicon/message.c
+++ b/drivers/isdn/hardware/eicon/message.c
@@ -23,9 +23,7 @@
  *
  */
 
-
-
-
+#include <linux/bitmap.h>
 
 #include "platform.h"
 #include "di_defs.h"
@@ -35,19 +33,9 @@
 #include "mdm_msg.h"
 #include "divasync.h"
 
-
-
 #define FILE_ "MESSAGE.C"
 #define dprintf
 
-
-
-
-
-
-
-
-
 /*------------------------------------------------------------------*/
 /* This is options supported for all adapters that are server by    */
 /* XDI driver. Allo it is not necessary to ask it from every adapter*/
@@ -72,9 +60,6 @@ static dword diva_xdi_extended_features = 0;
 /*------------------------------------------------------------------*/
 
 static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci);
-static void set_group_ind_mask(PLCI *plci);
-static void clear_group_ind_mask_bit(PLCI *plci, word b);
-static byte test_group_ind_mask_bit(PLCI *plci, word b);
 void AutomaticLaw(DIVA_CAPI_ADAPTER *);
 word CapiRelease(word);
 word CapiRegister(word);
@@ -1087,106 +1072,6 @@ static void plci_remove(PLCI *plci)
 }
 
 /*------------------------------------------------------------------*/
-/* Application Group function helpers                               */
-/*------------------------------------------------------------------*/
-
-static void set_group_ind_mask(PLCI *plci)
-{
-	word i;
-
-	for (i = 0; i < C_IND_MASK_DWORDS; i++)
-		plci->group_optimization_mask_table[i] = 0xffffffffL;
-}
-
-static void clear_group_ind_mask_bit(PLCI *plci, word b)
-{
-	plci->group_optimization_mask_table[b >> 5] &= ~(1L << (b & 0x1f));
-}
-
-static byte test_group_ind_mask_bit(PLCI *plci, word b)
-{
-	return ((plci->group_optimization_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0);
-}
-
-/*------------------------------------------------------------------*/
-/* c_ind_mask operations for arbitrary MAX_APPL                     */
-/*------------------------------------------------------------------*/
-
-static void clear_c_ind_mask(PLCI *plci)
-{
-	word i;
-
-	for (i = 0; i < C_IND_MASK_DWORDS; i++)
-		plci->c_ind_mask_table[i] = 0;
-}
-
-static byte c_ind_mask_empty(PLCI *plci)
-{
-	word i;
-
-	i = 0;
-	while ((i < C_IND_MASK_DWORDS) && (plci->c_ind_mask_table[i] == 0))
-		i++;
-	return (i == C_IND_MASK_DWORDS);
-}
-
-static void set_c_ind_mask_bit(PLCI *plci, word b)
-{
-	plci->c_ind_mask_table[b >> 5] |= (1L << (b & 0x1f));
-}
-
-static void clear_c_ind_mask_bit(PLCI *plci, word b)
-{
-	plci->c_ind_mask_table[b >> 5] &= ~(1L << (b & 0x1f));
-}
-
-static byte test_c_ind_mask_bit(PLCI *plci, word b)
-{
-	return ((plci->c_ind_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0);
-}
-
-static void dump_c_ind_mask(PLCI *plci)
-{
-	word i, j, k;
-	dword d;
-	char *p;
-	char buf[40];
-
-	for (i = 0; i < C_IND_MASK_DWORDS; i += 4)
-	{
-		p = buf + 36;
-		*p = '\0';
-		for (j = 0; j < 4; j++)
-		{
-			if (i + j < C_IND_MASK_DWORDS)
-			{
-				d = plci->c_ind_mask_table[i + j];
-				for (k = 0; k < 8; k++)
-				{
-					*(--p) = hex_asc_lo(d);
-					d >>= 4;
-				}
-			}
-			else if (i != 0)
-			{
-				for (k = 0; k < 8; k++)
-					*(--p) = ' ';
-			}
-			*(--p) = ' ';
-		}
-		dbug(1, dprintf("c_ind_mask =%s", (char *) p));
-	}
-}
-
-
-
-
-
-#define dump_plcis(a)
-
-
-
-/*------------------------------------------------------------------*/
 /* translation function for each message                            */
 /*------------------------------------------------------------------*/
 
@@ -1457,13 +1342,13 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
 		return 1;
 	}
 	else if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) {
-		clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
-		dump_c_ind_mask(plci);
+		__clear_bit(appl->Id - 1, plci->c_ind_mask_table);
+		dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
 		Reject = GET_WORD(parms[0].info);
 		dbug(1, dprintf("Reject=0x%x", Reject));
 		if (Reject)
 		{
-			if (c_ind_mask_empty(plci))
+			if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
 			{
 				if ((Reject & 0xff00) == 0x3400)
 				{
@@ -1553,11 +1438,8 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
 				sig_req(plci, CALL_RES, 0);
 			}
 
-			for (i = 0; i < max_appl; i++) {
-				if (test_c_ind_mask_bit(plci, i)) {
-					sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
-				}
-			}
+			for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+				sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
 		}
 	}
 	return 1;
@@ -1584,13 +1466,10 @@ static byte disconnect_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
 	{
 		if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT)
 		{
-			clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
+			__clear_bit(appl->Id - 1, plci->c_ind_mask_table);
 			plci->appl = appl;
-			for (i = 0; i < max_appl; i++)
-			{
-				if (test_c_ind_mask_bit(plci, i))
-					sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
-			}
+			for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+				sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
 			plci->State = OUTG_DIS_PENDING;
 		}
 		if (plci->Sig.Id && plci->appl)
@@ -1634,7 +1513,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
 	{
 		/* clear ind mask bit, just in case of collsion of          */
 		/* DISCONNECT_IND and CONNECT_RES                           */
-		clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
+		__clear_bit(appl->Id - 1, plci->c_ind_mask_table);
 		ncci_free_receive_buffers(plci, 0);
 		if (plci_remove_check(plci))
 		{
@@ -1642,7 +1521,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
 		}
 		if (plci->State == INC_DIS_PENDING
 		    || plci->State == SUSPENDING) {
-			if (c_ind_mask_empty(plci)) {
+			if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) {
 				if (plci->State != SUSPENDING) plci->State = IDLE;
 				dbug(1, dprintf("chs=%d", plci->channels));
 				if (!plci->channels) {
@@ -3351,13 +3230,11 @@ static byte select_b_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
 				}
 				plci->State = INC_CON_CONNECTED_ALERT;
 				plci->appl = appl;
-				clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
-				dump_c_ind_mask(plci);
-				for (i = 0; i < max_appl; i++) /* disconnect the other appls */
-				{                         /* its quasi a connect        */
-					if (test_c_ind_mask_bit(plci, i))
-						sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
-				}
+				__clear_bit(appl->Id - 1, plci->c_ind_mask_table);
+				dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
+				/* disconnect the other appls its quasi a connect */
+				for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+					sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
 			}
 
 			api_save_msg(msg, "s", &plci->saved_msg);
@@ -5692,19 +5569,17 @@ static void sig_ind(PLCI *plci)
 		cip = find_cip(a, parms[4], parms[6]);
 		cip_mask = 1L << cip;
 		dbug(1, dprintf("cip=%d,cip_mask=%lx", cip, cip_mask));
-		clear_c_ind_mask(plci);
+		bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
 		if (!remove_started && !a->adapter_disabled)
 		{
-			set_c_ind_mask_bit(plci, MAX_APPL);
 			group_optimization(a, plci);
-			for (i = 0; i < max_appl; i++) {
+			for_each_set_bit(i, plci->group_optimization_mask_table, max_appl) {
 				if (application[i].Id
 				    && (a->CIP_Mask[i] & 1 || a->CIP_Mask[i] & cip_mask)
-				    && CPN_filter_ok(parms[0], a, i)
-				    && test_group_ind_mask_bit(plci, i)) {
+				    && CPN_filter_ok(parms[0], a, i)) {
 					dbug(1, dprintf("storedcip_mask[%d]=0x%lx", i, a->CIP_Mask[i]));
-					set_c_ind_mask_bit(plci, i);
-					dump_c_ind_mask(plci);
+					__set_bit(i, plci->c_ind_mask_table);
+					dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
 					plci->State = INC_CON_PENDING;
 					plci->call_dir = (plci->call_dir & ~(CALL_DIR_OUT | CALL_DIR_ORIGINATE)) |
 						CALL_DIR_IN | CALL_DIR_ANSWER;
@@ -5750,10 +5625,9 @@ static void sig_ind(PLCI *plci)
 						      SendMultiIE(plci, Id, multi_pi_parms, PI, 0x210, true));
 				}
 			}
-			clear_c_ind_mask_bit(plci, MAX_APPL);
-			dump_c_ind_mask(plci);
+			dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
 		}
-		if (c_ind_mask_empty(plci)) {
+		if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) {
 			sig_req(plci, HANGUP, 0);
 			send_req(plci);
 			plci->State = IDLE;
@@ -5994,13 +5868,13 @@ static void sig_ind(PLCI *plci)
 		break;
 
 	case RESUME:
-		clear_c_ind_mask_bit(plci, (word)(plci->appl->Id - 1));
+		__clear_bit(plci->appl->Id - 1, plci->c_ind_mask_table);
 		PUT_WORD(&resume_cau[4], GOOD);
 		sendf(plci->appl, _FACILITY_I, Id, 0, "ws", (word)3, resume_cau);
 		break;
 
 	case SUSPEND:
-		clear_c_ind_mask(plci);
+		bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
 
 		if (plci->NL.Id && !plci->nl_remove_id) {
 			mixer_remove(plci);
@@ -6037,15 +5911,12 @@ static void sig_ind(PLCI *plci)
 
 		if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT)
 		{
-			for (i = 0; i < max_appl; i++)
-			{
-				if (test_c_ind_mask_bit(plci, i))
-					sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
-			}
+			for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+				sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
 		}
 		else
 		{
-			clear_c_ind_mask(plci);
+			bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
 		}
 		if (!plci->appl)
 		{
@@ -6055,7 +5926,7 @@ static void sig_ind(PLCI *plci)
 				a->listen_active--;
 			}
 			plci->State = INC_DIS_PENDING;
-			if (c_ind_mask_empty(plci))
+			if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
 			{
 				plci->State = IDLE;
 				if (plci->NL.Id && !plci->nl_remove_id)
@@ -6341,14 +6212,10 @@ static void SendInfo(PLCI *plci, dword Id, byte **parms, byte iesent)
 			    || Info_Number == DSP
 			    || Info_Number == UUI)
 			{
-				for (j = 0; j < max_appl; j++)
-				{
-					if (test_c_ind_mask_bit(plci, j))
-					{
-						dbug(1, dprintf("Ovl_Ind"));
-						iesent = true;
-						sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
-					}
+				for_each_set_bit(j, plci->c_ind_mask_table, max_appl) {
+					dbug(1, dprintf("Ovl_Ind"));
+					iesent = true;
+					sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
 				}
 			}
 		}               /* all other signalling states */
@@ -6416,14 +6283,10 @@ static byte SendMultiIE(PLCI *plci, dword Id, byte **parms, byte ie_type,
 		}
 		else if (!plci->appl && Info_Number)
 		{                                        /* overlap receiving broadcast */
-			for (j = 0; j < max_appl; j++)
-			{
-				if (test_c_ind_mask_bit(plci, j))
-				{
-					iesent = true;
-					dbug(1, dprintf("Mlt_Ovl_Ind"));
-					sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
-				}
+			for_each_set_bit(j, plci->c_ind_mask_table, max_appl) {
+				iesent = true;
+				dbug(1, dprintf("Mlt_Ovl_Ind"));
+				sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
 			}
 		}                                        /* all other signalling states */
 		else if (Info_Number
@@ -7270,7 +7133,6 @@ static word get_plci(DIVA_CAPI_ADAPTER *a)
 	word i, j;
 	PLCI *plci;
 
-	dump_plcis(a);
 	for (i = 0; i < a->max_plci && a->plci[i].Id; i++);
 	if (i == a->max_plci) {
 		dbug(1, dprintf("get_plci: out of PLCIs"));
@@ -7321,8 +7183,8 @@ static word get_plci(DIVA_CAPI_ADAPTER *a)
 
 	plci->ncci_ring_list = 0;
 	for (j = 0; j < MAX_CHANNELS_PER_PLCI; j++) plci->inc_dis_ncci_table[j] = 0;
-	clear_c_ind_mask(plci);
-	set_group_ind_mask(plci);
+	bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
+	bitmap_fill(plci->group_optimization_mask_table, MAX_APPL);
 	plci->fax_connect_info_length = 0;
 	plci->nsf_control_bits = 0;
 	plci->ncpi_state = 0x00;
@@ -9373,10 +9235,10 @@ word CapiRelease(word Id)
 					if (plci->State == INC_CON_PENDING
 					    || plci->State == INC_CON_ALERT)
 					{
-						if (test_c_ind_mask_bit(plci, (word)(Id - 1)))
+						if (test_bit(Id - 1, plci->c_ind_mask_table))
 						{
-							clear_c_ind_mask_bit(plci, (word)(Id - 1));
-							if (c_ind_mask_empty(plci))
+							__clear_bit(Id - 1, plci->c_ind_mask_table);
+							if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
 							{
 								sig_req(plci, HANGUP, 0);
 								send_req(plci);
@@ -9384,10 +9246,10 @@ word CapiRelease(word Id)
 							}
 						}
 					}
-					if (test_c_ind_mask_bit(plci, (word)(Id - 1)))
+					if (test_bit(Id - 1, plci->c_ind_mask_table))
 					{
-						clear_c_ind_mask_bit(plci, (word)(Id - 1));
-						if (c_ind_mask_empty(plci))
+						__clear_bit(Id - 1, plci->c_ind_mask_table);
+						if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
 						{
 							if (!plci->appl)
 							{
@@ -9452,7 +9314,7 @@ word CapiRelease(word Id)
 static word plci_remove_check(PLCI *plci)
 {
 	if (!plci) return true;
-	if (!plci->NL.Id && c_ind_mask_empty(plci))
+	if (!plci->NL.Id && bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
 	{
 		if (plci->Sig.Id == 0xff)
 			plci->Sig.Id = 0;
@@ -14735,7 +14597,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
 	word appl_number_group_type[MAX_APPL];
 	PLCI *auxplci;
 
-	set_group_ind_mask(plci); /* all APPLs within this inc. call are allowed to dial in */
+	/* all APPLs within this inc. call are allowed to dial in */
+	bitmap_fill(plci->group_optimization_mask_table, MAX_APPL);
 
 	if (!a->group_optimization_enabled)
 	{
@@ -14771,13 +14634,12 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
 				if (a->plci[k].Id)
 				{
 					auxplci = &a->plci[k];
-					if (auxplci->appl == &application[i]) /* application has a busy PLCI */
-					{
+					if (auxplci->appl == &application[i]) {
+						/* application has a busy PLCI */
 						busy = true;
 						dbug(1, dprintf("Appl 0x%x is busy", i + 1));
-					}
-					else if (test_c_ind_mask_bit(auxplci, i)) /* application has an incoming call pending */
-					{
+					} else if (test_bit(i, plci->c_ind_mask_table)) {
+						/* application has an incoming call pending */
 						busy = true;
 						dbug(1, dprintf("Appl 0x%x has inc. call pending", i + 1));
 					}
@@ -14826,7 +14688,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
 					if (appl_number_group_type[i] == appl_number_group_type[j])
 					{
 						dbug(1, dprintf("Appl 0x%x is member of group 0x%x, no call", j + 1, appl_number_group_type[j]));
-						clear_group_ind_mask_bit(plci, j);           /* disable call on other group members */
+						/* disable call on other group members */
+						__clear_bit(j, plci->group_optimization_mask_table);
 						appl_number_group_type[j] = 0;       /* remove disabled group member from group list */
 					}
 				}
@@ -14834,7 +14697,7 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
 		}
 		else                                                 /* application should not get a call */
 		{
-			clear_group_ind_mask_bit(plci, i);
+			__clear_bit(i, plci->group_optimization_mask_table);
 		}
 	}
 
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index a306de4318d7..9375cef22420 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -311,9 +311,7 @@ module_param(ipddp_mode, int, 0);
 static int __init ipddp_init_module(void)
 {
 	dev_ipddp = ipddp_init();
-        if (IS_ERR(dev_ipddp))
-                return PTR_ERR(dev_ipddp);
-	return 0;
+	return PTR_ERR_OR_ZERO(dev_ipddp);
 }
 
 static void __exit ipddp_cleanup_module(void)
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index cbb4f8566bbe..d09b2b46ab63 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -20,7 +20,7 @@
 #include <linux/if_arcnet.h>
 
 #ifdef __KERNEL__
-#include  <linux/irqreturn.h>
+#include <linux/interrupt.h>
 
 /*
  * RECON_THRESHOLD is the maximum number of RECON messages to receive
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 01cab9548785..eb7f76753c9c 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -109,7 +109,7 @@ static struct attribute *com20020_state_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group com20020_state_group = {
+static const struct attribute_group com20020_state_group = {
 	.name = NULL,
 	.attrs = com20020_state_attrs,
 };
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 770623a0cc01..040b493f60ae 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -759,7 +759,7 @@ static struct attribute *per_bond_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group bonding_group = {
+static const struct attribute_group bonding_group = {
 	.name = "bonding",
 	.attrs = per_bond_attrs,
 };
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 0e0df0ba288c..f37ce0e1b603 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1232,7 +1232,7 @@ static struct attribute *at91_sysfs_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group at91_sysfs_attr_group = {
+static const struct attribute_group at91_sysfs_attr_group = {
 	.attrs = at91_sysfs_attrs,
 };
 
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index 2ba1a81500c1..12a53c8e8e1d 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1875,7 +1875,7 @@ static struct attribute *ican3_sysfs_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group ican3_sysfs_attr_group = {
+static const struct attribute_group ican3_sysfs_attr_group = {
 	.attrs = ican3_sysfs_attrs,
 };
 
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 648f91b58d1e..558667c814c9 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -327,12 +327,8 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
 static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
 			    struct phy_device *phy)
 {
-	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-	struct ethtool_eee *p = &priv->port_sts[port].eee;
 	int ret;
 
-	p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
-
 	ret = phy_init_eee(phy, 0);
 	if (ret)
 		return 0;
@@ -342,8 +338,8 @@ static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
 	return 1;
 }
 
-static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
-			      struct ethtool_eee *e)
+static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port,
+				  struct ethtool_eee *e)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
@@ -356,22 +352,14 @@ static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
-			      struct phy_device *phydev,
-			      struct ethtool_eee *e)
+static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port,
+				  struct ethtool_eee *e)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
 
 	p->eee_enabled = e->eee_enabled;
-
-	if (!p->eee_enabled) {
-		bcm_sf2_eee_enable_set(ds, port, false);
-	} else {
-		p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
-		if (!p->eee_enabled)
-			return -EOPNOTSUPP;
-	}
+	bcm_sf2_eee_enable_set(ds, port, e->eee_enabled);
 
 	return 0;
 }
@@ -800,7 +788,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
 static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
 			       struct ethtool_wolinfo *wol)
 {
-	struct net_device *p = ds->dst[ds->index].cpu_dp->netdev;
+	struct net_device *p = ds->dst->cpu_dp->netdev;
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_wolinfo pwol;
 
@@ -823,7 +811,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
 static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
 			      struct ethtool_wolinfo *wol)
 {
-	struct net_device *p = ds->dst[ds->index].cpu_dp->netdev;
+	struct net_device *p = ds->dst->cpu_dp->netdev;
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	s8 cpu_port = ds->dst->cpu_dp->index;
 	struct ethtool_wolinfo pwol;
@@ -1023,8 +1011,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
 	.set_wol		= bcm_sf2_sw_set_wol,
 	.port_enable		= bcm_sf2_port_setup,
 	.port_disable		= bcm_sf2_port_disable,
-	.get_eee		= bcm_sf2_sw_get_eee,
-	.set_eee		= bcm_sf2_sw_set_eee,
+	.get_mac_eee		= bcm_sf2_sw_get_mac_eee,
+	.set_mac_eee		= bcm_sf2_sw_set_mac_eee,
 	.port_bridge_join	= b53_br_join,
 	.port_bridge_leave	= b53_br_leave,
 	.port_stp_state_set	= b53_br_set_stp_state,
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index cd76e61f1fca..8e430d1ee297 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -20,6 +20,9 @@
 
 #include "lan9303.h"
 
+/* 13.2 System Control and Status Registers
+ * Multiply register number by 4 to get address offset.
+ */
 #define LAN9303_CHIP_REV 0x14
 # define LAN9303_CHIP_ID 0x9303
 #define LAN9303_IRQ_CFG 0x15
@@ -53,6 +56,9 @@
 #define LAN9303_VIRT_PHY_BASE 0x70
 #define LAN9303_VIRT_SPECIAL_CTRL 0x77
 
+/*13.4 Switch Fabric Control and Status Registers
+ * Accessed indirectly via SWITCH_CSR_CMD, SWITCH_CSR_DATA.
+ */
 #define LAN9303_SW_DEV_ID 0x0000
 #define LAN9303_SW_RESET 0x0001
 #define LAN9303_SW_RESET_RESET BIT(0)
@@ -242,7 +248,7 @@ static int lan9303_virt_phy_reg_write(struct lan9303 *chip, int regnum, u16 val)
 	return regmap_write(chip->regmap, LAN9303_VIRT_PHY_BASE + regnum, val);
 }
 
-static int lan9303_port_phy_reg_wait_for_completion(struct lan9303 *chip)
+static int lan9303_indirect_phy_wait_for_completion(struct lan9303 *chip)
 {
 	int ret, i;
 	u32 reg;
@@ -262,7 +268,7 @@ static int lan9303_port_phy_reg_wait_for_completion(struct lan9303 *chip)
 	return -EIO;
 }
 
-static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum)
+static int lan9303_indirect_phy_read(struct lan9303 *chip, int addr, int regnum)
 {
 	int ret;
 	u32 val;
@@ -272,7 +278,7 @@ static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum)
 
 	mutex_lock(&chip->indirect_mutex);
 
-	ret = lan9303_port_phy_reg_wait_for_completion(chip);
+	ret = lan9303_indirect_phy_wait_for_completion(chip);
 	if (ret)
 		goto on_error;
 
@@ -281,7 +287,7 @@ static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum)
 	if (ret)
 		goto on_error;
 
-	ret = lan9303_port_phy_reg_wait_for_completion(chip);
+	ret = lan9303_indirect_phy_wait_for_completion(chip);
 	if (ret)
 		goto on_error;
 
@@ -299,8 +305,8 @@ on_error:
 	return ret;
 }
 
-static int lan9303_phy_reg_write(struct lan9303 *chip, int addr, int regnum,
-				 unsigned int val)
+static int lan9303_indirect_phy_write(struct lan9303 *chip, int addr,
+				      int regnum, u16 val)
 {
 	int ret;
 	u32 reg;
@@ -311,7 +317,7 @@ static int lan9303_phy_reg_write(struct lan9303 *chip, int addr, int regnum,
 
 	mutex_lock(&chip->indirect_mutex);
 
-	ret = lan9303_port_phy_reg_wait_for_completion(chip);
+	ret = lan9303_indirect_phy_wait_for_completion(chip);
 	if (ret)
 		goto on_error;
 
@@ -328,6 +334,12 @@ on_error:
 	return ret;
 }
 
+const struct lan9303_phy_ops lan9303_indirect_phy_ops = {
+	.phy_read = lan9303_indirect_phy_read,
+	.phy_write = lan9303_indirect_phy_write,
+};
+EXPORT_SYMBOL_GPL(lan9303_indirect_phy_ops);
+
 static int lan9303_switch_wait_for_completion(struct lan9303 *chip)
 {
 	int ret, i;
@@ -427,14 +439,15 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip)
 	 * Special reg 18 of phy 3 reads as 0x0000, if 'phy_addr_sel_strap' is 0
 	 * and the IDs are 0-1-2, else it contains something different from
 	 * 0x0000, which means 'phy_addr_sel_strap' is 1 and the IDs are 1-2-3.
+	 * 0xffff is returned on MDIO read with no response.
 	 */
-	reg = lan9303_port_phy_reg_read(chip, 3, MII_LAN911X_SPECIAL_MODES);
+	reg = chip->ops->phy_read(chip, 3, MII_LAN911X_SPECIAL_MODES);
 	if (reg < 0) {
 		dev_err(chip->dev, "Failed to detect phy config: %d\n", reg);
 		return reg;
 	}
 
-	if (reg != 0)
+	if ((reg != 0) && (reg != 0xffff))
 		chip->phy_addr_sel_strap = 1;
 	else
 		chip->phy_addr_sel_strap = 0;
@@ -719,7 +732,7 @@ static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum)
 	if (phy > phy_base + 2)
 		return -ENODEV;
 
-	return lan9303_port_phy_reg_read(chip, phy, regnum);
+	return chip->ops->phy_read(chip, phy, regnum);
 }
 
 static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
@@ -733,7 +746,7 @@ static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
 	if (phy > phy_base + 2)
 		return -ENODEV;
 
-	return lan9303_phy_reg_write(chip, phy, regnum, val);
+	return chip->ops->phy_write(chip, phy, regnum, val);
 }
 
 static int lan9303_port_enable(struct dsa_switch *ds, int port,
@@ -766,13 +779,13 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port,
 	switch (port) {
 	case 1:
 		lan9303_disable_packet_processing(chip, LAN9303_PORT_1_OFFSET);
-		lan9303_phy_reg_write(chip, chip->phy_addr_sel_strap + 1,
-				      MII_BMCR, BMCR_PDOWN);
+		lan9303_phy_write(ds, chip->phy_addr_sel_strap + 1,
+				  MII_BMCR, BMCR_PDOWN);
 		break;
 	case 2:
 		lan9303_disable_packet_processing(chip, LAN9303_PORT_2_OFFSET);
-		lan9303_phy_reg_write(chip, chip->phy_addr_sel_strap + 2,
-				      MII_BMCR, BMCR_PDOWN);
+		lan9303_phy_write(ds, chip->phy_addr_sel_strap + 2,
+				  MII_BMCR, BMCR_PDOWN);
 		break;
 	default:
 		dev_dbg(chip->dev,
diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h
index d1512dad2d90..4d8be555ff4d 100644
--- a/drivers/net/dsa/lan9303.h
+++ b/drivers/net/dsa/lan9303.h
@@ -2,6 +2,15 @@
 #include <linux/device.h>
 #include <net/dsa.h>
 
+struct lan9303;
+
+struct lan9303_phy_ops {
+	/* PHY 1 and 2 access*/
+	int	(*phy_read)(struct lan9303 *chip, int port, int regnum);
+	int	(*phy_write)(struct lan9303 *chip, int port,
+			     int regnum, u16 val);
+};
+
 struct lan9303 {
 	struct device *dev;
 	struct regmap *regmap;
@@ -11,9 +20,11 @@ struct lan9303 {
 	bool phy_addr_sel_strap;
 	struct dsa_switch *ds;
 	struct mutex indirect_mutex; /* protect indexed register access */
+	const struct lan9303_phy_ops *ops;
 };
 
 extern const struct regmap_access_table lan9303_register_set;
+extern const struct lan9303_phy_ops lan9303_indirect_phy_ops;
 
 int lan9303_probe(struct lan9303 *chip, struct device_node *np);
 int lan9303_remove(struct lan9303 *chip);
diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c
index ab3ce0da5071..24ec20f7f444 100644
--- a/drivers/net/dsa/lan9303_i2c.c
+++ b/drivers/net/dsa/lan9303_i2c.c
@@ -63,6 +63,8 @@ static int lan9303_i2c_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, sw_dev);
 	sw_dev->chip.dev = &client->dev;
 
+	sw_dev->chip.ops = &lan9303_indirect_phy_ops;
+
 	ret = lan9303_probe(&sw_dev->chip, client->dev.of_node);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c
index 93c36c0541cf..fc16668a487f 100644
--- a/drivers/net/dsa/lan9303_mdio.c
+++ b/drivers/net/dsa/lan9303_mdio.c
@@ -40,6 +40,7 @@ static int lan9303_mdio_write(void *ctx, uint32_t reg, uint32_t val)
 {
 	struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx;
 
+	reg <<= 2; /* reg num to offset */
 	mutex_lock(&sw_dev->device->bus->mdio_lock);
 	lan9303_mdio_real_write(sw_dev->device, reg, val & 0xffff);
 	lan9303_mdio_real_write(sw_dev->device, reg + 2, (val >> 16) & 0xffff);
@@ -57,6 +58,7 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val)
 {
 	struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx;
 
+	reg <<= 2; /* reg num to offset */
 	mutex_lock(&sw_dev->device->bus->mdio_lock);
 	*val = lan9303_mdio_real_read(sw_dev->device, reg);
 	*val |= (lan9303_mdio_real_read(sw_dev->device, reg + 2) << 16);
@@ -65,6 +67,25 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val)
 	return 0;
 }
 
+int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, u16 val)
+{
+	struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev);
+
+	return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val);
+}
+
+int lan9303_mdio_phy_read(struct lan9303 *chip, int phy,  int reg)
+{
+	struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev);
+
+	return mdiobus_read_nested(sw_dev->device->bus, phy, reg);
+}
+
+static const struct lan9303_phy_ops lan9303_mdio_phy_ops = {
+	.phy_read = lan9303_mdio_phy_read,
+	.phy_write = lan9303_mdio_phy_write,
+};
+
 static const struct regmap_config lan9303_mdio_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 32,
@@ -106,6 +127,8 @@ static int lan9303_mdio_probe(struct mdio_device *mdiodev)
 	dev_set_drvdata(&mdiodev->dev, sw_dev);
 	sw_dev->chip.dev = &mdiodev->dev;
 
+	sw_dev->chip.ops = &lan9303_mdio_phy_ops;
+
 	ret = lan9303_probe(&sw_dev->chip, mdiodev->dev.of_node);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 5bcdd33101b0..521738c4cd17 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -810,63 +810,18 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
 	mutex_unlock(&chip->reg_lock);
 }
 
-static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
-			     struct ethtool_eee *e)
+static int mv88e6xxx_get_mac_eee(struct dsa_switch *ds, int port,
+				 struct ethtool_eee *e)
 {
-	struct mv88e6xxx_chip *chip = ds->priv;
-	u16 reg;
-	int err;
-
-	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
-		return -EOPNOTSUPP;
-
-	mutex_lock(&chip->reg_lock);
-
-	err = mv88e6xxx_phy_read(chip, port, 16, &reg);
-	if (err)
-		goto out;
-
-	e->eee_enabled = !!(reg & 0x0200);
-	e->tx_lpi_enabled = !!(reg & 0x0100);
-
-	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
-	if (err)
-		goto out;
-
-	e->eee_active = !!(reg & MV88E6352_PORT_STS_EEE);
-out:
-	mutex_unlock(&chip->reg_lock);
-
-	return err;
+	/* Nothing to do on the port's MAC */
+	return 0;
 }
 
-static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
-			     struct phy_device *phydev, struct ethtool_eee *e)
+static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port,
+				 struct ethtool_eee *e)
 {
-	struct mv88e6xxx_chip *chip = ds->priv;
-	u16 reg;
-	int err;
-
-	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
-		return -EOPNOTSUPP;
-
-	mutex_lock(&chip->reg_lock);
-
-	err = mv88e6xxx_phy_read(chip, port, 16, &reg);
-	if (err)
-		goto out;
-
-	reg &= ~0x0300;
-	if (e->eee_enabled)
-		reg |= 0x0200;
-	if (e->tx_lpi_enabled)
-		reg |= 0x0100;
-
-	err = mv88e6xxx_phy_write(chip, port, 16, reg);
-out:
-	mutex_unlock(&chip->reg_lock);
-
-	return err;
+	/* Nothing to do on the port's MAC */
+	return 0;
 }
 
 static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
@@ -926,6 +881,22 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
 		dev_err(ds->dev, "p%d: failed to update state\n", port);
 }
 
+static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip)
+{
+	if (chip->info->ops->pot_clear)
+		return chip->info->ops->pot_clear(chip);
+
+	return 0;
+}
+
+static int mv88e6xxx_rsvd2cpu_setup(struct mv88e6xxx_chip *chip)
+{
+	if (chip->info->ops->mgmt_rsvd2cpu)
+		return chip->info->ops->mgmt_rsvd2cpu(chip);
+
+	return 0;
+}
+
 static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip)
 {
 	int err;
@@ -2116,7 +2087,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 		goto unlock;
 
 	/* Setup Switch Global 2 Registers */
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+	if (chip->info->global2_addr) {
 		err = mv88e6xxx_g2_setup(chip);
 		if (err)
 			goto unlock;
@@ -2142,16 +2113,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 	if (err)
 		goto unlock;
 
-	/* Some generations have the configuration of sending reserved
-	 * management frames to the CPU in global2, others in
-	 * global1. Hence it does not fit the two setup functions
-	 * above.
-	 */
-	if (chip->info->ops->mgmt_rsvd2cpu) {
-		err = chip->info->ops->mgmt_rsvd2cpu(chip);
-		if (err)
-			goto unlock;
-	}
+	err = mv88e6xxx_pot_setup(chip);
+	if (err)
+		goto unlock;
+
+	err = mv88e6xxx_rsvd2cpu_setup(chip);
+	if (err)
+		goto unlock;
 
 unlock:
 	mutex_unlock(&chip->reg_lock);
@@ -2236,7 +2204,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
 
 	if (np) {
 		bus->name = np->full_name;
-		snprintf(bus->id, MII_BUS_ID_SIZE, "%s", np->full_name);
+		snprintf(bus->id, MII_BUS_ID_SIZE, "%pOF", np);
 	} else {
 		bus->name = "mv88e6xxx SMI";
 		snprintf(bus->id, MII_BUS_ID_SIZE, "mv88e6xxx-%d", index++);
@@ -2385,7 +2353,8 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.ppu_enable = mv88e6185_g1_ppu_enable,
 	.ppu_disable = mv88e6185_g1_ppu_disable,
 	.reset = mv88e6185_g1_reset,
@@ -2408,7 +2377,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
 	.stats_get_sset_count = mv88e6095_stats_get_sset_count,
 	.stats_get_strings = mv88e6095_stats_get_strings,
 	.stats_get_stats = mv88e6095_stats_get_stats,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
 	.ppu_enable = mv88e6185_g1_ppu_enable,
 	.ppu_disable = mv88e6185_g1_ppu_disable,
 	.reset = mv88e6185_g1_reset,
@@ -2441,7 +2410,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2467,7 +2437,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2496,7 +2467,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
 	.ppu_enable = mv88e6185_g1_ppu_enable,
 	.ppu_disable = mv88e6185_g1_ppu_disable,
 	.reset = mv88e6185_g1_reset,
@@ -2533,6 +2504,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu =  mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2563,7 +2535,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2587,7 +2560,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2619,7 +2593,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2653,7 +2628,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2686,7 +2662,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2720,7 +2697,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2746,7 +2724,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
 	.ppu_enable = mv88e6185_g1_ppu_enable,
 	.ppu_disable = mv88e6185_g1_ppu_disable,
 	.reset = mv88e6185_g1_reset,
@@ -2782,6 +2760,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2816,6 +2795,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2850,6 +2830,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2884,7 +2865,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2920,6 +2902,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2952,14 +2935,15 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
 	.stats_get_stats = mv88e6320_stats_get_stats,
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6185_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
-	/* MV88E6XXX_FAMILY_6321 */
+	/* MV88E6XXX_FAMILY_6320 */
 	.irl_init_all = mv88e6352_g2_irl_init_all,
 	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
 	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -3018,6 +3002,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu =  mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3049,7 +3034,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3081,7 +3067,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3115,7 +3102,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
 	.set_cpu_port = mv88e6095_g1_set_cpu_port,
 	.set_egress_port = mv88e6095_g1_set_egress_port,
 	.watchdog_ops = &mv88e6097_watchdog_ops,
-	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3153,6 +3141,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -3190,6 +3179,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.set_egress_port = mv88e6390_g1_set_egress_port,
 	.watchdog_ops = &mv88e6390_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -3206,12 +3196,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6097,
 		.ops = &mv88e6085_ops,
 	},
 
@@ -3224,11 +3216,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
 		.atu_move_port_mask = 0xf,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6095,
 		.ops = &mv88e6095_ops,
 	},
 
@@ -3241,12 +3234,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6097,
 		.ops = &mv88e6097_ops,
 	},
 
@@ -3259,12 +3254,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6165,
 		.ops = &mv88e6123_ops,
 	},
 
@@ -3277,11 +3274,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
 		.atu_move_port_mask = 0xf,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6185,
 		.ops = &mv88e6131_ops,
 	},
 
@@ -3294,11 +3292,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.atu_move_port_mask = 0x1f,
+		.g2_irqs = 10,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6341,
 		.ops = &mv88e6141_ops,
 	},
 
@@ -3311,12 +3311,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6165,
 		.ops = &mv88e6161_ops,
 	},
 
@@ -3329,12 +3331,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6165,
 		.ops = &mv88e6165_ops,
 	},
 
@@ -3347,12 +3351,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
 		.ops = &mv88e6171_ops,
 	},
 
@@ -3365,12 +3371,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
 		.ops = &mv88e6172_ops,
 	},
 
@@ -3383,12 +3391,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
 		.ops = &mv88e6175_ops,
 	},
 
@@ -3401,12 +3411,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
 		.ops = &mv88e6176_ops,
 	},
 
@@ -3419,11 +3431,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
 		.atu_move_port_mask = 0xf,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6185,
 		.ops = &mv88e6185_ops,
 	},
 
@@ -3436,12 +3449,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
+		.g2_irqs = 14,
 		.pvt = true,
+		.multi_chip = true,
 		.atu_move_port_mask = 0x1f,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6390,
 		.ops = &mv88e6190_ops,
 	},
 
@@ -3454,12 +3469,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
+		.g2_irqs = 14,
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6390,
 		.ops = &mv88e6190x_ops,
 	},
 
@@ -3472,12 +3489,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
+		.g2_irqs = 14,
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6390,
 		.ops = &mv88e6191_ops,
 	},
 
@@ -3490,12 +3509,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
 		.ops = &mv88e6240_ops,
 	},
 
@@ -3508,12 +3529,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
+		.g2_irqs = 14,
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6390,
 		.ops = &mv88e6290_ops,
 	},
 
@@ -3526,12 +3549,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6320,
 		.ops = &mv88e6320_ops,
 	},
 
@@ -3544,11 +3568,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
 		.atu_move_port_mask = 0xf,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6320,
 		.ops = &mv88e6321_ops,
 	},
 
@@ -3561,11 +3586,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.atu_move_port_mask = 0x1f,
+		.g2_irqs = 10,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6341,
 		.ops = &mv88e6341_ops,
 	},
 
@@ -3578,12 +3605,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
 		.ops = &mv88e6350_ops,
 	},
 
@@ -3596,12 +3625,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
 		.ops = &mv88e6351_ops,
 	},
 
@@ -3614,12 +3645,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 9,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
 		.ops = &mv88e6352_ops,
 	},
 	[MV88E6390] = {
@@ -3631,12 +3664,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
+		.g2_irqs = 14,
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6390,
 		.ops = &mv88e6390_ops,
 	},
 	[MV88E6390X] = {
@@ -3648,12 +3683,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
+		.g2_irqs = 14,
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
+		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
-		.flags = MV88E6XXX_FLAGS_FAMILY_6390,
 		.ops = &mv88e6390x_ops,
 	},
 };
@@ -3723,7 +3760,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
 {
 	if (sw_addr == 0)
 		chip->smi_ops = &mv88e6xxx_smi_single_chip_ops;
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP))
+	else if (chip->info->multi_chip)
 		chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops;
 	else
 		return -EINVAL;
@@ -3853,8 +3890,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.get_sset_count		= mv88e6xxx_get_sset_count,
 	.port_enable		= mv88e6xxx_port_enable,
 	.port_disable		= mv88e6xxx_port_disable,
-	.set_eee		= mv88e6xxx_set_eee,
-	.get_eee		= mv88e6xxx_get_eee,
+	.get_mac_eee		= mv88e6xxx_get_mac_eee,
+	.set_mac_eee		= mv88e6xxx_set_mac_eee,
 	.get_eeprom_len		= mv88e6xxx_get_eeprom_len,
 	.get_eeprom		= mv88e6xxx_get_eeprom,
 	.set_eeprom		= mv88e6xxx_set_eeprom,
@@ -3971,7 +4008,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 		if (err)
 			goto out;
 
-		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) {
+		if (chip->info->g2_irqs > 0) {
 			err = mv88e6xxx_g2_irq_setup(chip);
 			if (err)
 				goto out_g1_irq;
@@ -3991,7 +4028,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
 	mv88e6xxx_mdios_unregister(chip);
 out_g2_irq:
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT) && chip->irq > 0)
+	if (chip->info->g2_irqs > 0 && chip->irq > 0)
 		mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
 	if (chip->irq > 0) {
@@ -4013,7 +4050,7 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 	mv88e6xxx_mdios_unregister(chip);
 
 	if (chip->irq > 0) {
-		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
+		if (chip->info->g2_irqs > 0)
 			mv88e6xxx_g2_irq_free(chip);
 		mv88e6xxx_g1_irq_free(chip);
 	}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 086444016352..334f6f7544ba 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -97,133 +97,6 @@ enum mv88e6xxx_family {
 	MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
 };
 
-enum mv88e6xxx_cap {
-	/* Energy Efficient Ethernet.
-	 */
-	MV88E6XXX_CAP_EEE,
-
-	/* Multi-chip Addressing Mode.
-	 * Some chips respond to only 2 registers of its own SMI device address
-	 * when it is non-zero, and use indirect access to internal registers.
-	 */
-	MV88E6XXX_CAP_SMI_CMD,		/* (0x00) SMI Command */
-	MV88E6XXX_CAP_SMI_DATA,		/* (0x01) SMI Data */
-
-	/* Switch Global (1) Registers.
-	 */
-	MV88E6XXX_CAP_G1_ATU_FID,	/* (0x01) ATU FID Register */
-	MV88E6XXX_CAP_G1_VTU_FID,	/* (0x02) VTU FID Register */
-
-	/* Switch Global 2 Registers.
-	 * The device contains a second set of global 16-bit registers.
-	 */
-	MV88E6XXX_CAP_GLOBAL2,
-	MV88E6XXX_CAP_G2_INT,		/* (0x00) Interrupt Status */
-	MV88E6XXX_CAP_G2_MGMT_EN_2X,	/* (0x02) MGMT Enable Register 2x */
-	MV88E6XXX_CAP_G2_MGMT_EN_0X,	/* (0x03) MGMT Enable Register 0x */
-	MV88E6XXX_CAP_G2_POT,		/* (0x0f) Priority Override Table */
-
-	/* Per VLAN Spanning Tree Unit (STU).
-	 * The Port State database, if present, is accessed through VTU
-	 * operations and dedicated SID registers. See MV88E6352_G1_VTU_SID.
-	 */
-	MV88E6XXX_CAP_STU,
-
-	/* VLAN Table Unit.
-	 * The VTU is used to program 802.1Q VLANs. See MV88E6XXX_G1_VTU_OP.
-	 */
-	MV88E6XXX_CAP_VTU,
-};
-
-/* Bitmask of capabilities */
-#define MV88E6XXX_FLAG_EEE		BIT_ULL(MV88E6XXX_CAP_EEE)
-
-#define MV88E6XXX_FLAG_SMI_CMD		BIT_ULL(MV88E6XXX_CAP_SMI_CMD)
-#define MV88E6XXX_FLAG_SMI_DATA		BIT_ULL(MV88E6XXX_CAP_SMI_DATA)
-
-#define MV88E6XXX_FLAG_G1_VTU_FID	BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID)
-
-#define MV88E6XXX_FLAG_GLOBAL2		BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
-#define MV88E6XXX_FLAG_G2_INT		BIT_ULL(MV88E6XXX_CAP_G2_INT)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_2X	BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_0X	BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
-#define MV88E6XXX_FLAG_G2_POT		BIT_ULL(MV88E6XXX_CAP_G2_POT)
-
-/* Multi-chip Addressing Mode */
-#define MV88E6XXX_FLAGS_MULTI_CHIP	\
-	(MV88E6XXX_FLAG_SMI_CMD |	\
-	 MV88E6XXX_FLAG_SMI_DATA)
-
-#define MV88E6XXX_FLAGS_FAMILY_6095	\
-	(MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6097	\
-	(MV88E6XXX_FLAG_G1_VTU_FID |	\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |        \
-	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6165	\
-	(MV88E6XXX_FLAG_G1_VTU_FID |	\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6185	\
-	(MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6320	\
-	(MV88E6XXX_FLAG_EEE |		\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6341	\
-	(MV88E6XXX_FLAG_EEE |		\
-	 MV88E6XXX_FLAG_G1_VTU_FID |	\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |	\
-	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6351	\
-	(MV88E6XXX_FLAG_G1_VTU_FID |	\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6352	\
-	(MV88E6XXX_FLAG_EEE |		\
-	 MV88E6XXX_FLAG_G1_VTU_FID |	\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
-	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6390	\
-	(MV88E6XXX_FLAG_EEE |		\
-	 MV88E6XXX_FLAG_GLOBAL2 |	\
-	 MV88E6XXX_FLAG_G2_INT |        \
-	 MV88E6XXX_FLAGS_MULTI_CHIP)
-
 struct mv88e6xxx_ops;
 
 struct mv88e6xxx_info {
@@ -235,11 +108,18 @@ struct mv88e6xxx_info {
 	unsigned int max_vid;
 	unsigned int port_base_addr;
 	unsigned int global1_addr;
+	unsigned int global2_addr;
 	unsigned int age_time_coeff;
 	unsigned int g1_irqs;
+	unsigned int g2_irqs;
 	bool pvt;
+
+	/* Multi-chip Addressing Mode.
+	 * Some chips respond to only 2 registers of its own SMI device address
+	 * when it is non-zero, and use indirect access to internal registers.
+	 */
+	bool multi_chip;
 	enum dsa_tag_protocol tag_protocol;
-	unsigned long long flags;
 
 	/* Mask for FromPort and ToPort value of PortVec used in ATU Move
 	 * operation. 0 means that the ATU Move operation is not supported.
@@ -359,6 +239,9 @@ struct mv88e6xxx_ops {
 			 struct mii_bus *bus,
 			 int addr, int reg, u16 val);
 
+	/* Priority Override Table operations */
+	int (*pot_clear)(struct mv88e6xxx_chip *chip);
+
 	/* PHY Polling Unit (PPU) operations */
 	int (*ppu_enable)(struct mv88e6xxx_chip *chip);
 	int (*ppu_disable)(struct mv88e6xxx_chip *chip);
@@ -449,7 +332,6 @@ struct mv88e6xxx_ops {
 	int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
 	const struct mv88e6xxx_irq_ops *watchdog_ops;
 
-	/* Can be either in g1 or g2, so don't use a prefix */
 	int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
 
 	/* Power on/off a SERDES interface */
@@ -482,12 +364,6 @@ struct mv88e6xxx_hw_stat {
 	int type;
 };
 
-static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
-				 unsigned long flags)
-{
-	return (chip->info->flags & flags) == flags;
-}
-
 static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip)
 {
 	return chip->info->pvt;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 158d0f499874..16f556261022 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -22,48 +22,99 @@
 
 static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
 {
-	return mv88e6xxx_read(chip, MV88E6XXX_G2, reg, val);
+	return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
 }
 
 static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 {
-	return mv88e6xxx_write(chip, MV88E6XXX_G2, reg, val);
+	return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
 }
 
 static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
 {
-	return mv88e6xxx_update(chip, MV88E6XXX_G2, reg, update);
+	return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
 }
 
 static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 {
-	return mv88e6xxx_wait(chip, MV88E6XXX_G2, reg, mask);
+	return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
+}
+
+/* Offset 0x00: Interrupt Source Register */
+
+static int mv88e6xxx_g2_int_source(struct mv88e6xxx_chip *chip, u16 *src)
+{
+	/* Read (and clear most of) the Interrupt Source bits */
+	return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SRC, src);
+}
+
+/* Offset 0x01: Interrupt Mask Register */
+
+static int mv88e6xxx_g2_int_mask(struct mv88e6xxx_chip *chip, u16 mask)
+{
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, mask);
 }
 
 /* Offset 0x02: Management Enable 2x */
+
+static int mv88e6xxx_g2_mgmt_enable_2x(struct mv88e6xxx_chip *chip, u16 en2x)
+{
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, en2x);
+}
+
 /* Offset 0x03: Management Enable 0x */
 
-int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_g2_mgmt_enable_0x(struct mv88e6xxx_chip *chip, u16 en0x)
+{
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, en0x);
+}
+
+/* Offset 0x05: Switch Management Register */
+
+static int mv88e6xxx_g2_switch_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip,
+					     bool enable)
+{
+	u16 val;
+	int err;
+
+	err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SWITCH_MGMT, &val);
+	if (err)
+		return err;
+
+	if (enable)
+		val |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU;
+	else
+		val &= ~MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU;
+
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, val);
+}
+
+int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
 {
 	int err;
 
 	/* Consider the frames with reserved multicast destination
-	 * addresses matching 01:80:c2:00:00:2x as MGMT.
+	 * addresses matching 01:80:c2:00:00:0x as MGMT.
 	 */
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
-		err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, 0xffff);
-		if (err)
-			return err;
-	}
+	err = mv88e6xxx_g2_mgmt_enable_0x(chip, 0xffff);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_switch_mgmt_rsvd2cpu(chip, true);
+}
+
+int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+	int err;
 
 	/* Consider the frames with reserved multicast destination
-	 * addresses matching 01:80:c2:00:00:0x as MGMT.
+	 * addresses matching 01:80:c2:00:00:2x as MGMT.
 	 */
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X))
-		return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X,
-					  0xffff);
+	err = mv88e6xxx_g2_mgmt_enable_2x(chip, 0xffff);
+	if (err)
+		return err;
 
-	return 0;
+	return mv88e6185_g2_mgmt_rsvd2cpu(chip);
 }
 
 /* Offset 0x06: Device Mapping Table register */
@@ -260,7 +311,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
 	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_PRIO_OVERRIDE, val);
 }
 
-static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
+int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 {
 	int i, err;
 
@@ -933,7 +984,7 @@ static irqreturn_t mv88e6xxx_g2_irq_thread_fn(int irq, void *dev_id)
 	u16 reg;
 
 	mutex_lock(&chip->reg_lock);
-	err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SOURCE, &reg);
+	err = mv88e6xxx_g2_int_source(chip, &reg);
 	mutex_unlock(&chip->reg_lock);
 	if (err)
 		goto out;
@@ -959,8 +1010,11 @@ static void mv88e6xxx_g2_irq_bus_lock(struct irq_data *d)
 static void mv88e6xxx_g2_irq_bus_sync_unlock(struct irq_data *d)
 {
 	struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
+	int err;
 
-	mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, ~chip->g2_irq.masked);
+	err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked);
+	if (err)
+		dev_err(chip->dev, "failed to mask interrupts\n");
 
 	mutex_unlock(&chip->reg_lock);
 }
@@ -1063,9 +1117,6 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 	 * port at the highest priority.
 	 */
 	reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4);
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
-	    mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
-		reg |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU | 0x7;
 	err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg);
 	if (err)
 		return err;
@@ -1080,12 +1131,5 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 	if (err)
 		return err;
 
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
-		/* Clear the priority override table. */
-		err = mv88e6xxx_g2_clear_pot(chip);
-		if (err)
-			return err;
-	}
-
 	return 0;
 }
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 317ffd8f323d..669f59017b12 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -17,14 +17,27 @@
 
 #include "chip.h"
 
-#define MV88E6XXX_G2	0x1c
-
 /* Offset 0x00: Interrupt Source Register */
-#define MV88E6XXX_G2_INT_SOURCE			0x00
+#define MV88E6XXX_G2_INT_SRC			0x00
+#define MV88E6XXX_G2_INT_SRC_WDOG		0x8000
+#define MV88E6XXX_G2_INT_SRC_JAM_LIMIT		0x4000
+#define MV88E6XXX_G2_INT_SRC_DUPLEX_MISMATCH	0x2000
+#define MV88E6XXX_G2_INT_SRC_WAKE_EVENT		0x1000
+#define MV88E6352_G2_INT_SRC_SERDES		0x0800
+#define MV88E6352_G2_INT_SRC_PHY		0x001f
+#define MV88E6390_G2_INT_SRC_PHY		0x07fe
+
 #define MV88E6XXX_G2_INT_SOURCE_WATCHDOG	15
 
 /* Offset 0x01: Interrupt Mask Register */
-#define MV88E6XXX_G2_INT_MASK	0x01
+#define MV88E6XXX_G2_INT_MASK			0x01
+#define MV88E6XXX_G2_INT_MASK_WDOG		0x8000
+#define MV88E6XXX_G2_INT_MASK_JAM_LIMIT		0x4000
+#define MV88E6XXX_G2_INT_MASK_DUPLEX_MISMATCH	0x2000
+#define MV88E6XXX_G2_INT_MASK_WAKE_EVENT	0x1000
+#define MV88E6352_G2_INT_MASK_SERDES		0x0800
+#define MV88E6352_G2_INT_MASK_PHY		0x001f
+#define MV88E6390_G2_INT_MASK_PHY		0x07fe
 
 /* Offset 0x02: MGMT Enable Register 2x */
 #define MV88E6XXX_G2_MGMT_EN_2X		0x02
@@ -245,7 +258,11 @@ int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
-int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+
+int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
 
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
@@ -254,7 +271,7 @@ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
 {
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+	if (chip->info->global2_addr) {
 		dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n");
 		return -EOPNOTSUPP;
 	}
@@ -347,7 +364,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
 {
 }
 
-static inline int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c
index 3500ac0ea848..436668bd50dc 100644
--- a/drivers/net/dsa/mv88e6xxx/phy.c
+++ b/drivers/net/dsa/mv88e6xxx/phy.c
@@ -13,7 +13,6 @@
 
 #include <linux/mdio.h>
 #include <linux/module.h>
-#include <net/dsa.h>
 
 #include "chip.h"
 #include "phy.h"
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 8f3991bf1851..b16d5f0e6e9c 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -216,9 +216,6 @@
 /* Offset 0x13: OutFiltered Counter */
 #define MV88E6XXX_PORT_OUT_FILTERED	0x13
 
-/* Offset 0x16: LED Control */
-#define MV88E6XXX_PORT_LED_CONTROL	0x16
-
 /* Offset 0x18: IEEE Priority Mapping Table */
 #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE			0x18
 #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE_UPDATE		0x8000
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index b3bee7eab45f..36c169b0c705 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -637,8 +637,8 @@ qca8k_get_sset_count(struct dsa_switch *ds)
 	return ARRAY_SIZE(ar8327_mib);
 }
 
-static void
-qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+static int
+qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
@@ -646,73 +646,21 @@ qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
 
 	mutex_lock(&priv->reg_mutex);
 	reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
-	if (enable)
+	if (eee->eee_enabled)
 		reg |= lpi_en;
 	else
 		reg &= ~lpi_en;
 	qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
 	mutex_unlock(&priv->reg_mutex);
-}
-
-static int
-qca8k_eee_init(struct dsa_switch *ds, int port,
-	       struct phy_device *phy)
-{
-	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-	struct ethtool_eee *p = &priv->port_sts[port].eee;
-	int ret;
-
-	p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
-
-	ret = phy_init_eee(phy, 0);
-	if (ret)
-		return ret;
-
-	qca8k_eee_enable_set(ds, port, true);
 
 	return 0;
 }
 
 static int
-qca8k_set_eee(struct dsa_switch *ds, int port,
-	      struct phy_device *phydev,
-	      struct ethtool_eee *e)
-{
-	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-	struct ethtool_eee *p = &priv->port_sts[port].eee;
-	int ret = 0;
-
-	p->eee_enabled = e->eee_enabled;
-
-	if (e->eee_enabled) {
-		p->eee_enabled = qca8k_eee_init(ds, port, phydev);
-		if (!p->eee_enabled)
-			ret = -EOPNOTSUPP;
-	}
-	qca8k_eee_enable_set(ds, port, p->eee_enabled);
-
-	return ret;
-}
-
-static int
-qca8k_get_eee(struct dsa_switch *ds, int port,
-	      struct ethtool_eee *e)
+qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
 {
-	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-	struct ethtool_eee *p = &priv->port_sts[port].eee;
-	struct net_device *netdev = ds->ports[port].netdev;
-	int ret;
-
-	ret = phy_ethtool_get_eee(netdev->phydev, p);
-	if (!ret)
-		e->eee_active =
-			!!(p->supported & p->advertised & p->lp_advertised);
-	else
-		e->eee_active = 0;
-
-	e->eee_enabled = p->eee_enabled;
-
-	return ret;
+	/* Nothing to do on the port's MAC */
+	return 0;
 }
 
 static void
@@ -914,8 +862,8 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
 	.phy_write		= qca8k_phy_write,
 	.get_ethtool_stats	= qca8k_get_ethtool_stats,
 	.get_sset_count		= qca8k_get_sset_count,
-	.get_eee		= qca8k_get_eee,
-	.set_eee		= qca8k_set_eee,
+	.get_mac_eee		= qca8k_get_mac_eee,
+	.set_mac_eee		= qca8k_set_mac_eee,
 	.port_enable		= qca8k_port_enable,
 	.port_disable		= qca8k_port_disable,
 	.port_stp_state_set	= qca8k_port_stp_state_set,
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 1ed4fac6cd6d..1cf8a920d4ff 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -156,7 +156,6 @@ enum qca8k_fdb_cmd {
 };
 
 struct ar8xxx_port_status {
-	struct ethtool_eee eee;
 	int enabled;
 };
 
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index d0c165d2086e..d0a1f9ce3168 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -345,7 +345,7 @@ static void dummy_setup(struct net_device *dev)
 	dev->flags &= ~IFF_MULTICAST;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
 	dev->features	|= NETIF_F_SG | NETIF_F_FRAGLIST;
-	dev->features	|= NETIF_F_ALL_TSO | NETIF_F_UFO;
+	dev->features	|= NETIF_F_ALL_TSO;
 	dev->features	|= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
 	dev->features	|= NETIF_F_GSO_ENCAP_ALL;
 	dev->hw_features |= dev->features;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index ecef3ee87b17..2fd9b80b39b0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1918,18 +1918,18 @@ static void xgbe_poll_controller(struct net_device *netdev)
 }
 #endif /* End CONFIG_NET_POLL_CONTROLLER */
 
-static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
-			 __be16 proto,
-			 struct tc_to_netdev *tc_to_netdev)
+static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			 void *type_data)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct tc_mqprio_qopt *mqprio = type_data;
 	u8 tc;
 
-	if (tc_to_netdev->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	tc_to_netdev->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-	tc = tc_to_netdev->mqprio->num_tc;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	tc = mqprio->num_tc;
 
 	if (tc > pdata->hw_feat.tc_cnt)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 0938294f640a..e9282c924621 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -129,6 +129,7 @@
 #include <net/dcbnl.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
+#include <linux/interrupt.h>
 
 #define XGBE_DRV_NAME		"amd-xgbe"
 #define XGBE_DRV_VERSION	"1.0.3"
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 96dd5300e0e5..e58b157b7d7c 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -114,8 +114,8 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
 	int j, rev, rc = -EBUSY;
 
 	if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
-		printk(KERN_ERR "can't use MACE %s: need 3 addrs and 3 irqs\n",
-		       mace->full_name);
+		printk(KERN_ERR "can't use MACE %pOF: need 3 addrs and 3 irqs\n",
+		       mace);
 		return -ENODEV;
 	}
 
@@ -123,8 +123,8 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
 	if (addr == NULL) {
 		addr = of_get_property(mace, "local-mac-address", NULL);
 		if (addr == NULL) {
-			printk(KERN_ERR "Can't get mac-address for MACE %s\n",
-			       mace->full_name);
+			printk(KERN_ERR "Can't get mac-address for MACE %pOF\n",
+			       mace);
 			return -ENODEV;
 		}
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 9a0817938eca..4b445750b93e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -134,7 +134,10 @@ static inline unsigned int aq_ring_dx_in_range(unsigned int h, unsigned int i,
 }
 
 #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
-int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
+int aq_ring_rx_clean(struct aq_ring_s *self,
+		     struct napi_struct *napi,
+		     int *work_done,
+		     int budget)
 {
 	struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
 	int err = 0;
@@ -240,7 +243,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
 
 		skb_record_rx_queue(skb, self->idx);
 
-		netif_receive_skb(skb);
+		napi_gro_receive(napi, skb);
 
 		++self->stats.rx.packets;
 		self->stats.rx.bytes += skb->len;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index eecd6d1c4d73..782176c5f4f8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -148,7 +148,10 @@ int aq_ring_init(struct aq_ring_s *self);
 void aq_ring_rx_deinit(struct aq_ring_s *self);
 void aq_ring_free(struct aq_ring_s *self);
 void aq_ring_tx_clean(struct aq_ring_s *self);
-int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget);
+int aq_ring_rx_clean(struct aq_ring_s *self,
+		     struct napi_struct *napi,
+		     int *work_done,
+		     int budget);
 int aq_ring_rx_fill(struct aq_ring_s *self);
 
 #endif /* AQ_RING_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index ad5b4d4dac7f..ec390c5eed35 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -78,6 +78,7 @@ __acquires(&self->lock)
 			if (ring[AQ_VEC_RX_ID].sw_head !=
 				ring[AQ_VEC_RX_ID].hw_head) {
 				err = aq_ring_rx_clean(&ring[AQ_VEC_RX_ID],
+						       napi,
 						       &work_done,
 						       budget - work_done);
 				if (err < 0)
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 68de2f2652f2..3241af1ce718 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -720,6 +720,18 @@ static int arc_emac_set_address(struct net_device *ndev, void *p)
 	return 0;
 }
 
+static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	if (!netif_running(dev))
+		return -EINVAL;
+
+	if (!dev->phydev)
+		return -ENODEV;
+
+	return phy_mii_ioctl(dev->phydev, rq, cmd);
+}
+
+
 static const struct net_device_ops arc_emac_netdev_ops = {
 	.ndo_open		= arc_emac_open,
 	.ndo_stop		= arc_emac_stop,
@@ -727,6 +739,7 @@ static const struct net_device_ops arc_emac_netdev_ops = {
 	.ndo_set_mac_address	= arc_emac_set_address,
 	.ndo_get_stats		= arc_emac_stats,
 	.ndo_set_rx_mode	= arc_emac_set_rx_mode,
+	.ndo_do_ioctl		= arc_emac_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= arc_emac_poll_controller,
 #endif
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 96413808c726..1456cb18f830 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -61,10 +61,12 @@ config BCM63XX_ENET
 
 config BCMGENET
 	tristate "Broadcom GENET internal MAC support"
+	depends on OF && HAS_IOMEM
 	select MII
 	select PHYLIB
 	select FIXED_PHY
 	select BCM7XXX_PHY
+	select MDIO_BCM_UNIMAC
 	help
 	  This driver supports the built-in Ethernet MACs found in the
 	  Broadcom BCM7xxx Set Top Box family chipset.
@@ -193,6 +195,7 @@ config SYSTEMPORT
 config BNXT
 	tristate "Broadcom NetXtreme-C/E support"
 	depends on PCI
+	depends on MAY_USE_DEVLINK
 	select FW_LOADER
 	select LIBCRC32C
 	---help---
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 5333601f855f..bf9ca3c79d1a 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -201,10 +201,10 @@ static int bcm_sysport_set_features(struct net_device *dev,
  */
 static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = {
 	/* general stats */
-	STAT_NETDEV(rx_packets),
-	STAT_NETDEV(tx_packets),
-	STAT_NETDEV(rx_bytes),
-	STAT_NETDEV(tx_bytes),
+	STAT_NETDEV64(rx_packets),
+	STAT_NETDEV64(tx_packets),
+	STAT_NETDEV64(rx_bytes),
+	STAT_NETDEV64(tx_bytes),
 	STAT_NETDEV(rx_errors),
 	STAT_NETDEV(tx_errors),
 	STAT_NETDEV(rx_dropped),
@@ -316,6 +316,7 @@ static inline bool bcm_sysport_lite_stat_valid(enum bcm_sysport_stat_type type)
 {
 	switch (type) {
 	case BCM_SYSPORT_STAT_NETDEV:
+	case BCM_SYSPORT_STAT_NETDEV64:
 	case BCM_SYSPORT_STAT_RXCHK:
 	case BCM_SYSPORT_STAT_RBUF:
 	case BCM_SYSPORT_STAT_SOFT:
@@ -398,6 +399,7 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv)
 		s = &bcm_sysport_gstrings_stats[i];
 		switch (s->type) {
 		case BCM_SYSPORT_STAT_NETDEV:
+		case BCM_SYSPORT_STAT_NETDEV64:
 		case BCM_SYSPORT_STAT_SOFT:
 			continue;
 		case BCM_SYSPORT_STAT_MIB_RX:
@@ -434,7 +436,10 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 				  struct ethtool_stats *stats, u64 *data)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
+	struct u64_stats_sync *syncp = &priv->syncp;
 	struct bcm_sysport_tx_ring *ring;
+	unsigned int start;
 	int i, j;
 
 	if (netif_running(dev))
@@ -447,10 +452,20 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 		s = &bcm_sysport_gstrings_stats[i];
 		if (s->type == BCM_SYSPORT_STAT_NETDEV)
 			p = (char *)&dev->stats;
+		else if (s->type == BCM_SYSPORT_STAT_NETDEV64)
+			p = (char *)stats64;
 		else
 			p = (char *)priv;
+
 		p += s->stat_offset;
-		data[j] = *(unsigned long *)p;
+
+		if (s->stat_sizeof == sizeof(u64))
+			do {
+				start = u64_stats_fetch_begin_irq(syncp);
+				data[i] = *(u64 *)p;
+			} while (u64_stats_fetch_retry_irq(syncp, start));
+		else
+			data[i] = *(u32 *)p;
 		j++;
 	}
 
@@ -662,6 +677,7 @@ static int bcm_sysport_alloc_rx_bufs(struct bcm_sysport_priv *priv)
 static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
 					unsigned int budget)
 {
+	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
 	struct net_device *ndev = priv->netdev;
 	unsigned int processed = 0, to_process;
 	struct bcm_sysport_cb *cb;
@@ -765,6 +781,10 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
 		skb->protocol = eth_type_trans(skb, ndev);
 		ndev->stats.rx_packets++;
 		ndev->stats.rx_bytes += len;
+		u64_stats_update_begin(&priv->syncp);
+		stats64->rx_packets++;
+		stats64->rx_bytes += len;
+		u64_stats_update_end(&priv->syncp);
 
 		napi_gro_receive(&priv->napi, skb);
 next:
@@ -787,17 +807,15 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
 	struct device *kdev = &priv->pdev->dev;
 
 	if (cb->skb) {
-		ring->bytes += cb->skb->len;
 		*bytes_compl += cb->skb->len;
 		dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
 				 dma_unmap_len(cb, dma_len),
 				 DMA_TO_DEVICE);
-		ring->packets++;
 		(*pkts_compl)++;
 		bcm_sysport_free_cb(cb);
 	/* SKB fragment */
 	} else if (dma_unmap_addr(cb, dma_addr)) {
-		ring->bytes += dma_unmap_len(cb, dma_len);
+		*bytes_compl += dma_unmap_len(cb, dma_len);
 		dma_unmap_page(kdev, dma_unmap_addr(cb, dma_addr),
 			       dma_unmap_len(cb, dma_len), DMA_TO_DEVICE);
 		dma_unmap_addr_set(cb, dma_addr, 0);
@@ -808,9 +826,9 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
 static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 					     struct bcm_sysport_tx_ring *ring)
 {
-	struct net_device *ndev = priv->netdev;
 	unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
+	struct net_device *ndev = priv->netdev;
 	struct bcm_sysport_cb *cb;
 	u32 hw_ind;
 
@@ -849,6 +867,11 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 		last_c_index &= (num_tx_cbs - 1);
 	}
 
+	u64_stats_update_begin(&priv->syncp);
+	ring->packets += pkts_compl;
+	ring->bytes += bytes_compl;
+	u64_stats_update_end(&priv->syncp);
+
 	ring->c_index = c_index;
 
 	netif_dbg(priv, tx_done, ndev,
@@ -1671,22 +1694,41 @@ static int bcm_sysport_change_mac(struct net_device *dev, void *p)
 	return 0;
 }
 
-static struct net_device_stats *bcm_sysport_get_nstats(struct net_device *dev)
+static void bcm_sysport_get_stats64(struct net_device *dev,
+				    struct rtnl_link_stats64 *stats)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
-	unsigned long tx_bytes = 0, tx_packets = 0;
+	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
 	struct bcm_sysport_tx_ring *ring;
+	u64 tx_packets = 0, tx_bytes = 0;
+	unsigned int start;
 	unsigned int q;
 
+	netdev_stats_to_stats64(stats, &dev->stats);
+
 	for (q = 0; q < dev->num_tx_queues; q++) {
 		ring = &priv->tx_rings[q];
-		tx_bytes += ring->bytes;
-		tx_packets += ring->packets;
+		do {
+			start = u64_stats_fetch_begin_irq(&priv->syncp);
+			tx_bytes = ring->bytes;
+			tx_packets = ring->packets;
+		} while (u64_stats_fetch_retry_irq(&priv->syncp, start));
+
+		stats->tx_bytes += tx_bytes;
+		stats->tx_packets += tx_packets;
 	}
 
-	dev->stats.tx_bytes = tx_bytes;
-	dev->stats.tx_packets = tx_packets;
-	return &dev->stats;
+	/* lockless update tx_bytes and tx_packets */
+	u64_stats_update_begin(&priv->syncp);
+	stats64->tx_bytes = stats->tx_bytes;
+	stats64->tx_packets = stats->tx_packets;
+	u64_stats_update_end(&priv->syncp);
+
+	do {
+		start = u64_stats_fetch_begin_irq(&priv->syncp);
+		stats->rx_packets = stats64->rx_packets;
+		stats->rx_bytes = stats64->rx_bytes;
+	} while (u64_stats_fetch_retry_irq(&priv->syncp, start));
 }
 
 static void bcm_sysport_netif_start(struct net_device *dev)
@@ -1950,7 +1992,7 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= bcm_sysport_poll_controller,
 #endif
-	.ndo_get_stats		= bcm_sysport_get_nstats,
+	.ndo_get_stats64	= bcm_sysport_get_stats64,
 };
 
 #define REV_FMT	"v%2x.%02x"
@@ -2098,6 +2140,8 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	/* libphy will adjust the link state accordingly */
 	netif_carrier_off(dev);
 
+	u64_stats_init(&priv->syncp);
+
 	ret = register_netdev(dev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register net_device\n");
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 77a51c167a69..80b4ffff63b7 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -603,6 +603,7 @@ struct bcm_sysport_mib {
 /* HW maintains a large list of counters */
 enum bcm_sysport_stat_type {
 	BCM_SYSPORT_STAT_NETDEV = -1,
+	BCM_SYSPORT_STAT_NETDEV64,
 	BCM_SYSPORT_STAT_MIB_RX,
 	BCM_SYSPORT_STAT_MIB_TX,
 	BCM_SYSPORT_STAT_RUNT,
@@ -619,6 +620,13 @@ enum bcm_sysport_stat_type {
 	.type = BCM_SYSPORT_STAT_NETDEV, \
 }
 
+#define STAT_NETDEV64(m) { \
+	.stat_string = __stringify(m), \
+	.stat_sizeof = sizeof(((struct bcm_sysport_stats64 *)0)->m), \
+	.stat_offset = offsetof(struct bcm_sysport_stats64, m), \
+	.type = BCM_SYSPORT_STAT_NETDEV64, \
+}
+
 #define STAT_MIB(str, m, _type) { \
 	.stat_string = str, \
 	.stat_sizeof = sizeof(((struct bcm_sysport_priv *)0)->m), \
@@ -659,6 +667,14 @@ struct bcm_sysport_stats {
 	u16 reg_offset;
 };
 
+struct bcm_sysport_stats64 {
+	/* 64bit stats on 32bit/64bit Machine */
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+};
+
 /* Software house keeping helper structure */
 struct bcm_sysport_cb {
 	struct sk_buff	*skb;		/* SKB for RX packets */
@@ -743,5 +759,10 @@ struct bcm_sysport_priv {
 
 	/* Ethtool */
 	u32			msg_enable;
+
+	struct bcm_sysport_stats64	stats64;
+
+	/* For atomic update generic 64bit value on 32bit Machine */
+	struct u64_stats_sync	syncp;
 };
 #endif /* __BCM_SYSPORT_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 67fe3d826566..1216c1f1e052 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4284,15 +4284,17 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
 	return 0;
 }
 
-int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-		     __be16 proto, struct tc_to_netdev *tc)
+int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		     void *type_data)
 {
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return bnx2x_setup_tc(dev, tc->mqprio->num_tc);
+	return bnx2x_setup_tc(dev, mqprio->num_tc);
 }
 
 /* called with rtnl_lock */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index c26688d2f326..a5265e1344f1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -486,8 +486,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
 /* setup_tc callback */
 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
-int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-		     __be16 proto, struct tc_to_netdev *tc);
+int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		     void *type_data);
 
 int bnx2x_get_vf_config(struct net_device *dev, int vf,
 			struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index a7ca45b251cb..d141a22ac50b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e7c8539cbddf..6e14fc4fe2c8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -33,6 +33,7 @@
 #include <linux/mii.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
+#include <linux/if_bridge.h>
 #include <linux/rtc.h>
 #include <linux/bpf.h>
 #include <net/ip.h>
@@ -56,6 +57,7 @@
 #include "bnxt_ethtool.h"
 #include "bnxt_dcb.h"
 #include "bnxt_xdp.h"
+#include "bnxt_vfr.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 
@@ -243,6 +245,16 @@ const u16 bnxt_lhint_arr[] = {
 	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
 };
 
+static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
+{
+	struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+	if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)
+		return 0;
+
+	return md_dst->u.port_info.port_id;
+}
+
 static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bnxt *bp = netdev_priv(dev);
@@ -287,7 +299,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_buf->nr_frags = last_frag;
 
 	vlan_tag_flags = 0;
-	cfa_action = 0;
+	cfa_action = bnxt_xmit_get_cfa_action(skb);
 	if (skb_vlan_tag_present(skb)) {
 		vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
 				 skb_vlan_tag_get(skb);
@@ -322,7 +334,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			tx_push1->tx_bd_hsize_lflags = 0;
 
 		tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
-		tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+		tx_push1->tx_bd_cfa_action =
+			cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
 
 		end = pdata + length;
 		end = PTR_ALIGN(end, 8) - 1;
@@ -427,7 +440,8 @@ normal_tx:
 	txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
 
 	txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
-	txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+	txbd1->tx_bd_cfa_action =
+			cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
 	for (i = 0; i < last_frag; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
@@ -1032,7 +1046,10 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 		bnxt_sched_reset(bp, rxr);
 		return;
 	}
-
+	/* Store cfa_code in tpa_info to use in tpa_end
+	 * completion processing.
+	 */
+	tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1);
 	prod_rx_buf->data = tpa_info->data;
 	prod_rx_buf->data_ptr = tpa_info->data_ptr;
 
@@ -1267,6 +1284,17 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp,
 	return skb;
 }
 
+/* Given the cfa_code of a received packet determine which
+ * netdev (vf-rep or PF) the packet is destined to.
+ */
+static struct net_device *bnxt_get_pkt_dev(struct bnxt *bp, u16 cfa_code)
+{
+	struct net_device *dev = bnxt_get_vf_rep(bp, cfa_code);
+
+	/* if vf-rep dev is NULL, the must belongs to the PF */
+	return dev ? dev : bp->dev;
+}
+
 static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 					   struct bnxt_napi *bnapi,
 					   u32 *raw_cons,
@@ -1360,7 +1388,9 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 			return NULL;
 		}
 	}
-	skb->protocol = eth_type_trans(skb, bp->dev);
+
+	skb->protocol =
+		eth_type_trans(skb, bnxt_get_pkt_dev(bp, tpa_info->cfa_code));
 
 	if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
 		skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);
@@ -1387,6 +1417,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 	return skb;
 }
 
+static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
+			     struct sk_buff *skb)
+{
+	if (skb->dev != bp->dev) {
+		/* this packet belongs to a vf-rep */
+		bnxt_vf_rep_rx(bp, skb);
+		return;
+	}
+	skb_record_rx_queue(skb, bnapi->index);
+	napi_gro_receive(&bnapi->napi, skb);
+}
+
 /* returns the following:
  * 1       - 1 packet successfully received
  * 0       - successful TPA_START, packet not completed yet
@@ -1403,7 +1445,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 	struct rx_cmp *rxcmp;
 	struct rx_cmp_ext *rxcmp1;
 	u32 tmp_raw_cons = *raw_cons;
-	u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
+	u16 cfa_code, cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
 	struct bnxt_sw_rx_bd *rx_buf;
 	unsigned int len;
 	u8 *data_ptr, agg_bufs, cmp_type;
@@ -1445,8 +1487,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 
 		rc = -ENOMEM;
 		if (likely(skb)) {
-			skb_record_rx_queue(skb, bnapi->index);
-			napi_gro_receive(&bnapi->napi, skb);
+			bnxt_deliver_skb(bp, bnapi, skb);
 			rc = 1;
 		}
 		*event |= BNXT_RX_EVENT;
@@ -1535,7 +1576,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 		skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type);
 	}
 
-	skb->protocol = eth_type_trans(skb, dev);
+	cfa_code = RX_CMP_CFA_CODE(rxcmp1);
+	skb->protocol = eth_type_trans(skb, bnxt_get_pkt_dev(bp, cfa_code));
 
 	if ((rxcmp1->rx_cmp_flags2 &
 	     cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
@@ -1560,8 +1602,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 		}
 	}
 
-	skb_record_rx_queue(skb, bnapi->index);
-	napi_gro_receive(&bnapi->napi, skb);
+	bnxt_deliver_skb(bp, bnapi, skb);
 	rc = 1;
 
 next_rx:
@@ -4577,6 +4618,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 {
 	struct hwrm_func_qcfg_input req = {0};
 	struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	u16 flags;
 	int rc;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
@@ -4593,15 +4635,15 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 		vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
 	}
 #endif
-	if (BNXT_PF(bp)) {
-		u16 flags = le16_to_cpu(resp->flags);
-
-		if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED |
-			     FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED))
-			bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
-		if (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)
-			bp->flags |= BNXT_FLAG_MULTI_HOST;
+	flags = le16_to_cpu(resp->flags);
+	if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED |
+		     FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED)) {
+		bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+		if (flags & FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)
+			bp->flags |= BNXT_FLAG_FW_DCBX_AGENT;
 	}
+	if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST))
+		bp->flags |= BNXT_FLAG_MULTI_HOST;
 
 	switch (resp->port_partition_type) {
 	case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
@@ -4610,6 +4652,13 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 		bp->port_partition_type = resp->port_partition_type;
 		break;
 	}
+	if (bp->hwrm_spec_code < 0x10707 ||
+	    resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEB)
+		bp->br_mode = BRIDGE_MODE_VEB;
+	else if (resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEPA)
+		bp->br_mode = BRIDGE_MODE_VEPA;
+	else
+		bp->br_mode = BRIDGE_MODE_UNDEF;
 
 func_qcfg_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
@@ -4911,6 +4960,26 @@ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
 	}
 }
 
+static int bnxt_hwrm_set_br_mode(struct bnxt *bp, u16 br_mode)
+{
+	struct hwrm_func_cfg_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+	req.fid = cpu_to_le16(0xffff);
+	req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
+	if (br_mode == BRIDGE_MODE_VEB)
+		req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
+	else if (br_mode == BRIDGE_MODE_VEPA)
+		req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
+	else
+		return -EINVAL;
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		rc = -EIO;
+	return rc;
+}
+
 static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
 {
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
@@ -5559,12 +5628,10 @@ void bnxt_tx_disable(struct bnxt *bp)
 {
 	int i;
 	struct bnxt_tx_ring_info *txr;
-	struct netdev_queue *txq;
 
 	if (bp->tx_ring) {
 		for (i = 0; i < bp->tx_nr_rings; i++) {
 			txr = &bp->tx_ring[i];
-			txq = netdev_get_tx_queue(bp->dev, i);
 			txr->dev_state = BNXT_DEV_STATE_CLOSING;
 		}
 	}
@@ -5577,11 +5644,9 @@ void bnxt_tx_enable(struct bnxt *bp)
 {
 	int i;
 	struct bnxt_tx_ring_info *txr;
-	struct netdev_queue *txq;
 
 	for (i = 0; i < bp->tx_nr_rings; i++) {
 		txr = &bp->tx_ring[i];
-		txq = netdev_get_tx_queue(bp->dev, i);
 		txr->dev_state = 0;
 	}
 	netif_tx_wake_all_queues(bp->dev);
@@ -5646,7 +5711,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
 	if (rc)
 		goto hwrm_phy_qcaps_exit;
 
-	if (resp->eee_supported & PORT_PHY_QCAPS_RESP_EEE_SUPPORTED) {
+	if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
 		struct ethtool_eee *eee = &bp->eee;
 		u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
 
@@ -5686,13 +5751,15 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
 
 	memcpy(&link_info->phy_qcfg_resp, resp, sizeof(*resp));
 	link_info->phy_link_status = resp->link;
-	link_info->duplex =  resp->duplex;
+	link_info->duplex = resp->duplex_cfg;
+	if (bp->hwrm_spec_code >= 0x10800)
+		link_info->duplex = resp->duplex_state;
 	link_info->pause = resp->pause;
 	link_info->auto_mode = resp->auto_mode;
 	link_info->auto_pause_setting = resp->auto_pause;
 	link_info->lp_pause = resp->link_partner_adv_pause;
 	link_info->force_pause_setting = resp->force_pause;
-	link_info->duplex_setting = resp->duplex;
+	link_info->duplex_setting = resp->duplex_cfg;
 	if (link_info->phy_link_status == BNXT_LINK_LINK)
 		link_info->link_speed = le16_to_cpu(resp->link_speed);
 	else
@@ -6214,6 +6281,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 	/* Poll link status and check for SFP+ module status */
 	bnxt_get_port_module_status(bp);
 
+	/* VF-reps may need to be re-opened after the PF is re-opened */
+	if (BNXT_PF(bp))
+		bnxt_vf_reps_open(bp);
 	return 0;
 
 open_err:
@@ -6302,6 +6372,10 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 		if (rc)
 			netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n");
 	}
+
+	/* Close the VF-reps before closing PF */
+	if (BNXT_PF(bp))
+		bnxt_vf_reps_close(bp);
 #endif
 	/* Change device state to avoid TX queue wake up's */
 	bnxt_tx_disable(bp);
@@ -6813,7 +6887,8 @@ static void bnxt_timer(unsigned long data)
 	if (atomic_read(&bp->intr_sem) != 0)
 		goto bnxt_restart_timer;
 
-	if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS)) {
+	if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) &&
+	    bp->stats_coal_ticks) {
 		set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
 		schedule_work(&bp->sp_task);
 	}
@@ -7162,15 +7237,17 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 	return 0;
 }
 
-static int bnxt_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-			 __be16 proto, struct tc_to_netdev *ntc)
+static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			 void *type_data)
 {
-	if (ntc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return bnxt_setup_mq_tc(dev, ntc->mqprio->num_tc);
+	return bnxt_setup_mq_tc(dev, mqprio->num_tc);
 }
 
 #ifdef CONFIG_RFS_ACCEL
@@ -7422,6 +7499,102 @@ static void bnxt_udp_tunnel_del(struct net_device *dev,
 	schedule_work(&bp->sp_task);
 }
 
+static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+			       struct net_device *dev, u32 filter_mask,
+			       int nlflags)
+{
+	struct bnxt *bp = netdev_priv(dev);
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bp->br_mode, 0, 0,
+				       nlflags, filter_mask, NULL);
+}
+
+static int bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+			       u16 flags)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct nlattr *attr, *br_spec;
+	int rem, rc = 0;
+
+	if (bp->hwrm_spec_code < 0x10708 || !BNXT_SINGLE_PF(bp))
+		return -EOPNOTSUPP;
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (!br_spec)
+		return -EINVAL;
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+
+		if (nla_len(attr) < sizeof(mode))
+			return -EINVAL;
+
+		mode = nla_get_u16(attr);
+		if (mode == bp->br_mode)
+			break;
+
+		rc = bnxt_hwrm_set_br_mode(bp, mode);
+		if (!rc)
+			bp->br_mode = mode;
+		break;
+	}
+	return rc;
+}
+
+static int bnxt_get_phys_port_name(struct net_device *dev, char *buf,
+				   size_t len)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	int rc;
+
+	/* The PF and it's VF-reps only support the switchdev framework */
+	if (!BNXT_PF(bp))
+		return -EOPNOTSUPP;
+
+	rc = snprintf(buf, len, "p%d", bp->pf.port_id);
+
+	if (rc >= len)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr)
+{
+	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return -EOPNOTSUPP;
+
+	/* The PF and it's VF-reps only support the switchdev framework */
+	if (!BNXT_PF(bp))
+		return -EOPNOTSUPP;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+		/* In SRIOV each PF-pool (PF + child VFs) serves as a
+		 * switching domain, the PF's perm mac-addr can be used
+		 * as the unique parent-id
+		 */
+		attr->u.ppid.id_len = ETH_ALEN;
+		ether_addr_copy(attr->u.ppid.id, bp->pf.mac_addr);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int bnxt_swdev_port_attr_get(struct net_device *dev,
+				    struct switchdev_attr *attr)
+{
+	return bnxt_port_attr_get(netdev_priv(dev), attr);
+}
+
+static const struct switchdev_ops bnxt_switchdev_ops = {
+	.switchdev_port_attr_get	= bnxt_swdev_port_attr_get
+};
+
 static const struct net_device_ops bnxt_netdev_ops = {
 	.ndo_open		= bnxt_open,
 	.ndo_start_xmit		= bnxt_start_xmit,
@@ -7453,6 +7626,9 @@ static const struct net_device_ops bnxt_netdev_ops = {
 	.ndo_udp_tunnel_add	= bnxt_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= bnxt_udp_tunnel_del,
 	.ndo_xdp		= bnxt_xdp,
+	.ndo_bridge_getlink	= bnxt_bridge_getlink,
+	.ndo_bridge_setlink	= bnxt_bridge_setlink,
+	.ndo_get_phys_port_name = bnxt_get_phys_port_name
 };
 
 static void bnxt_remove_one(struct pci_dev *pdev)
@@ -7460,8 +7636,10 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (BNXT_PF(bp))
+	if (BNXT_PF(bp)) {
 		bnxt_sriov_disable(bp);
+		bnxt_dl_unregister(bp);
+	}
 
 	pci_disable_pcie_error_reporting(pdev);
 	unregister_netdev(dev);
@@ -7710,6 +7888,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->netdev_ops = &bnxt_netdev_ops;
 	dev->watchdog_timeo = BNXT_TX_TIMEOUT;
 	dev->ethtool_ops = &bnxt_ethtool_ops;
+	SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops);
 	pci_set_drvdata(pdev, dev);
 
 	rc = bnxt_alloc_hwrm_resources(bp);
@@ -7764,6 +7943,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 #ifdef CONFIG_BNXT_SRIOV
 	init_waitqueue_head(&bp->sriov_cfg_wait);
+	mutex_init(&bp->sriov_lock);
 #endif
 	bp->gro_func = bnxt_gro_func_5730x;
 	if (BNXT_CHIP_P4_PLUS(bp))
@@ -7855,6 +8035,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err_clr_int;
 
+	if (BNXT_PF(bp))
+		bnxt_dl_register(bp);
+
 	netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
 		    board_info[ent->driver_data].name,
 		    (long)pci_resource_start(pdev, 0), dev->dev_addr);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index f34691f85602..2d84d5719b70 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -12,13 +12,16 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME		"bnxt_en"
-#define DRV_MODULE_VERSION	"1.7.0"
+#define DRV_MODULE_VERSION	"1.8.0"
 
 #define DRV_VER_MAJ	1
-#define DRV_VER_MIN	7
+#define DRV_VER_MIN	8
 #define DRV_VER_UPD	0
 
 #include <linux/interrupt.h>
+#include <net/devlink.h>
+#include <net/dst_metadata.h>
+#include <net/switchdev.h>
 
 struct tx_bd {
 	__le32 tx_bd_len_flags_type;
@@ -242,6 +245,10 @@ struct rx_cmp_ext {
 	    ((le32_to_cpu((rxcmp1)->rx_cmp_flags2) &			\
 	     RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3)
 
+#define RX_CMP_CFA_CODE(rxcmpl1)					\
+	((le32_to_cpu((rxcmpl1)->rx_cmp_cfa_code_errors_v2) &		\
+	  RX_CMPL_CFA_CODE_MASK) >> RX_CMPL_CFA_CODE_SFT)
+
 struct rx_agg_cmp {
 	__le32 rx_agg_cmp_len_flags_type;
 	#define RX_AGG_CMP_TYPE					(0x3f << 0)
@@ -311,6 +318,10 @@ struct rx_tpa_start_cmp_ext {
 	__le32 rx_tpa_start_cmp_hdr_info;
 };
 
+#define TPA_START_CFA_CODE(rx_tpa_start)				\
+	((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_cfa_code_v2) &	\
+	 RX_TPA_START_CMP_CFA_CODE) >> RX_TPA_START_CMPL_CFA_CODE_SHIFT)
+
 struct rx_tpa_end_cmp {
 	__le32 rx_tpa_end_cmp_len_flags_type;
 	#define RX_TPA_END_CMP_TYPE				(0x3f << 0)
@@ -618,6 +629,8 @@ struct bnxt_tpa_info {
 
 #define BNXT_TPA_OUTER_L3_OFF(hdr_info)	\
 	((hdr_info) & 0x1ff)
+
+	u16			cfa_code; /* cfa_code in TPA start compl */
 };
 
 struct bnxt_rx_ring_info {
@@ -825,8 +838,8 @@ struct bnxt_link_info {
 	u8			loop_back;
 	u8			link_up;
 	u8			duplex;
-#define BNXT_LINK_DUPLEX_HALF	PORT_PHY_QCFG_RESP_DUPLEX_HALF
-#define BNXT_LINK_DUPLEX_FULL	PORT_PHY_QCFG_RESP_DUPLEX_FULL
+#define BNXT_LINK_DUPLEX_HALF	PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF
+#define BNXT_LINK_DUPLEX_FULL	PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL
 	u8			pause;
 #define BNXT_LINK_PAUSE_TX	PORT_PHY_QCFG_RESP_PAUSE_TX
 #define BNXT_LINK_PAUSE_RX	PORT_PHY_QCFG_RESP_PAUSE_RX
@@ -928,6 +941,24 @@ struct bnxt_test_info {
 #define BNXT_CAG_REG_LEGACY_INT_STATUS	0x4014
 #define BNXT_CAG_REG_BASE		0x300000
 
+struct bnxt_vf_rep_stats {
+	u64			packets;
+	u64			bytes;
+	u64			dropped;
+};
+
+struct bnxt_vf_rep {
+	struct bnxt			*bp;
+	struct net_device		*dev;
+	struct metadata_dst		*dst;
+	u16				vf_idx;
+	u16				tx_cfa_action;
+	u16				rx_cfa_code;
+
+	struct bnxt_vf_rep_stats	rx_stats;
+	struct bnxt_vf_rep_stats	tx_stats;
+};
+
 struct bnxt {
 	void __iomem		*bar0;
 	void __iomem		*bar1;
@@ -1027,6 +1058,7 @@ struct bnxt {
 	#define BNXT_FLAG_MULTI_HOST	0x100000
 	#define BNXT_FLAG_SHORT_CMD	0x200000
 	#define BNXT_FLAG_DOUBLE_DB	0x400000
+	#define BNXT_FLAG_FW_DCBX_AGENT	0x800000
 	#define BNXT_FLAG_CHIP_NITRO_A0	0x1000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\
@@ -1164,6 +1196,7 @@ struct bnxt {
 	u8			nge_port_cnt;
 	__le16			nge_fw_dst_port_id;
 	u8			port_partition_type;
+	u16			br_mode;
 
 	u16			rx_coal_ticks;
 	u16			rx_coal_ticks_irq;
@@ -1206,6 +1239,12 @@ struct bnxt {
 	wait_queue_head_t	sriov_cfg_wait;
 	bool			sriov_cfg;
 #define BNXT_SRIOV_CFG_WAIT_TMO	msecs_to_jiffies(10000)
+
+	/* lock to protect VF-rep creation/cleanup via
+	 * multiple paths such as ->sriov_configure() and
+	 * devlink ->eswitch_mode_set()
+	 */
+	struct mutex		sriov_lock;
 #endif
 
 #define BNXT_NTP_FLTR_MAX_FLTR	4096
@@ -1232,6 +1271,12 @@ struct bnxt {
 	struct bnxt_led_info	leds[BNXT_MAX_LED];
 
 	struct bpf_prog		*xdp_prog;
+
+	/* devlink interface and vf-rep structs */
+	struct devlink		*dl;
+	enum devlink_eswitch_mode eswitch_mode;
+	struct bnxt_vf_rep	**vf_reps; /* array of vf-rep ptrs */
+	u16			*cfa_code_map; /* cfa_code -> vf_idx map */
 };
 
 #define BNXT_RX_STATS_OFFSET(counter)			\
@@ -1306,4 +1351,5 @@ int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
 void bnxt_restore_pf_fw_resources(struct bnxt *bp);
+int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr);
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 5c6dd0ce209f..aa1f3a2c7a78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -93,6 +93,12 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
 			cos2bw.tsa =
 				QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_ETS;
 			cos2bw.bw_weight = ets->tc_tx_bw[i];
+			/* older firmware requires min_bw to be set to the
+			 * same weight value in percent.
+			 */
+			cos2bw.min_bw =
+				cpu_to_le32((ets->tc_tx_bw[i] * 100) |
+					    BW_VALUE_UNIT_PERCENT1_100);
 		}
 		memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
 		if (i == 0) {
@@ -549,13 +555,18 @@ static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
 	struct bnxt *bp = netdev_priv(dev);
 
-	/* only support IEEE */
-	if ((mode & DCB_CAP_DCBX_VER_CEE) || !(mode & DCB_CAP_DCBX_VER_IEEE))
+	/* All firmware DCBX settings are set in NVRAM */
+	if (bp->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
 		return 1;
 
 	if (mode & DCB_CAP_DCBX_HOST) {
 		if (BNXT_VF(bp) || (bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
 			return 1;
+
+		/* only support IEEE */
+		if ((mode & DCB_CAP_DCBX_VER_CEE) ||
+		    !(mode & DCB_CAP_DCBX_VER_IEEE))
+			return 1;
 	}
 
 	if (mode == bp->dcbx_cap)
@@ -584,7 +595,7 @@ void bnxt_dcb_init(struct bnxt *bp)
 	bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
 	if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
 		bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
-	else
+	else if (bp->flags & BNXT_FLAG_FW_DCBX_AGENT)
 		bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
 	bp->dev->dcbnl_ops = &dcbnl_ops;
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
index ecd0a5e46a49..d2e0af960bf5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
@@ -26,6 +26,7 @@ struct bnxt_cos2bw_cfg {
 	u8			queue_id;
 	__le32			min_bw;
 	__le32			max_bw;
+#define BW_VALUE_UNIT_PERCENT1_100		(0x1UL << 29)
 	u8			tsa;
 	u8			pri_lvl;
 	u8			bw_weight;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index be6acadcb202..08b870d7d466 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -86,9 +86,11 @@ static int bnxt_set_coalesce(struct net_device *dev,
 	if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) {
 		u32 stats_ticks = coal->stats_block_coalesce_usecs;
 
-		stats_ticks = clamp_t(u32, stats_ticks,
-				      BNXT_MIN_STATS_COAL_TICKS,
-				      BNXT_MAX_STATS_COAL_TICKS);
+		/* Allow 0, which means disable. */
+		if (stats_ticks)
+			stats_ticks = clamp_t(u32, stats_ticks,
+					      BNXT_MIN_STATS_COAL_TICKS,
+					      BNXT_MAX_STATS_COAL_TICKS);
 		stats_ticks = rounddown(stats_ticks, BNXT_MIN_STATS_COAL_TICKS);
 		bp->stats_coal_ticks = stats_ticks;
 		update_stats = true;
@@ -198,19 +200,23 @@ static const struct {
 
 #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
 
+static int bnxt_get_num_stats(struct bnxt *bp)
+{
+	int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+
+	if (bp->flags & BNXT_FLAG_PORT_STATS)
+		num_stats += BNXT_NUM_PORT_STATS;
+
+	return num_stats;
+}
+
 static int bnxt_get_sset_count(struct net_device *dev, int sset)
 {
 	struct bnxt *bp = netdev_priv(dev);
 
 	switch (sset) {
-	case ETH_SS_STATS: {
-		int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
-
-		if (bp->flags & BNXT_FLAG_PORT_STATS)
-			num_stats += BNXT_NUM_PORT_STATS;
-
-		return num_stats;
-	}
+	case ETH_SS_STATS:
+		return bnxt_get_num_stats(bp);
 	case ETH_SS_TEST:
 		if (!bp->num_tests)
 			return -EOPNOTSUPP;
@@ -225,11 +231,8 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 {
 	u32 i, j = 0;
 	struct bnxt *bp = netdev_priv(dev);
-	u32 buf_size = sizeof(struct ctx_hw_stats) * bp->cp_nr_rings;
 	u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
 
-	memset(buf, 0, buf_size);
-
 	if (!bp->bnapi)
 		return;
 
@@ -520,7 +523,7 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd)
 	struct flow_keys *fkeys;
 	int i, rc = -EINVAL;
 
-	if (fs->location < 0 || fs->location >= BNXT_NTP_FLTR_MAX_FLTR)
+	if (fs->location >= BNXT_NTP_FLTR_MAX_FLTR)
 		return rc;
 
 	for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) {
@@ -835,7 +838,7 @@ static void bnxt_get_drvinfo(struct net_device *dev,
 		strlcpy(info->fw_version, bp->fw_ver_str,
 			sizeof(info->fw_version));
 	strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info));
-	info->n_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+	info->n_stats = bnxt_get_num_stats(bp);
 	info->testinfo_len = bp->num_tests;
 	/* TODO CHIMP_FW: eeprom dump details */
 	info->eedump_len = 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 7dc71bb95837..3ba22e8ee914 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -11,14 +11,14 @@
 #ifndef BNXT_HSI_H
 #define BNXT_HSI_H
 
-/* HSI and HWRM Specification 1.7.6 */
+/* HSI and HWRM Specification 1.8.0 */
 #define HWRM_VERSION_MAJOR	1
-#define HWRM_VERSION_MINOR	7
-#define HWRM_VERSION_UPDATE	6
+#define HWRM_VERSION_MINOR	8
+#define HWRM_VERSION_UPDATE	0
 
-#define HWRM_VERSION_RSVD	2 /* non-zero means beta version */
+#define HWRM_VERSION_RSVD	0 /* non-zero means beta version */
 
-#define HWRM_VERSION_STR	"1.7.6.2"
+#define HWRM_VERSION_STR	"1.8.0.0"
 /*
  * Following is the signature for HWRM message field that indicates not
  * applicable (All F's). Need to cast it the size of the field if needed.
@@ -813,7 +813,7 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED	    0x4UL
 	#define FUNC_QCFG_RESP_FLAGS_STD_TX_RING_MODE_ENABLED      0x8UL
 	#define FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED	    0x10UL
-	#define FUNC_QCFG_RESP_FLAGS_MULTI_HOST			    0x20UL
+	#define FUNC_QCFG_RESP_FLAGS_MULTI_HOST		    0x20UL
 	u8 mac_address[6];
 	__le16 pci_id;
 	__le16 alloc_rsscos_ctx;
@@ -835,9 +835,8 @@ struct hwrm_func_qcfg_output {
 	u8 port_pf_cnt;
 	#define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL		   0x0UL
 	__le16 dflt_vnic_id;
-	u8 host_cnt;
-	#define FUNC_QCFG_RESP_HOST_CNT_UNAVAIL		   0x0UL
 	u8 unused_0;
+	u8 unused_1;
 	__le32 min_bw;
 	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK		    0xfffffffUL
 	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT		    0
@@ -874,12 +873,56 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_EVB_MODE_NO_EVB			   0x0UL
 	#define FUNC_QCFG_RESP_EVB_MODE_VEB			   0x1UL
 	#define FUNC_QCFG_RESP_EVB_MODE_VEPA			   0x2UL
-	u8 unused_1;
+	u8 unused_2;
 	__le16 alloc_vfs;
 	__le32 alloc_mcast_filters;
 	__le32 alloc_hw_ring_grps;
 	__le16 alloc_sp_tx_rings;
+	u8 unused_3;
+	u8 valid;
+};
+
+/* hwrm_func_vlan_cfg */
+/* Input (48 bytes) */
+struct hwrm_func_vlan_cfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le16 fid;
+	u8 unused_0;
+	u8 unused_1;
+	__le32 enables;
+	#define FUNC_VLAN_CFG_REQ_ENABLES_STAG_VID		    0x1UL
+	#define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_VID		    0x2UL
+	#define FUNC_VLAN_CFG_REQ_ENABLES_STAG_PCP		    0x4UL
+	#define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_PCP		    0x8UL
+	#define FUNC_VLAN_CFG_REQ_ENABLES_STAG_TPID		    0x10UL
+	#define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_TPID		    0x20UL
+	__le16 stag_vid;
+	u8 stag_pcp;
 	u8 unused_2;
+	__be16 stag_tpid;
+	__le16 ctag_vid;
+	u8 ctag_pcp;
+	u8 unused_3;
+	__be16 ctag_tpid;
+	__le32 rsvd1;
+	__le32 rsvd2;
+	__le32 unused_4;
+};
+
+/* Output (16 bytes) */
+struct hwrm_func_vlan_cfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
 	u8 valid;
 };
 
@@ -902,6 +945,7 @@ struct hwrm_func_cfg_input {
 	#define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_ENABLE	    0x200UL
 	#define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_DISABLE	    0x400UL
 	#define FUNC_CFG_REQ_FLAGS_VIRT_MAC_PERSIST		    0x800UL
+	#define FUNC_CFG_REQ_FLAGS_NO_AUTOCLEAR_STATISTIC	    0x1000UL
 	__le32 enables;
 	#define FUNC_CFG_REQ_ENABLES_MTU			    0x1UL
 	#define FUNC_CFG_REQ_ENABLES_MRU			    0x2UL
@@ -1456,9 +1500,9 @@ struct hwrm_port_phy_qcfg_output {
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB		   0x1f4UL
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB		   0x3e8UL
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB		   0xffffUL
-	u8 duplex;
-	#define PORT_PHY_QCFG_RESP_DUPLEX_HALF			   0x0UL
-	#define PORT_PHY_QCFG_RESP_DUPLEX_FULL			   0x1UL
+	u8 duplex_cfg;
+	#define PORT_PHY_QCFG_RESP_DUPLEX_CFG_HALF		   0x0UL
+	#define PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL		   0x1UL
 	u8 pause;
 	#define PORT_PHY_QCFG_RESP_PAUSE_TX			    0x1UL
 	#define PORT_PHY_QCFG_RESP_PAUSE_RX			    0x2UL
@@ -1573,6 +1617,9 @@ struct hwrm_port_phy_qcfg_output {
 	#define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASELR4	   0x16UL
 	#define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASEER4	   0x17UL
 	#define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_ACTIVE_CABLE      0x18UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASET		   0x19UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASESX		   0x1aUL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASECX		   0x1bUL
 	u8 media_type;
 	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN		   0x0UL
 	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP		   0x1UL
@@ -1651,14 +1698,16 @@ struct hwrm_port_phy_qcfg_output {
 	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED    0x10UL
 	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED  0x20UL
 	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED    0x40UL
+	u8 duplex_state;
+	#define PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF		   0x0UL
+	#define PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL		   0x1UL
 	u8 unused_1;
-	u8 unused_2;
 	char phy_vendor_name[16];
 	char phy_vendor_partnumber[16];
-	__le32 unused_3;
+	__le32 unused_2;
+	u8 unused_3;
 	u8 unused_4;
 	u8 unused_5;
-	u8 unused_6;
 	u8 valid;
 };
 
@@ -1744,6 +1793,51 @@ struct hwrm_port_mac_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_port_mac_ptp_qcfg */
+/* Input (24 bytes) */
+struct hwrm_port_mac_ptp_qcfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le16 port_id;
+	__le16 unused_0[3];
+};
+
+/* Output (80 bytes) */
+struct hwrm_port_mac_ptp_qcfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	u8 flags;
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS	    0x1UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS	    0x2UL
+	u8 unused_0;
+	__le16 unused_1;
+	__le32 rx_ts_reg_off_lower;
+	__le32 rx_ts_reg_off_upper;
+	__le32 rx_ts_reg_off_seq_id;
+	__le32 rx_ts_reg_off_src_id_0;
+	__le32 rx_ts_reg_off_src_id_1;
+	__le32 rx_ts_reg_off_src_id_2;
+	__le32 rx_ts_reg_off_domain_id;
+	__le32 rx_ts_reg_off_fifo;
+	__le32 rx_ts_reg_off_fifo_adv;
+	__le32 rx_ts_reg_off_granularity;
+	__le32 tx_ts_reg_off_lower;
+	__le32 tx_ts_reg_off_upper;
+	__le32 tx_ts_reg_off_seq_id;
+	__le32 tx_ts_reg_off_fifo;
+	__le32 tx_ts_reg_off_granularity;
+	__le32 unused_2;
+	u8 unused_3;
+	u8 unused_4;
+	u8 unused_5;
+	u8 valid;
+};
+
 /* hwrm_port_qstats */
 /* Input (40 bytes) */
 struct hwrm_port_qstats_input {
@@ -1874,10 +1968,10 @@ struct hwrm_port_phy_qcaps_output {
 	__le16 req_type;
 	__le16 seq_id;
 	__le16 resp_len;
-	u8 eee_supported;
-	#define PORT_PHY_QCAPS_RESP_EEE_SUPPORTED		    0x1UL
-	#define PORT_PHY_QCAPS_RESP_RSVD1_MASK			    0xfeUL
-	#define PORT_PHY_QCAPS_RESP_RSVD1_SFT			    1
+	u8 flags;
+	#define PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED	    0x1UL
+	#define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK		    0xfeUL
+	#define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT		    1
 	u8 unused_0;
 	__le16 supported_speeds_force_mode;
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL
@@ -3152,6 +3246,95 @@ struct hwrm_queue_cos2bw_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_queue_dscp_qcaps */
+/* Input (24 bytes) */
+struct hwrm_queue_dscp_qcaps_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	u8 port_id;
+	u8 unused_0[7];
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_dscp_qcaps_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	u8 num_dscp_bits;
+	u8 unused_0;
+	__le16 max_entries;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
+/* hwrm_queue_dscp2pri_qcfg */
+/* Input (32 bytes) */
+struct hwrm_queue_dscp2pri_qcfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 dest_data_addr;
+	u8 port_id;
+	u8 unused_0;
+	__le16 dest_data_buffer_size;
+	__le32 unused_1;
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_dscp2pri_qcfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le16 entry_cnt;
+	u8 default_pri;
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
+/* hwrm_queue_dscp2pri_cfg */
+/* Input (40 bytes) */
+struct hwrm_queue_dscp2pri_cfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 src_data_addr;
+	__le32 flags;
+	#define QUEUE_DSCP2PRI_CFG_REQ_FLAGS_USE_HW_DEFAULT_PRI    0x1UL
+	__le32 enables;
+	#define QUEUE_DSCP2PRI_CFG_REQ_ENABLES_DEFAULT_PRI	    0x1UL
+	u8 port_id;
+	u8 default_pri;
+	__le16 entry_cnt;
+	__le32 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_dscp2pri_cfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
 /* hwrm_vnic_alloc */
 /* Input (24 bytes) */
 struct hwrm_vnic_alloc_input {
@@ -4038,7 +4221,7 @@ struct hwrm_cfa_encap_record_alloc_input {
 	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE       0x8UL
 	u8 unused_0;
 	__le16 unused_1;
-	__le32 encap_data[16];
+	__le32 encap_data[20];
 };
 
 /* Output (16 bytes) */
@@ -4120,8 +4303,8 @@ struct hwrm_cfa_ntuple_filter_alloc_input {
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6     0x6UL
 	u8 ip_protocol;
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN   0x0UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP       0x6UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP       0x11UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP       0x6UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP       0x11UL
 	__le16 dst_id;
 	__le16 mirror_vnic_id;
 	u8 tunnel_type;
@@ -4224,6 +4407,58 @@ struct hwrm_cfa_ntuple_filter_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_cfa_vfr_alloc */
+/* Input (32 bytes) */
+struct hwrm_cfa_vfr_alloc_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le16 vf_id;
+	__le16 reserved;
+	__le32 unused_0;
+	char vfr_name[32];
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_vfr_alloc_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le16 rx_cfa_code;
+	__le16 tx_cfa_action;
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 valid;
+};
+
+/* hwrm_cfa_vfr_free */
+/* Input (24 bytes) */
+struct hwrm_cfa_vfr_free_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	char vfr_name[32];
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_vfr_free_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
 /* hwrm_tunnel_dst_port_query */
 /* Input (24 bytes) */
 struct hwrm_tunnel_dst_port_query_input {
@@ -4448,12 +4683,13 @@ struct hwrm_fw_reset_input {
 	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT		   0x1UL
 	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   0x2UL
 	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE		   0x3UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD		   0x4UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST		   0x4UL
 	u8 selfrst_status;
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE	   0x0UL
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP	   0x1UL
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST	   0x2UL
-	__le16 unused_0[3];
+	u8 host_idx;
+	u8 unused_0[5];
 };
 
 /* Output (16 bytes) */
@@ -4487,7 +4723,7 @@ struct hwrm_fw_qstatus_input {
 	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT		   0x1UL
 	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   0x2UL
 	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE		   0x3UL
-	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD		   0x4UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST		   0x4UL
 	u8 unused_0[7];
 };
 
@@ -4572,6 +4808,16 @@ struct hwrm_fw_set_structured_data_output {
 	u8 valid;
 };
 
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_fw_set_structured_data_cmd_err {
+	u8 code;
+	#define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN       0x0UL
+	#define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT   0x1UL
+	#define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT       0x2UL
+	#define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID	   0x3UL
+	u8 unused_0[7];
+};
+
 /* hwrm_fw_get_structured_data */
 /* Input (32 bytes) */
 struct hwrm_fw_get_structured_data_input {
@@ -4611,6 +4857,14 @@ struct hwrm_fw_get_structured_data_output {
 	u8 valid;
 };
 
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_fw_get_structured_data_cmd_err {
+	u8 code;
+	#define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN       0x0UL
+	#define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID	   0x3UL
+	u8 unused_0[7];
+};
+
 /* hwrm_exec_fwd_resp */
 /* Input (128 bytes) */
 struct hwrm_exec_fwd_resp_input {
@@ -5411,7 +5665,7 @@ struct cmd_nums {
 	#define HWRM_PORT_LPBK_CLR_STATS			   (0x26UL)
 	#define HWRM_PORT_PHY_QCFG				   (0x27UL)
 	#define HWRM_PORT_MAC_QCFG				   (0x28UL)
-	#define RESERVED7					   (0x29UL)
+	#define HWRM_PORT_MAC_PTP_QCFG				   (0x29UL)
 	#define HWRM_PORT_PHY_QCAPS				   (0x2aUL)
 	#define HWRM_PORT_PHY_I2C_WRITE			   (0x2bUL)
 	#define HWRM_PORT_PHY_I2C_READ				   (0x2cUL)
@@ -5421,14 +5675,17 @@ struct cmd_nums {
 	#define HWRM_QUEUE_QPORTCFG				   (0x30UL)
 	#define HWRM_QUEUE_QCFG				   (0x31UL)
 	#define HWRM_QUEUE_CFG					   (0x32UL)
-	#define RESERVED2					   (0x33UL)
-	#define RESERVED3					   (0x34UL)
+	#define HWRM_FUNC_VLAN_CFG				   (0x33UL)
+	#define HWRM_FUNC_VLAN_QCFG				   (0x34UL)
 	#define HWRM_QUEUE_PFCENABLE_QCFG			   (0x35UL)
 	#define HWRM_QUEUE_PFCENABLE_CFG			   (0x36UL)
 	#define HWRM_QUEUE_PRI2COS_QCFG			   (0x37UL)
 	#define HWRM_QUEUE_PRI2COS_CFG				   (0x38UL)
 	#define HWRM_QUEUE_COS2BW_QCFG				   (0x39UL)
 	#define HWRM_QUEUE_COS2BW_CFG				   (0x3aUL)
+	#define HWRM_QUEUE_DSCP_QCAPS				   (0x3bUL)
+	#define HWRM_QUEUE_DSCP2PRI_QCFG			   (0x3cUL)
+	#define HWRM_QUEUE_DSCP2PRI_CFG			   (0x3dUL)
 	#define HWRM_VNIC_ALLOC				   (0x40UL)
 	#define HWRM_VNIC_FREE					   (0x41UL)
 	#define HWRM_VNIC_CFG					   (0x42UL)
@@ -5455,7 +5712,7 @@ struct cmd_nums {
 	#define HWRM_CFA_L2_FILTER_FREE			   (0x91UL)
 	#define HWRM_CFA_L2_FILTER_CFG				   (0x92UL)
 	#define HWRM_CFA_L2_SET_RX_MASK			   (0x93UL)
-	#define RESERVED4					   (0x94UL)
+	#define HWRM_CFA_VLAN_ANTISPOOF_CFG			   (0x94UL)
 	#define HWRM_CFA_TUNNEL_FILTER_ALLOC			   (0x95UL)
 	#define HWRM_CFA_TUNNEL_FILTER_FREE			   (0x96UL)
 	#define HWRM_CFA_ENCAP_RECORD_ALLOC			   (0x97UL)
@@ -5494,6 +5751,8 @@ struct cmd_nums {
 	#define HWRM_CFA_METER_PROFILE_CFG			   (0xf7UL)
 	#define HWRM_CFA_METER_INSTANCE_ALLOC			   (0xf8UL)
 	#define HWRM_CFA_METER_INSTANCE_FREE			   (0xf9UL)
+	#define HWRM_CFA_VFR_ALLOC				   (0xfdUL)
+	#define HWRM_CFA_VFR_FREE				   (0xfeUL)
 	#define HWRM_CFA_VF_PAIR_ALLOC				   (0x100UL)
 	#define HWRM_CFA_VF_PAIR_FREE				   (0x101UL)
 	#define HWRM_CFA_VF_PAIR_INFO				   (0x102UL)
@@ -5502,6 +5761,9 @@ struct cmd_nums {
 	#define HWRM_CFA_FLOW_FLUSH				   (0x105UL)
 	#define HWRM_CFA_FLOW_STATS				   (0x106UL)
 	#define HWRM_CFA_FLOW_INFO				   (0x107UL)
+	#define HWRM_CFA_DECAP_FILTER_ALLOC			   (0x108UL)
+	#define HWRM_CFA_DECAP_FILTER_FREE			   (0x109UL)
+	#define HWRM_CFA_VLAN_ANTISPOOF_QCFG			   (0x10aUL)
 	#define HWRM_SELFTEST_QLIST				   (0x200UL)
 	#define HWRM_SELFTEST_EXEC				   (0x201UL)
 	#define HWRM_SELFTEST_IRQ				   (0x202UL)
@@ -5510,6 +5772,8 @@ struct cmd_nums {
 	#define HWRM_DBG_WRITE_DIRECT				   (0xff12UL)
 	#define HWRM_DBG_WRITE_INDIRECT			   (0xff13UL)
 	#define HWRM_DBG_DUMP					   (0xff14UL)
+	#define HWRM_DBG_ERASE_NVM				   (0xff15UL)
+	#define HWRM_DBG_CFG					   (0xff16UL)
 	#define HWRM_NVM_FACTORY_DEFAULTS			   (0xffeeUL)
 	#define HWRM_NVM_VALIDATE_OPTION			   (0xffefUL)
 	#define HWRM_NVM_FLUSH					   (0xfff0UL)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index b8e7248294d9..d37925a8a65b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -18,6 +18,7 @@
 #include "bnxt.h"
 #include "bnxt_ulp.h"
 #include "bnxt_sriov.h"
+#include "bnxt_vfr.h"
 #include "bnxt_ethtool.h"
 
 #ifdef CONFIG_BNXT_SRIOV
@@ -587,6 +588,10 @@ void bnxt_sriov_disable(struct bnxt *bp)
 	if (!num_vfs)
 		return;
 
+	/* synchronize VF and VF-rep create and destroy */
+	mutex_lock(&bp->sriov_lock);
+	bnxt_vf_reps_destroy(bp);
+
 	if (pci_vfs_assigned(bp->pdev)) {
 		bnxt_hwrm_fwd_async_event_cmpl(
 			bp, NULL, ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD);
@@ -597,6 +602,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
 		/* Free the HW resources reserved for various VF's */
 		bnxt_hwrm_func_vf_resource_free(bp, num_vfs);
 	}
+	mutex_unlock(&bp->sriov_lock);
 
 	bnxt_free_vf_resources(bp);
 
@@ -794,8 +800,10 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
 					PORT_PHY_QCFG_RESP_LINK_LINK;
 				phy_qcfg_resp.link_speed = cpu_to_le16(
 					PORT_PHY_QCFG_RESP_LINK_SPEED_10GB);
-				phy_qcfg_resp.duplex =
-					PORT_PHY_QCFG_RESP_DUPLEX_FULL;
+				phy_qcfg_resp.duplex_cfg =
+					PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL;
+				phy_qcfg_resp.duplex_state =
+					PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL;
 				phy_qcfg_resp.pause =
 					(PORT_PHY_QCFG_RESP_PAUSE_TX |
 					 PORT_PHY_QCFG_RESP_PAUSE_RX);
@@ -804,7 +812,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
 			/* force link down */
 			phy_qcfg_resp.link = PORT_PHY_QCFG_RESP_LINK_NO_LINK;
 			phy_qcfg_resp.link_speed = 0;
-			phy_qcfg_resp.duplex = PORT_PHY_QCFG_RESP_DUPLEX_HALF;
+			phy_qcfg_resp.duplex_state =
+				PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF;
 			phy_qcfg_resp.pause = 0;
 		}
 		rc = bnxt_hwrm_fwd_resp(bp, vf, &phy_qcfg_resp,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
new file mode 100644
index 000000000000..b05c5d0ee3f9
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -0,0 +1,495 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/jhash.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_vfr.h"
+
+#ifdef CONFIG_BNXT_SRIOV
+
+#define CFA_HANDLE_INVALID		0xffff
+#define VF_IDX_INVALID			0xffff
+
+static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
+			      u16 *tx_cfa_action, u16 *rx_cfa_code)
+{
+	struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_vfr_alloc_input req = { 0 };
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1);
+	req.vf_id = cpu_to_le16(vf_idx);
+	sprintf(req.vfr_name, "vfr%d", vf_idx);
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		*tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
+		*rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
+		netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
+			   *tx_cfa_action, *rx_cfa_code);
+	} else {
+		netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+	}
+
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
+static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
+{
+	struct hwrm_cfa_vfr_free_input req = { 0 };
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1);
+	sprintf(req.vfr_name, "vfr%d", vf_idx);
+
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+	return rc;
+}
+
+static int bnxt_vf_rep_open(struct net_device *dev)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+	struct bnxt *bp = vf_rep->bp;
+
+	/* Enable link and TX only if the parent PF is open. */
+	if (netif_running(bp->dev)) {
+		netif_carrier_on(dev);
+		netif_tx_start_all_queues(dev);
+	}
+	return 0;
+}
+
+static int bnxt_vf_rep_close(struct net_device *dev)
+{
+	netif_carrier_off(dev);
+	netif_tx_disable(dev);
+
+	return 0;
+}
+
+static netdev_tx_t bnxt_vf_rep_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+	int rc, len = skb->len;
+
+	skb_dst_drop(skb);
+	dst_hold((struct dst_entry *)vf_rep->dst);
+	skb_dst_set(skb, (struct dst_entry *)vf_rep->dst);
+	skb->dev = vf_rep->dst->u.port_info.lower_dev;
+
+	rc = dev_queue_xmit(skb);
+	if (!rc) {
+		vf_rep->tx_stats.packets++;
+		vf_rep->tx_stats.bytes += len;
+	}
+	return rc;
+}
+
+static void
+bnxt_vf_rep_get_stats64(struct net_device *dev,
+			struct rtnl_link_stats64 *stats)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+	stats->rx_packets = vf_rep->rx_stats.packets;
+	stats->rx_bytes = vf_rep->rx_stats.bytes;
+	stats->tx_packets = vf_rep->tx_stats.packets;
+	stats->tx_bytes = vf_rep->tx_stats.bytes;
+}
+
+struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
+{
+	u16 vf_idx;
+
+	if (cfa_code && bp->cfa_code_map && BNXT_PF(bp)) {
+		vf_idx = bp->cfa_code_map[cfa_code];
+		if (vf_idx != VF_IDX_INVALID)
+			return bp->vf_reps[vf_idx]->dev;
+	}
+	return NULL;
+}
+
+void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev);
+	struct bnxt_vf_rep_stats *rx_stats;
+
+	rx_stats = &vf_rep->rx_stats;
+	vf_rep->rx_stats.bytes += skb->len;
+	vf_rep->rx_stats.packets++;
+
+	netif_receive_skb(skb);
+}
+
+static int bnxt_vf_rep_get_phys_port_name(struct net_device *dev, char *buf,
+					  size_t len)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+	struct pci_dev *pf_pdev = vf_rep->bp->pdev;
+	int rc;
+
+	rc = snprintf(buf, len, "pf%dvf%d", PCI_FUNC(pf_pdev->devfn),
+		      vf_rep->vf_idx);
+	if (rc >= len)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static void bnxt_vf_rep_get_drvinfo(struct net_device *dev,
+				    struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
+}
+
+static int bnxt_vf_rep_port_attr_get(struct net_device *dev,
+				     struct switchdev_attr *attr)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+	/* as only PORT_PARENT_ID is supported currently use common code
+	 * between PF and VF-rep for now.
+	 */
+	return bnxt_port_attr_get(vf_rep->bp, attr);
+}
+
+static const struct switchdev_ops bnxt_vf_rep_switchdev_ops = {
+	.switchdev_port_attr_get	= bnxt_vf_rep_port_attr_get
+};
+
+static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = {
+	.get_drvinfo		= bnxt_vf_rep_get_drvinfo
+};
+
+static const struct net_device_ops bnxt_vf_rep_netdev_ops = {
+	.ndo_open		= bnxt_vf_rep_open,
+	.ndo_stop		= bnxt_vf_rep_close,
+	.ndo_start_xmit		= bnxt_vf_rep_xmit,
+	.ndo_get_stats64	= bnxt_vf_rep_get_stats64,
+	.ndo_get_phys_port_name = bnxt_vf_rep_get_phys_port_name
+};
+
+/* Called when the parent PF interface is closed:
+ * As the mode transition from SWITCHDEV to LEGACY
+ * happens under the rtnl_lock() this routine is safe
+ * under the rtnl_lock()
+ */
+void bnxt_vf_reps_close(struct bnxt *bp)
+{
+	struct bnxt_vf_rep *vf_rep;
+	u16 num_vfs, i;
+
+	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return;
+
+	num_vfs = pci_num_vf(bp->pdev);
+	for (i = 0; i < num_vfs; i++) {
+		vf_rep = bp->vf_reps[i];
+		if (netif_running(vf_rep->dev))
+			bnxt_vf_rep_close(vf_rep->dev);
+	}
+}
+
+/* Called when the parent PF interface is opened (re-opened):
+ * As the mode transition from SWITCHDEV to LEGACY
+ * happen under the rtnl_lock() this routine is safe
+ * under the rtnl_lock()
+ */
+void bnxt_vf_reps_open(struct bnxt *bp)
+{
+	int i;
+
+	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return;
+
+	for (i = 0; i < pci_num_vf(bp->pdev); i++)
+		bnxt_vf_rep_open(bp->vf_reps[i]->dev);
+}
+
+static void __bnxt_vf_reps_destroy(struct bnxt *bp)
+{
+	u16 num_vfs = pci_num_vf(bp->pdev);
+	struct bnxt_vf_rep *vf_rep;
+	int i;
+
+	for (i = 0; i < num_vfs; i++) {
+		vf_rep = bp->vf_reps[i];
+		if (vf_rep) {
+			dst_release((struct dst_entry *)vf_rep->dst);
+
+			if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID)
+				hwrm_cfa_vfr_free(bp, vf_rep->vf_idx);
+
+			if (vf_rep->dev) {
+				/* if register_netdev failed, then netdev_ops
+				 * would have been set to NULL
+				 */
+				if (vf_rep->dev->netdev_ops)
+					unregister_netdev(vf_rep->dev);
+				free_netdev(vf_rep->dev);
+			}
+		}
+	}
+
+	kfree(bp->vf_reps);
+	bp->vf_reps = NULL;
+}
+
+void bnxt_vf_reps_destroy(struct bnxt *bp)
+{
+	bool closed = false;
+
+	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return;
+
+	if (!bp->vf_reps)
+		return;
+
+	/* Ensure that parent PF's and VF-reps' RX/TX has been quiesced
+	 * before proceeding with VF-rep cleanup.
+	 */
+	rtnl_lock();
+	if (netif_running(bp->dev)) {
+		bnxt_close_nic(bp, false, false);
+		closed = true;
+	}
+	/* un-publish cfa_code_map so that RX path can't see it anymore */
+	kfree(bp->cfa_code_map);
+	bp->cfa_code_map = NULL;
+	bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+
+	if (closed)
+		bnxt_open_nic(bp, false, false);
+	rtnl_unlock();
+
+	/* Need to call vf_reps_destroy() outside of rntl_lock
+	 * as unregister_netdev takes rtnl_lock
+	 */
+	__bnxt_vf_reps_destroy(bp);
+}
+
+/* Use the OUI of the PF's perm addr and report the same mac addr
+ * for the same VF-rep each time
+ */
+static void bnxt_vf_rep_eth_addr_gen(u8 *src_mac, u16 vf_idx, u8 *mac)
+{
+	u32 addr;
+
+	ether_addr_copy(mac, src_mac);
+
+	addr = jhash(src_mac, ETH_ALEN, 0) + vf_idx;
+	mac[3] = (u8)(addr & 0xFF);
+	mac[4] = (u8)((addr >> 8) & 0xFF);
+	mac[5] = (u8)((addr >> 16) & 0xFF);
+}
+
+static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
+				    struct net_device *dev)
+{
+	struct net_device *pf_dev = bp->dev;
+
+	dev->netdev_ops = &bnxt_vf_rep_netdev_ops;
+	dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops;
+	SWITCHDEV_SET_OPS(dev, &bnxt_vf_rep_switchdev_ops);
+	/* Just inherit all the featues of the parent PF as the VF-R
+	 * uses the RX/TX rings of the parent PF
+	 */
+	dev->hw_features = pf_dev->hw_features;
+	dev->gso_partial_features = pf_dev->gso_partial_features;
+	dev->vlan_features = pf_dev->vlan_features;
+	dev->hw_enc_features = pf_dev->hw_enc_features;
+	dev->features |= pf_dev->features;
+	bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx,
+				 dev->perm_addr);
+	ether_addr_copy(dev->dev_addr, dev->perm_addr);
+}
+
+static int bnxt_vf_reps_create(struct bnxt *bp)
+{
+	u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev);
+	struct bnxt_vf_rep *vf_rep;
+	struct net_device *dev;
+	int rc, i;
+
+	bp->vf_reps = kcalloc(num_vfs, sizeof(vf_rep), GFP_KERNEL);
+	if (!bp->vf_reps)
+		return -ENOMEM;
+
+	/* storage for cfa_code to vf-idx mapping */
+	cfa_code_map = kmalloc(sizeof(*bp->cfa_code_map) * MAX_CFA_CODE,
+			       GFP_KERNEL);
+	if (!cfa_code_map) {
+		rc = -ENOMEM;
+		goto err;
+	}
+	for (i = 0; i < MAX_CFA_CODE; i++)
+		cfa_code_map[i] = VF_IDX_INVALID;
+
+	for (i = 0; i < num_vfs; i++) {
+		dev = alloc_etherdev(sizeof(*vf_rep));
+		if (!dev) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		vf_rep = netdev_priv(dev);
+		bp->vf_reps[i] = vf_rep;
+		vf_rep->dev = dev;
+		vf_rep->bp = bp;
+		vf_rep->vf_idx = i;
+		vf_rep->tx_cfa_action = CFA_HANDLE_INVALID;
+
+		/* get cfa handles from FW */
+		rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx,
+					&vf_rep->tx_cfa_action,
+					&vf_rep->rx_cfa_code);
+		if (rc) {
+			rc = -ENOLINK;
+			goto err;
+		}
+		cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx;
+
+		vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+						 GFP_KERNEL);
+		if (!vf_rep->dst) {
+			rc = -ENOMEM;
+			goto err;
+		}
+		/* only cfa_action is needed to mux a packet while TXing */
+		vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action;
+		vf_rep->dst->u.port_info.lower_dev = bp->dev;
+
+		bnxt_vf_rep_netdev_init(bp, vf_rep, dev);
+		rc = register_netdev(dev);
+		if (rc) {
+			/* no need for unregister_netdev in cleanup */
+			dev->netdev_ops = NULL;
+			goto err;
+		}
+	}
+
+	/* publish cfa_code_map only after all VF-reps have been initialized */
+	bp->cfa_code_map = cfa_code_map;
+	bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+	netif_keep_dst(bp->dev);
+	return 0;
+
+err:
+	netdev_info(bp->dev, "%s error=%d", __func__, rc);
+	kfree(cfa_code_map);
+	__bnxt_vf_reps_destroy(bp);
+	return rc;
+}
+
+/* Devlink related routines */
+static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
+
+	*mode = bp->eswitch_mode;
+	return 0;
+}
+
+static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
+	int rc = 0;
+
+	mutex_lock(&bp->sriov_lock);
+	if (bp->eswitch_mode == mode) {
+		netdev_info(bp->dev, "already in %s eswitch mode",
+			    mode == DEVLINK_ESWITCH_MODE_LEGACY ?
+			    "legacy" : "switchdev");
+		rc = -EINVAL;
+		goto done;
+	}
+
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+		bnxt_vf_reps_destroy(bp);
+		break;
+
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		if (pci_num_vf(bp->pdev) == 0) {
+			netdev_info(bp->dev,
+				    "Enable VFs before setting swtichdev mode");
+			rc = -EPERM;
+			goto done;
+		}
+		rc = bnxt_vf_reps_create(bp);
+		break;
+
+	default:
+		rc = -EINVAL;
+		goto done;
+	}
+done:
+	mutex_unlock(&bp->sriov_lock);
+	return rc;
+}
+
+static const struct devlink_ops bnxt_dl_ops = {
+	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
+	.eswitch_mode_get = bnxt_dl_eswitch_mode_get
+};
+
+int bnxt_dl_register(struct bnxt *bp)
+{
+	struct devlink *dl;
+	int rc;
+
+	if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
+		return 0;
+
+	if (bp->hwrm_spec_code < 0x10800) {
+		netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
+		return -ENOTSUPP;
+	}
+
+	dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+	if (!dl) {
+		netdev_warn(bp->dev, "devlink_alloc failed");
+		return -ENOMEM;
+	}
+
+	bnxt_link_bp_to_dl(dl, bp);
+	bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+	rc = devlink_register(dl, &bp->pdev->dev);
+	if (rc) {
+		bnxt_link_bp_to_dl(dl, NULL);
+		devlink_free(dl);
+		netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+void bnxt_dl_unregister(struct bnxt *bp)
+{
+	struct devlink *dl = bp->dl;
+
+	if (!dl)
+		return;
+
+	devlink_unregister(dl);
+	devlink_free(dl);
+}
+
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
new file mode 100644
index 000000000000..e55a3b693e20
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -0,0 +1,72 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_VFR_H
+#define BNXT_VFR_H
+
+#ifdef CONFIG_BNXT_SRIOV
+
+#define	MAX_CFA_CODE			65536
+
+/* Struct to hold housekeeping info needed by devlink interface */
+struct bnxt_dl {
+	struct bnxt *bp;	/* back ptr to the controlling dev */
+};
+
+static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
+{
+	return ((struct bnxt_dl *)devlink_priv(dl))->bp;
+}
+
+static inline void bnxt_link_bp_to_dl(struct devlink *dl, struct bnxt *bp)
+{
+	struct bnxt_dl *bp_dl = devlink_priv(dl);
+
+	bp_dl->bp = bp;
+	if (bp)
+		bp->dl = dl;
+}
+
+int bnxt_dl_register(struct bnxt *bp);
+void bnxt_dl_unregister(struct bnxt *bp);
+void bnxt_vf_reps_destroy(struct bnxt *bp);
+void bnxt_vf_reps_close(struct bnxt *bp);
+void bnxt_vf_reps_open(struct bnxt *bp);
+void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb);
+struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code);
+
+#else
+
+static inline int bnxt_dl_register(struct bnxt *bp)
+{
+	return 0;
+}
+
+static inline void bnxt_dl_unregister(struct bnxt *bp)
+{
+}
+
+static inline void bnxt_vf_reps_close(struct bnxt *bp)
+{
+}
+
+static inline void bnxt_vf_reps_open(struct bnxt *bp)
+{
+}
+
+static inline void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
+{
+}
+
+static inline struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
+{
+	return NULL;
+}
+#endif /* CONFIG_BNXT_SRIOV */
+#endif /* BNXT_VFR_H */
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 3a34fdba5301..b1fdd3cc10d1 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -657,6 +657,7 @@ struct bcmgenet_priv {
 
 	struct clk *clk;
 	struct platform_device *pdev;
+	struct platform_device *mii_pdev;
 
 	/* WOL */
 	struct clk *clk_wol;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 30cb97b4a1d7..18f5723be2c9 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -24,62 +24,10 @@
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/platform_data/bcmgenet.h>
+#include <linux/platform_data/mdio-bcm-unimac.h>
 
 #include "bcmgenet.h"
 
-/* read a value from the MII */
-static int bcmgenet_mii_read(struct mii_bus *bus, int phy_id, int location)
-{
-	int ret;
-	struct net_device *dev = bus->priv;
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	u32 reg;
-
-	bcmgenet_umac_writel(priv, (MDIO_RD | (phy_id << MDIO_PMD_SHIFT) |
-			     (location << MDIO_REG_SHIFT)), UMAC_MDIO_CMD);
-	/* Start MDIO transaction*/
-	reg = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD);
-	reg |= MDIO_START_BUSY;
-	bcmgenet_umac_writel(priv, reg, UMAC_MDIO_CMD);
-	wait_event_timeout(priv->wq,
-			   !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD)
-			   & MDIO_START_BUSY),
-			   HZ / 100);
-	ret = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD);
-
-	/* Some broken devices are known not to release the line during
-	 * turn-around, e.g: Broadcom BCM53125 external switches, so check for
-	 * that condition here and ignore the MDIO controller read failure
-	 * indication.
-	 */
-	if (!(bus->phy_ignore_ta_mask & 1 << phy_id) && (ret & MDIO_READ_FAIL))
-		return -EIO;
-
-	return ret & 0xffff;
-}
-
-/* write a value to the MII */
-static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id,
-			      int location, u16 val)
-{
-	struct net_device *dev = bus->priv;
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	u32 reg;
-
-	bcmgenet_umac_writel(priv, (MDIO_WR | (phy_id << MDIO_PMD_SHIFT) |
-			     (location << MDIO_REG_SHIFT) | (0xffff & val)),
-			     UMAC_MDIO_CMD);
-	reg = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD);
-	reg |= MDIO_START_BUSY;
-	bcmgenet_umac_writel(priv, reg, UMAC_MDIO_CMD);
-	wait_event_timeout(priv->wq,
-			   !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD) &
-			   MDIO_START_BUSY),
-			   HZ / 100);
-
-	return 0;
-}
-
 /* setup netdev link state when PHY link status change and
  * update UMAC and RGMII block when link up
  */
@@ -393,104 +341,121 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	return 0;
 }
 
-/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
- * their internal MDIO management controller making them fail to successfully
- * be read from or written to for the first transaction.  We insert a dummy
- * BMSR read here to make sure that phy_get_device() and get_phy_id() can
- * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
- * PHY device for this peripheral.
- *
- * Once the PHY driver is registered, we can workaround subsequent reads from
- * there (e.g: during system-wide power management).
- *
- * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
- * therefore the right location to stick that workaround. Since we do not want
- * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
- * Device Tree scan to limit the search area.
- */
-static int bcmgenet_mii_bus_reset(struct mii_bus *bus)
+static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv)
 {
-	struct net_device *dev = bus->priv;
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct device_node *np = priv->mdio_dn;
-	struct device_node *child = NULL;
-	u32 read_mask = 0;
-	int addr = 0;
+	struct device_node *dn = priv->pdev->dev.of_node;
+	struct device *kdev = &priv->pdev->dev;
+	char *compat;
 
-	if (!np) {
-		read_mask = 1 << priv->phy_addr;
-	} else {
-		for_each_available_child_of_node(np, child) {
-			addr = of_mdio_parse_addr(&dev->dev, child);
-			if (addr < 0)
-				continue;
+	compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version);
+	if (!compat)
+		return NULL;
 
-			read_mask |= 1 << addr;
-		}
+	priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
+	kfree(compat);
+	if (!priv->mdio_dn) {
+		dev_err(kdev, "unable to find MDIO bus node\n");
+		return NULL;
 	}
 
-	for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
-		if (read_mask & 1 << addr) {
-			dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr);
-			mdiobus_read(bus, addr, MII_BMSR);
-		}
+	return priv->mdio_dn;
+}
+
+static void bcmgenet_mii_pdata_init(struct bcmgenet_priv *priv,
+				    struct unimac_mdio_pdata *ppd)
+{
+	struct device *kdev = &priv->pdev->dev;
+	struct bcmgenet_platform_data *pd = kdev->platform_data;
+
+	if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) {
+		/*
+		 * Internal or external PHY with MDIO access
+		 */
+		if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR)
+			ppd->phy_mask = 1 << pd->phy_address;
+		else
+			ppd->phy_mask = 0;
 	}
+}
 
+static int bcmgenet_mii_wait(void *wait_func_data)
+{
+	struct bcmgenet_priv *priv = wait_func_data;
+
+	wait_event_timeout(priv->wq,
+			   !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD)
+			   & MDIO_START_BUSY),
+			   HZ / 100);
 	return 0;
 }
 
-static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
+static int bcmgenet_mii_register(struct bcmgenet_priv *priv)
 {
-	struct mii_bus *bus;
+	struct platform_device *pdev = priv->pdev;
+	struct bcmgenet_platform_data *pdata = pdev->dev.platform_data;
+	struct device_node *dn = pdev->dev.of_node;
+	struct unimac_mdio_pdata ppd;
+	struct platform_device *ppdev;
+	struct resource *pres, res;
+	int id, ret;
+
+	pres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	memset(&res, 0, sizeof(res));
+	memset(&ppd, 0, sizeof(ppd));
+
+	ppd.wait_func = bcmgenet_mii_wait;
+	ppd.wait_func_data = priv;
+	ppd.bus_name = "bcmgenet MII bus";
+
+	/* Unimac MDIO bus controller starts at UniMAC offset + MDIO_CMD
+	 * and is 2 * 32-bits word long, 8 bytes total.
+	 */
+	res.start = pres->start + GENET_UMAC_OFF + UMAC_MDIO_CMD;
+	res.end = res.start + 8;
+	res.flags = IORESOURCE_MEM;
 
-	if (priv->mii_bus)
-		return 0;
+	if (dn)
+		id = of_alias_get_id(dn, "eth");
+	else
+		id = pdev->id;
 
-	priv->mii_bus = mdiobus_alloc();
-	if (!priv->mii_bus) {
-		pr_err("failed to allocate\n");
+	ppdev = platform_device_alloc(UNIMAC_MDIO_DRV_NAME, id);
+	if (!ppdev)
 		return -ENOMEM;
-	}
 
-	bus = priv->mii_bus;
-	bus->priv = priv->dev;
-	bus->name = "bcmgenet MII bus";
-	bus->parent = &priv->pdev->dev;
-	bus->read = bcmgenet_mii_read;
-	bus->write = bcmgenet_mii_write;
-	bus->reset = bcmgenet_mii_bus_reset;
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d",
-		 priv->pdev->name, priv->pdev->id);
+	/* Retain this platform_device pointer for later cleanup */
+	priv->mii_pdev = ppdev;
+	ppdev->dev.parent = &pdev->dev;
+	ppdev->dev.of_node = bcmgenet_mii_of_find_mdio(priv);
+	if (pdata)
+		bcmgenet_mii_pdata_init(priv, &ppd);
+
+	ret = platform_device_add_resources(ppdev, &res, 1);
+	if (ret)
+		goto out;
+
+	ret = platform_device_add_data(ppdev, &ppd, sizeof(ppd));
+	if (ret)
+		goto out;
+
+	ret = platform_device_add(ppdev);
+	if (ret)
+		goto out;
 
 	return 0;
+out:
+	platform_device_put(ppdev);
+	return ret;
 }
 
 static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
 {
 	struct device_node *dn = priv->pdev->dev.of_node;
 	struct device *kdev = &priv->pdev->dev;
-	struct phy_device *phydev = NULL;
-	char *compat;
+	struct phy_device *phydev;
 	int phy_mode;
 	int ret;
 
-	compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version);
-	if (!compat)
-		return -ENOMEM;
-
-	priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
-	kfree(compat);
-	if (!priv->mdio_dn) {
-		dev_err(kdev, "unable to find MDIO bus node\n");
-		return -ENODEV;
-	}
-
-	ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn);
-	if (ret) {
-		dev_err(kdev, "failed to register MDIO bus\n");
-		return ret;
-	}
-
 	/* Fetch the PHY phandle */
 	priv->phy_dn = of_parse_phandle(dn, "phy-handle", 0);
 
@@ -537,33 +502,23 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
 {
 	struct device *kdev = &priv->pdev->dev;
 	struct bcmgenet_platform_data *pd = kdev->platform_data;
-	struct mii_bus *mdio = priv->mii_bus;
+	char phy_name[MII_BUS_ID_SIZE + 3];
+	char mdio_bus_id[MII_BUS_ID_SIZE];
 	struct phy_device *phydev;
-	int ret;
+
+	snprintf(mdio_bus_id, MII_BUS_ID_SIZE, "%s-%d",
+		 UNIMAC_MDIO_DRV_NAME, priv->pdev->id);
 
 	if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) {
+		snprintf(phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT,
+			 mdio_bus_id, pd->phy_address);
+
 		/*
 		 * Internal or external PHY with MDIO access
 		 */
-		if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR)
-			mdio->phy_mask = ~(1 << pd->phy_address);
-		else
-			mdio->phy_mask = 0;
-
-		ret = mdiobus_register(mdio);
-		if (ret) {
-			dev_err(kdev, "failed to register MDIO bus\n");
-			return ret;
-		}
-
-		if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR)
-			phydev = mdiobus_get_phy(mdio, pd->phy_address);
-		else
-			phydev = phy_find_first(mdio);
-
+		phydev = phy_attach(priv->dev, phy_name, pd->phy_interface);
 		if (!phydev) {
 			dev_err(kdev, "failed to register PHY device\n");
-			mdiobus_unregister(mdio);
 			return -ENODEV;
 		}
 	} else {
@@ -609,10 +564,9 @@ static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv)
 int bcmgenet_mii_init(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct device_node *dn = priv->pdev->dev.of_node;
 	int ret;
 
-	ret = bcmgenet_mii_alloc(priv);
+	ret = bcmgenet_mii_register(priv);
 	if (ret)
 		return ret;
 
@@ -623,11 +577,7 @@ int bcmgenet_mii_init(struct net_device *dev)
 	return 0;
 
 out:
-	if (of_phy_is_fixed_link(dn))
-		of_phy_deregister_fixed_link(dn);
-	of_node_put(priv->phy_dn);
-	mdiobus_unregister(priv->mii_bus);
-	mdiobus_free(priv->mii_bus);
+	bcmgenet_mii_exit(dev);
 	return ret;
 }
 
@@ -639,6 +589,6 @@ void bcmgenet_mii_exit(struct net_device *dev)
 	if (of_phy_is_fixed_link(dn))
 		of_phy_deregister_fixed_link(dn);
 	of_node_put(priv->phy_dn);
-	mdiobus_unregister(priv->mii_bus);
-	mdiobus_free(priv->mii_bus);
+	platform_device_unregister(priv->mii_pdev);
+	platform_device_put(priv->mii_pdev);
 }
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 26d25749c3e4..6df2cad61647 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -68,7 +68,7 @@
 #define GEM_MAX_TX_LEN		((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
 
 #define GEM_MTU_MIN_SIZE	ETH_MIN_MTU
-#define MACB_NETIF_LSO		(NETIF_F_TSO | NETIF_F_UFO)
+#define MACB_NETIF_LSO		NETIF_F_TSO
 
 #define MACB_WOL_HAS_MAGIC_PACKET	(0x1 << 0)
 #define MACB_WOL_ENABLED		(0x1 << 1)
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index 9906fda76087..248a8fc45069 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -128,7 +128,7 @@ static void macb_remove(struct pci_dev *pdev)
 	clk_unregister(plat_data->hclk);
 }
 
-static struct pci_device_id dev_id_table[] = {
+static const struct pci_device_id dev_id_table[] = {
 	{ PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), },
 	{ 0, }
 };
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index ebd353bc78ff..09e287597c74 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -105,6 +105,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"tx_total_sent",
 	"tx_total_fwd",
 	"tx_err_pko",
+	"tx_err_pki",
 	"tx_err_link",
 	"tx_err_drop",
 
@@ -826,6 +827,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_total_fwd);
 	/*per_core_stats[j].link_stats[i].fromhost.fw_err_pko */
 	data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pko);
+	/*per_core_stats[j].link_stats[i].fromhost.fw_err_pki */
+	data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pki);
 	/*per_core_stats[j].link_stats[i].fromhost.fw_err_link */
 	data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_link);
 	/*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost.
@@ -1568,6 +1571,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
 		tstats->fw_total_sent = rsp_tstats->fw_total_sent;
 		tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
 		tstats->fw_err_pko = rsp_tstats->fw_err_pko;
+		tstats->fw_err_pki = rsp_tstats->fw_err_pki;
 		tstats->fw_err_link = rsp_tstats->fw_err_link;
 		tstats->fw_err_drop = rsp_tstats->fw_err_drop;
 		tstats->fw_tso = rsp_tstats->fw_tso;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 51583ae4b1eb..8c2cd8011bae 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -59,6 +59,21 @@ static char fw_type[LIO_MAX_FW_TYPE_LEN];
 module_param_string(fw_type, fw_type, sizeof(fw_type), 0000);
 MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\"");
 
+static u32 console_bitmask;
+module_param(console_bitmask, int, 0644);
+MODULE_PARM_DESC(console_bitmask,
+		 "Bitmask indicating which consoles have debug output redirected to syslog.");
+
+/**
+ * \brief determines if a given console has debug enabled.
+ * @param console console to check
+ * @returns  1 = enabled. 0 otherwise
+ */
+int octeon_console_debug_enabled(u32 console)
+{
+	return (console_bitmask >> (console)) & 0x1;
+}
+
 static int ptp_enable = 1;
 
 /* Polling interval for determining when NIC application is alive */
@@ -1825,6 +1840,11 @@ static int octeon_chip_specific_setup(struct octeon_device *oct)
 	case OCTEON_CN23XX_PCIID_PF:
 		oct->chip_id = OCTEON_CN23XX_PF_VID;
 		ret = setup_cn23xx_octeon_pf_device(oct);
+#ifdef CONFIG_PCI_IOV
+		if (!ret)
+			pci_sriov_set_totalvfs(oct->pci_dev,
+					       oct->sriov_info.max_vfs);
+#endif
 		s = "CN23XX";
 		break;
 
@@ -2544,8 +2564,8 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
 {
 	struct octeon_droq_ops droq_ops;
 	struct net_device *netdev;
-	static int cpu_id;
-	static int cpu_id_modulus;
+	int cpu_id;
+	int cpu_id_modulus;
 	struct octeon_droq *droq;
 	struct napi_struct *napi;
 	int q, q_no, retval = 0;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 9b247102eb92..935ff299cdd9 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1663,10 +1663,10 @@ static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
 {
 	struct octeon_droq_ops droq_ops;
 	struct net_device *netdev;
-	static int cpu_id_modulus;
+	int cpu_id_modulus;
 	struct octeon_droq *droq;
 	struct napi_struct *napi;
-	static int cpu_id;
+	int cpu_id;
 	int num_tx_descs;
 	struct lio *lio;
 	int retval = 0;
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 231dd7fbfb80..3b9e3646b971 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -27,8 +27,8 @@
 
 #define LIQUIDIO_PACKAGE ""
 #define LIQUIDIO_BASE_MAJOR_VERSION 1
-#define LIQUIDIO_BASE_MINOR_VERSION 5
-#define LIQUIDIO_BASE_MICRO_VERSION 1
+#define LIQUIDIO_BASE_MINOR_VERSION 6
+#define LIQUIDIO_BASE_MICRO_VERSION 0
 #define LIQUIDIO_BASE_VERSION   __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
 				__stringify(LIQUIDIO_BASE_MINOR_VERSION)
 #define LIQUIDIO_MICRO_VERSION  "." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
@@ -768,6 +768,7 @@ struct nic_rx_stats {
 	/* firmware stats */
 	u64 fw_total_rcvd;
 	u64 fw_total_fwd;
+	u64 fw_total_fwd_bytes;
 	u64 fw_err_pko;
 	u64 fw_err_link;
 	u64 fw_err_drop;
@@ -814,6 +815,7 @@ struct nic_tx_stats {
 	u64 fw_tso;		/* number of tso requests */
 	u64 fw_tso_fwd;		/* number of packets segmented in tso */
 	u64 fw_tx_vxlan;
+	u64 fw_err_pki;
 };
 
 struct oct_link_stats {
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index e08f7600f986..dd0efc9b4286 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -37,13 +37,6 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
 					     u32 flags);
 static int octeon_console_read(struct octeon_device *oct, u32 console_num,
 			       char *buffer, u32 buf_size);
-static u32 console_bitmask;
-module_param(console_bitmask, int, 0644);
-MODULE_PARM_DESC(console_bitmask,
-		 "Bitmask indicating which consoles have debug output redirected to syslog.");
-
-#define MIN(a, b) min((a), (b))
-#define CAST_ULL(v) ((u64)(v))
 
 #define BOOTLOADER_PCI_READ_BUFFER_DATA_ADDR    0x0006c008
 #define BOOTLOADER_PCI_READ_BUFFER_LEN_ADDR     0x0006c004
@@ -139,16 +132,6 @@ struct octeon_pci_console_desc {
 };
 
 /**
- * \brief determines if a given console has debug enabled.
- * @param console console to check
- * @returns  1 = enabled. 0 otherwise
- */
-static int octeon_console_debug_enabled(u32 console)
-{
-	return (console_bitmask >> (console)) & 0x1;
-}
-
-/**
  * This function is the implementation of the get macros defined
  * for individual structure members. The argument are generated
  * by the macros inorder to read only the needed memory.
@@ -234,7 +217,7 @@ static int __cvmx_bootmem_check_version(struct octeon_device *oct,
 	    (exact_match && major_version != exact_match)) {
 		dev_err(&oct->pci_dev->dev, "bootmem ver mismatch %d.%d addr:0x%llx\n",
 			major_version, minor_version,
-			CAST_ULL(oct->bootmem_desc_addr));
+			(long long)oct->bootmem_desc_addr);
 		return -1;
 	} else {
 		return 0;
@@ -704,7 +687,7 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num,
 	if (bytes_to_read <= 0)
 		return bytes_to_read;
 
-	bytes_to_read = MIN(bytes_to_read, (s32)buf_size);
+	bytes_to_read = min_t(s32, bytes_to_read, buf_size);
 
 	/* Check to see if what we want to read is not contiguous, and limit
 	 * ourselves to the contiguous block
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 623e28ca736e..495cc8880646 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -528,9 +528,10 @@ static struct octeon_config_ptr {
 };
 
 static char oct_dev_state_str[OCT_DEV_STATES + 1][32] = {
-	"BEGIN", "PCI-MAP-DONE", "DISPATCH-INIT-DONE",
+	"BEGIN", "PCI-ENABLE-DONE", "PCI-MAP-DONE", "DISPATCH-INIT-DONE",
 	"IQ-INIT-DONE", "SCBUFF-POOL-INIT-DONE", "RESPLIST-INIT-DONE",
-	"DROQ-INIT-DONE", "IO-QUEUES-INIT-DONE", "CONSOLE-INIT-DONE",
+	"DROQ-INIT-DONE", "MBOX-SETUP-DONE", "MSIX-ALLOC-VECTOR-DONE",
+	"INTR-SET-DONE", "IO-QUEUES-INIT-DONE", "CONSOLE-INIT-DONE",
 	"HOST-READY", "CORE-READY", "RUNNING", "IN-RESET",
 	"INVALID"
 };
@@ -876,11 +877,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
 
 	oct->num_iqs = 0;
 
-	oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]),
+	oct->instr_queue[0] = vzalloc_node(sizeof(*oct->instr_queue[0]),
 				numa_node);
 	if (!oct->instr_queue[0])
 		oct->instr_queue[0] =
-			vmalloc(sizeof(struct octeon_instr_queue));
+			vzalloc(sizeof(struct octeon_instr_queue));
 	if (!oct->instr_queue[0])
 		return 1;
 	memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue));
@@ -923,9 +924,9 @@ int octeon_setup_output_queues(struct octeon_device *oct)
 		desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf));
 	}
 	oct->num_oqs = 0;
-	oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
+	oct->droq[0] = vzalloc_node(sizeof(*oct->droq[0]), numa_node);
 	if (!oct->droq[0])
-		oct->droq[0] = vmalloc(sizeof(*oct->droq[0]));
+		oct->droq[0] = vzalloc(sizeof(*oct->droq[0]));
 	if (!oct->droq[0])
 		return 1;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index c90ed48ae8ab..31efdef02a24 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -22,6 +22,8 @@
 #ifndef _OCTEON_DEVICE_H_
 #define  _OCTEON_DEVICE_H_
 
+#include <linux/interrupt.h>
+
 /** PCI VendorId Device Id */
 #define  OCTEON_CN68XX_PCIID          0x91177d
 #define  OCTEON_CN66XX_PCIID          0x92177d
@@ -737,6 +739,8 @@ int octeon_wait_for_bootloader(struct octeon_device *oct,
  */
 int octeon_init_consoles(struct octeon_device *oct);
 
+int octeon_console_debug_enabled(u32 console);
+
 /**
  * Adds access to a console to the device.
  *
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 2e190deb2233..f7b5d68eb4cf 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -145,6 +145,8 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
 
 	for (i = 0; i < droq->max_count; i++) {
 		pg_info = &droq->recv_buf_list[i].pg_info;
+		if (!pg_info)
+			continue;
 
 		if (pg_info->dma)
 			lio_unmap_ring(oct->pci_dev,
@@ -275,12 +277,12 @@ int octeon_init_droq(struct octeon_device *oct,
 		droq->max_count);
 
 	droq->recv_buf_list = (struct octeon_recv_buffer *)
-			      vmalloc_node(droq->max_count *
+			      vzalloc_node(droq->max_count *
 						OCT_DROQ_RECVBUF_SIZE,
 						numa_node);
 	if (!droq->recv_buf_list)
 		droq->recv_buf_list = (struct octeon_recv_buffer *)
-				      vmalloc(droq->max_count *
+				      vzalloc(droq->max_count *
 						OCT_DROQ_RECVBUF_SIZE);
 	if (!droq->recv_buf_list) {
 		dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n");
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 0bc6a4ffce30..6a015362c340 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -793,7 +793,9 @@ static struct attribute *cxgb3_attrs[] = {
 	NULL
 };
 
-static struct attribute_group cxgb3_attr_group = {.attrs = cxgb3_attrs };
+static const struct attribute_group cxgb3_attr_group = {
+	.attrs = cxgb3_attrs,
+};
 
 static ssize_t tm_attr_show(struct device *d,
 			    char *buf, int sched)
@@ -880,7 +882,9 @@ static struct attribute *offload_attrs[] = {
 	NULL
 };
 
-static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
+static const struct attribute_group offload_attr_group = {
+	.attrs = offload_attrs,
+};
 
 /*
  * Sends an sk_buff to an offload queue driver
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ef4be781fd05..1978abbc6ceb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -338,10 +338,12 @@ struct adapter_params {
 	unsigned int sf_nsec;             /* # of flash sectors */
 	unsigned int sf_fw_start;         /* start of FW image in flash */
 
-	unsigned int fw_vers;
-	unsigned int bs_vers;		/* bootstrap version */
-	unsigned int tp_vers;
-	unsigned int er_vers;		/* expansion ROM version */
+	unsigned int fw_vers;		  /* firmware version */
+	unsigned int bs_vers;		  /* bootstrap version */
+	unsigned int tp_vers;		  /* TP microcode version */
+	unsigned int er_vers;		  /* expansion ROM version */
+	unsigned int scfg_vers;		  /* Serial Configuration version */
+	unsigned int vpd_vers;		  /* VPD Version */
 	u8 api_vers[7];
 
 	unsigned short mtus[NMTUS];
@@ -1407,6 +1409,10 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers);
 int t4_get_bs_version(struct adapter *adapter, u32 *vers);
 int t4_get_tp_version(struct adapter *adapter, u32 *vers);
 int t4_get_exprom_version(struct adapter *adapter, u32 *vers);
+int t4_get_scfg_version(struct adapter *adapter, u32 *vers);
+int t4_get_vpd_version(struct adapter *adapter, u32 *vers);
+int t4_get_version_info(struct adapter *adapter);
+void t4_dump_version_info(struct adapter *adapter);
 int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info,
 	       const u8 *fw_data, unsigned int fw_size,
 	       struct fw_hdr *card_fw, enum dev_state state, int *reset);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 26eb00a45db1..03f593e84c24 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -801,6 +801,104 @@ static int set_link_ksettings(struct net_device *dev,
 	return ret;
 }
 
+/* Translate the Firmware FEC value into the ethtool value. */
+static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
+{
+	unsigned int eth_fec = 0;
+
+	if (fw_fec & FW_PORT_CAP_FEC_RS)
+		eth_fec |= ETHTOOL_FEC_RS;
+	if (fw_fec & FW_PORT_CAP_FEC_BASER_RS)
+		eth_fec |= ETHTOOL_FEC_BASER;
+
+	/* if nothing is set, then FEC is off */
+	if (!eth_fec)
+		eth_fec = ETHTOOL_FEC_OFF;
+
+	return eth_fec;
+}
+
+/* Translate Common Code FEC value into ethtool value. */
+static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
+{
+	unsigned int eth_fec = 0;
+
+	if (cc_fec & FEC_AUTO)
+		eth_fec |= ETHTOOL_FEC_AUTO;
+	if (cc_fec & FEC_RS)
+		eth_fec |= ETHTOOL_FEC_RS;
+	if (cc_fec & FEC_BASER_RS)
+		eth_fec |= ETHTOOL_FEC_BASER;
+
+	/* if nothing is set, then FEC is off */
+	if (!eth_fec)
+		eth_fec = ETHTOOL_FEC_OFF;
+
+	return eth_fec;
+}
+
+/* Translate ethtool FEC value into Common Code value. */
+static inline unsigned int eth_to_cc_fec(unsigned int eth_fec)
+{
+	unsigned int cc_fec = 0;
+
+	if (eth_fec & ETHTOOL_FEC_OFF)
+		return cc_fec;
+
+	if (eth_fec & ETHTOOL_FEC_AUTO)
+		cc_fec |= FEC_AUTO;
+	if (eth_fec & ETHTOOL_FEC_RS)
+		cc_fec |= FEC_RS;
+	if (eth_fec & ETHTOOL_FEC_BASER)
+		cc_fec |= FEC_BASER_RS;
+
+	return cc_fec;
+}
+
+static int get_fecparam(struct net_device *dev, struct ethtool_fecparam *fec)
+{
+	const struct port_info *pi = netdev_priv(dev);
+	const struct link_config *lc = &pi->link_cfg;
+
+	/* Translate the Firmware FEC Support into the ethtool value.  We
+	 * always support IEEE 802.3 "automatic" selection of Link FEC type if
+	 * any FEC is supported.
+	 */
+	fec->fec = fwcap_to_eth_fec(lc->supported);
+	if (fec->fec != ETHTOOL_FEC_OFF)
+		fec->fec |= ETHTOOL_FEC_AUTO;
+
+	/* Translate the current internal FEC parameters into the
+	 * ethtool values.
+	 */
+	fec->active_fec = cc_to_eth_fec(lc->fec);
+
+	return 0;
+}
+
+static int set_fecparam(struct net_device *dev, struct ethtool_fecparam *fec)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct link_config *lc = &pi->link_cfg;
+	struct link_config old_lc;
+	int ret;
+
+	/* Save old Link Configuration in case the L1 Configure below
+	 * fails.
+	 */
+	old_lc = *lc;
+
+	/* Try to perform the L1 Configure and return the result of that
+	 * effort.  If it fails, revert the attempted change.
+	 */
+	lc->requested_fec = eth_to_cc_fec(fec->fec);
+	ret = t4_link_l1cfg(pi->adapter, pi->adapter->mbox,
+			    pi->tx_chan, lc);
+	if (ret)
+		*lc = old_lc;
+	return ret;
+}
+
 static void get_pauseparam(struct net_device *dev,
 			   struct ethtool_pauseparam *epause)
 {
@@ -1255,6 +1353,8 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_link_ksettings = get_link_ksettings,
 	.set_link_ksettings = set_link_ksettings,
+	.get_fecparam      = get_fecparam,
+	.set_fecparam      = set_fecparam,
 	.get_drvinfo       = get_drvinfo,
 	.get_msglevel      = get_msglevel,
 	.set_msglevel      = set_msglevel,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e403fa18f1b1..d80b20d695e0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2889,14 +2889,29 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	return err;
 }
 
-static int cxgb_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-			 __be16 proto, struct tc_to_netdev *tc)
+static int cxgb_setup_tc_cls_u32(struct net_device *dev,
+				 struct tc_cls_u32_offload *cls_u32)
 {
-	struct port_info *pi = netdev2pinfo(dev);
-	struct adapter *adap = netdev2adap(dev);
+	if (TC_H_MAJ(cls_u32->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_u32->common.chain_index)
+		return -EOPNOTSUPP;
 
-	if (chain_index)
+	switch (cls_u32->command) {
+	case TC_CLSU32_NEW_KNODE:
+	case TC_CLSU32_REPLACE_KNODE:
+		return cxgb4_config_knode(dev, cls_u32);
+	case TC_CLSU32_DELETE_KNODE:
+		return cxgb4_delete_knode(dev, cls_u32);
+	default:
 		return -EOPNOTSUPP;
+	}
+}
+
+static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			 void *type_data)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
 
 	if (!(adap->flags & FULL_INIT_DONE)) {
 		dev_err(adap->pdev_dev,
@@ -2905,20 +2920,12 @@ static int cxgb_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
 		return -EINVAL;
 	}
 
-	if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
-	    tc->type == TC_SETUP_CLSU32) {
-		switch (tc->cls_u32->command) {
-		case TC_CLSU32_NEW_KNODE:
-		case TC_CLSU32_REPLACE_KNODE:
-			return cxgb4_config_knode(dev, proto, tc->cls_u32);
-		case TC_CLSU32_DELETE_KNODE:
-			return cxgb4_delete_knode(dev, proto, tc->cls_u32);
-		default:
-			return -EOPNOTSUPP;
-		}
+	switch (type) {
+	case TC_SETUP_CLSU32:
+		return cxgb_setup_tc_cls_u32(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
 	}
-
-	return -EOPNOTSUPP;
 }
 
 static netdev_features_t cxgb_fix_features(struct net_device *dev,
@@ -3610,11 +3617,8 @@ static int adap_init0(struct adapter *adap)
 	 * later reporting and B. to warn if the currently loaded firmware
 	 * is excessively mismatched relative to the driver.)
 	 */
-	t4_get_fw_version(adap, &adap->params.fw_vers);
-	t4_get_bs_version(adap, &adap->params.bs_vers);
-	t4_get_tp_version(adap, &adap->params.tp_vers);
-	t4_get_exprom_version(adap, &adap->params.er_vers);
 
+	t4_get_version_info(adap);
 	ret = t4_check_fw_version(adap);
 	/* If firmware is too old (not supported by driver) force an update. */
 	if (ret)
@@ -4560,56 +4564,8 @@ static void cxgb4_check_pcie_caps(struct adapter *adap)
 /* Dump basic information about the adapter */
 static void print_adapter_info(struct adapter *adapter)
 {
-	/* Device information */
-	dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
-		 adapter->params.vpd.id,
-		 CHELSIO_CHIP_RELEASE(adapter->params.chip));
-	dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
-		 adapter->params.vpd.sn, adapter->params.vpd.pn);
-
-	/* Firmware Version */
-	if (!adapter->params.fw_vers)
-		dev_warn(adapter->pdev_dev, "No firmware loaded\n");
-	else
-		dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
-			 FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
-			 FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
-			 FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
-			 FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
-
-	/* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
-	 * Firmware, so dev_info() is more appropriate here.)
-	 */
-	if (!adapter->params.bs_vers)
-		dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
-	else
-		dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
-			 FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
-			 FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
-			 FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
-			 FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
-
-	/* TP Microcode Version */
-	if (!adapter->params.tp_vers)
-		dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
-	else
-		dev_info(adapter->pdev_dev,
-			 "TP Microcode version: %u.%u.%u.%u\n",
-			 FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
-			 FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
-			 FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
-			 FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
-
-	/* Expansion ROM version */
-	if (!adapter->params.er_vers)
-		dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
-	else
-		dev_info(adapter->pdev_dev,
-			 "Expansion ROM version: %u.%u.%u.%u\n",
-			 FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
-			 FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
-			 FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
-			 FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
+	/* Hardware/Firmware/etc. Version/Revision IDs */
+	t4_dump_version_info(adapter);
 
 	/* Software/Hardware configuration */
 	dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n",
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index ef06ce8247ab..48970ba08bdc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -96,7 +96,7 @@ static int fill_action_fields(struct adapter *adap,
 	LIST_HEAD(actions);
 
 	exts = cls->knode.exts;
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	tcf_exts_to_list(exts, &actions);
@@ -146,11 +146,11 @@ static int fill_action_fields(struct adapter *adap,
 	return 0;
 }
 
-int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls)
+int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 {
 	const struct cxgb4_match_field *start, *link_start = NULL;
 	struct adapter *adapter = netdev2adap(dev);
+	__be16 protocol = cls->common.protocol;
 	struct ch_filter_specification fs;
 	struct cxgb4_tc_u32_table *t;
 	struct cxgb4_link *link;
@@ -338,8 +338,7 @@ out:
 	return ret;
 }
 
-int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls)
+int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 {
 	struct adapter *adapter = netdev2adap(dev);
 	unsigned int filter_id, max_tids, i, j;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
index 021261a41c13..70a07b7cca56 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
@@ -44,10 +44,8 @@ static inline bool can_tc_u32_offload(struct net_device *dev)
 	return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false;
 }
 
-int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls);
-int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls);
+int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls);
+int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls);
 
 void cxgb4_cleanup_tc_u32(struct adapter *adapter);
 struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 82bf7aac6cdb..24087c886974 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -913,7 +913,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0xd010, 0xd03c,
 		0xdfc0, 0xdfe0,
 		0xe000, 0xea7c,
-		0xf000, 0x11190,
+		0xf000, 0x11110,
+		0x11118, 0x11190,
 		0x19040, 0x1906c,
 		0x19078, 0x19080,
 		0x1908c, 0x190e4,
@@ -1439,8 +1440,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x1ff00, 0x1ff84,
 		0x1ffc0, 0x1ffc8,
 		0x30000, 0x30030,
-		0x30038, 0x30038,
-		0x30040, 0x30040,
 		0x30100, 0x30144,
 		0x30190, 0x301a0,
 		0x301a8, 0x301b8,
@@ -1551,8 +1550,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x33c3c, 0x33c50,
 		0x33cf0, 0x33cfc,
 		0x34000, 0x34030,
-		0x34038, 0x34038,
-		0x34040, 0x34040,
 		0x34100, 0x34144,
 		0x34190, 0x341a0,
 		0x341a8, 0x341b8,
@@ -1663,8 +1660,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x37c3c, 0x37c50,
 		0x37cf0, 0x37cfc,
 		0x38000, 0x38030,
-		0x38038, 0x38038,
-		0x38040, 0x38040,
 		0x38100, 0x38144,
 		0x38190, 0x381a0,
 		0x381a8, 0x381b8,
@@ -1775,8 +1770,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x3bc3c, 0x3bc50,
 		0x3bcf0, 0x3bcfc,
 		0x3c000, 0x3c030,
-		0x3c038, 0x3c038,
-		0x3c040, 0x3c040,
 		0x3c100, 0x3c144,
 		0x3c190, 0x3c1a0,
 		0x3c1a8, 0x3c1b8,
@@ -2040,12 +2033,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x1190, 0x1194,
 		0x11a0, 0x11a4,
 		0x11b0, 0x11b4,
-		0x11fc, 0x1258,
-		0x1280, 0x12d4,
-		0x12d9, 0x12d9,
-		0x12de, 0x12de,
-		0x12e3, 0x12e3,
-		0x12e8, 0x133c,
+		0x11fc, 0x1274,
+		0x1280, 0x133c,
 		0x1800, 0x18fc,
 		0x3000, 0x302c,
 		0x3060, 0x30b0,
@@ -2076,6 +2065,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x5ea0, 0x5eb0,
 		0x5ec0, 0x5ec0,
 		0x5ec8, 0x5ed0,
+		0x5ee0, 0x5ee0,
+		0x5ef0, 0x5ef0,
+		0x5f00, 0x5f00,
 		0x6000, 0x6020,
 		0x6028, 0x6040,
 		0x6058, 0x609c,
@@ -2133,6 +2125,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0xd300, 0xd31c,
 		0xdfc0, 0xdfe0,
 		0xe000, 0xf008,
+		0xf010, 0xf018,
+		0xf020, 0xf028,
 		0x11000, 0x11014,
 		0x11048, 0x1106c,
 		0x11074, 0x11088,
@@ -2256,13 +2250,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x1ff00, 0x1ff84,
 		0x1ffc0, 0x1ffc8,
 		0x30000, 0x30030,
-		0x30038, 0x30038,
-		0x30040, 0x30040,
-		0x30048, 0x30048,
-		0x30050, 0x30050,
-		0x3005c, 0x30060,
-		0x30068, 0x30068,
-		0x30070, 0x30070,
 		0x30100, 0x30168,
 		0x30190, 0x301a0,
 		0x301a8, 0x301b8,
@@ -2325,13 +2312,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x326a8, 0x326a8,
 		0x326ec, 0x326ec,
 		0x32a00, 0x32abc,
-		0x32b00, 0x32b38,
+		0x32b00, 0x32b18,
+		0x32b20, 0x32b38,
 		0x32b40, 0x32b58,
 		0x32b60, 0x32b78,
 		0x32c00, 0x32c00,
 		0x32c08, 0x32c3c,
-		0x32e00, 0x32e2c,
-		0x32f00, 0x32f2c,
 		0x33000, 0x3302c,
 		0x33034, 0x33050,
 		0x33058, 0x33058,
@@ -2396,13 +2382,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x33c38, 0x33c50,
 		0x33cf0, 0x33cfc,
 		0x34000, 0x34030,
-		0x34038, 0x34038,
-		0x34040, 0x34040,
-		0x34048, 0x34048,
-		0x34050, 0x34050,
-		0x3405c, 0x34060,
-		0x34068, 0x34068,
-		0x34070, 0x34070,
 		0x34100, 0x34168,
 		0x34190, 0x341a0,
 		0x341a8, 0x341b8,
@@ -2465,13 +2444,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x366a8, 0x366a8,
 		0x366ec, 0x366ec,
 		0x36a00, 0x36abc,
-		0x36b00, 0x36b38,
+		0x36b00, 0x36b18,
+		0x36b20, 0x36b38,
 		0x36b40, 0x36b58,
 		0x36b60, 0x36b78,
 		0x36c00, 0x36c00,
 		0x36c08, 0x36c3c,
-		0x36e00, 0x36e2c,
-		0x36f00, 0x36f2c,
 		0x37000, 0x3702c,
 		0x37034, 0x37050,
 		0x37058, 0x37058,
@@ -2545,8 +2523,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 		0x40280, 0x40280,
 		0x40304, 0x40304,
 		0x40330, 0x4033c,
-		0x41304, 0x413b8,
-		0x413c0, 0x413c8,
+		0x41304, 0x413c8,
 		0x413d0, 0x413dc,
 		0x413f0, 0x413f0,
 		0x41400, 0x4140c,
@@ -3100,6 +3077,179 @@ int t4_get_exprom_version(struct adapter *adap, u32 *vers)
 }
 
 /**
+ *      t4_get_vpd_version - return the VPD version
+ *      @adapter: the adapter
+ *      @vers: where to place the version
+ *
+ *      Reads the VPD via the Firmware interface (thus this can only be called
+ *      once we're ready to issue Firmware commands).  The format of the
+ *      VPD version is adapter specific.  Returns 0 on success, an error on
+ *      failure.
+ *
+ *      Note that early versions of the Firmware didn't include the ability
+ *      to retrieve the VPD version, so we zero-out the return-value parameter
+ *      in that case to avoid leaving it with garbage in it.
+ *
+ *      Also note that the Firmware will return its cached copy of the VPD
+ *      Revision ID, not the actual Revision ID as written in the Serial
+ *      EEPROM.  This is only an issue if a new VPD has been written and the
+ *      Firmware/Chip haven't yet gone through a RESET sequence.  So it's best
+ *      to defer calling this routine till after a FW_RESET_CMD has been issued
+ *      if the Host Driver will be performing a full adapter initialization.
+ */
+int t4_get_vpd_version(struct adapter *adapter, u32 *vers)
+{
+	u32 vpdrev_param;
+	int ret;
+
+	vpdrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+			FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_VPDREV));
+	ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+			      1, &vpdrev_param, vers);
+	if (ret)
+		*vers = 0;
+	return ret;
+}
+
+/**
+ *      t4_get_scfg_version - return the Serial Configuration version
+ *      @adapter: the adapter
+ *      @vers: where to place the version
+ *
+ *      Reads the Serial Configuration Version via the Firmware interface
+ *      (thus this can only be called once we're ready to issue Firmware
+ *      commands).  The format of the Serial Configuration version is
+ *      adapter specific.  Returns 0 on success, an error on failure.
+ *
+ *      Note that early versions of the Firmware didn't include the ability
+ *      to retrieve the Serial Configuration version, so we zero-out the
+ *      return-value parameter in that case to avoid leaving it with
+ *      garbage in it.
+ *
+ *      Also note that the Firmware will return its cached copy of the Serial
+ *      Initialization Revision ID, not the actual Revision ID as written in
+ *      the Serial EEPROM.  This is only an issue if a new VPD has been written
+ *      and the Firmware/Chip haven't yet gone through a RESET sequence.  So
+ *      it's best to defer calling this routine till after a FW_RESET_CMD has
+ *      been issued if the Host Driver will be performing a full adapter
+ *      initialization.
+ */
+int t4_get_scfg_version(struct adapter *adapter, u32 *vers)
+{
+	u32 scfgrev_param;
+	int ret;
+
+	scfgrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+			 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_SCFGREV));
+	ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+			      1, &scfgrev_param, vers);
+	if (ret)
+		*vers = 0;
+	return ret;
+}
+
+/**
+ *      t4_get_version_info - extract various chip/firmware version information
+ *      @adapter: the adapter
+ *
+ *      Reads various chip/firmware version numbers and stores them into the
+ *      adapter Adapter Parameters structure.  If any of the efforts fails
+ *      the first failure will be returned, but all of the version numbers
+ *      will be read.
+ */
+int t4_get_version_info(struct adapter *adapter)
+{
+	int ret = 0;
+
+	#define FIRST_RET(__getvinfo) \
+	do { \
+		int __ret = __getvinfo; \
+		if (__ret && !ret) \
+			ret = __ret; \
+	} while (0)
+
+	FIRST_RET(t4_get_fw_version(adapter, &adapter->params.fw_vers));
+	FIRST_RET(t4_get_bs_version(adapter, &adapter->params.bs_vers));
+	FIRST_RET(t4_get_tp_version(adapter, &adapter->params.tp_vers));
+	FIRST_RET(t4_get_exprom_version(adapter, &adapter->params.er_vers));
+	FIRST_RET(t4_get_scfg_version(adapter, &adapter->params.scfg_vers));
+	FIRST_RET(t4_get_vpd_version(adapter, &adapter->params.vpd_vers));
+
+	#undef FIRST_RET
+	return ret;
+}
+
+/**
+ *      t4_dump_version_info - dump all of the adapter configuration IDs
+ *      @adapter: the adapter
+ *
+ *      Dumps all of the various bits of adapter configuration version/revision
+ *      IDs information.  This is typically called at some point after
+ *      t4_get_version_info() has been called.
+ */
+void t4_dump_version_info(struct adapter *adapter)
+{
+	/* Device information */
+	dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
+		 adapter->params.vpd.id,
+		 CHELSIO_CHIP_RELEASE(adapter->params.chip));
+	dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
+		 adapter->params.vpd.sn, adapter->params.vpd.pn);
+
+	/* Firmware Version */
+	if (!adapter->params.fw_vers)
+		dev_warn(adapter->pdev_dev, "No firmware loaded\n");
+	else
+		dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
+			 FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
+			 FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
+			 FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
+			 FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
+
+	/* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
+	 * Firmware, so dev_info() is more appropriate here.)
+	 */
+	if (!adapter->params.bs_vers)
+		dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
+	else
+		dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
+			 FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
+			 FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
+			 FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
+			 FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
+
+	/* TP Microcode Version */
+	if (!adapter->params.tp_vers)
+		dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
+	else
+		dev_info(adapter->pdev_dev,
+			 "TP Microcode version: %u.%u.%u.%u\n",
+			 FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
+			 FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
+			 FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
+			 FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
+
+	/* Expansion ROM version */
+	if (!adapter->params.er_vers)
+		dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
+	else
+		dev_info(adapter->pdev_dev,
+			 "Expansion ROM version: %u.%u.%u.%u\n",
+			 FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
+			 FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
+			 FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
+			 FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
+
+	/* Serial Configuration version */
+	dev_info(adapter->pdev_dev, "Serial Configuration version: %#x\n",
+		 adapter->params.scfg_vers);
+
+	/* VPD Version */
+	dev_info(adapter->pdev_dev, "VPD version: %#x\n",
+		 adapter->params.vpd_vers);
+}
+
+/**
  *	t4_check_fw_version - check if the FW is supported with this driver
  *	@adap: the adapter
  *
@@ -3690,11 +3840,64 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf)
 		     FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
 		     FW_PORT_CAP_ANEG)
 
+/* Translate Firmware Port Capabilities Pause specification to Common Code */
+static inline unsigned int fwcap_to_cc_pause(unsigned int fw_pause)
+{
+	unsigned int cc_pause = 0;
+
+	if (fw_pause & FW_PORT_CAP_FC_RX)
+		cc_pause |= PAUSE_RX;
+	if (fw_pause & FW_PORT_CAP_FC_TX)
+		cc_pause |= PAUSE_TX;
+
+	return cc_pause;
+}
+
+/* Translate Common Code Pause specification into Firmware Port Capabilities */
+static inline unsigned int cc_to_fwcap_pause(unsigned int cc_pause)
+{
+	unsigned int fw_pause = 0;
+
+	if (cc_pause & PAUSE_RX)
+		fw_pause |= FW_PORT_CAP_FC_RX;
+	if (cc_pause & PAUSE_TX)
+		fw_pause |= FW_PORT_CAP_FC_TX;
+
+	return fw_pause;
+}
+
+/* Translate Firmware Forward Error Correction specification to Common Code */
+static inline unsigned int fwcap_to_cc_fec(unsigned int fw_fec)
+{
+	unsigned int cc_fec = 0;
+
+	if (fw_fec & FW_PORT_CAP_FEC_RS)
+		cc_fec |= FEC_RS;
+	if (fw_fec & FW_PORT_CAP_FEC_BASER_RS)
+		cc_fec |= FEC_BASER_RS;
+
+	return cc_fec;
+}
+
+/* Translate Common Code Forward Error Correction specification to Firmware */
+static inline unsigned int cc_to_fwcap_fec(unsigned int cc_fec)
+{
+	unsigned int fw_fec = 0;
+
+	if (cc_fec & FEC_RS)
+		fw_fec |= FW_PORT_CAP_FEC_RS;
+	if (cc_fec & FEC_BASER_RS)
+		fw_fec |= FW_PORT_CAP_FEC_BASER_RS;
+
+	return fw_fec;
+}
+
 /**
  *	t4_link_l1cfg - apply link configuration to MAC/PHY
- *	@phy: the PHY to setup
- *	@mac: the MAC to setup
- *	@lc: the requested link configuration
+ *	@adapter: the adapter
+ *	@mbox: the Firmware Mailbox to use
+ *	@port: the Port ID
+ *	@lc: the Port's Link Configuration
  *
  *	Set up a port's MAC and PHY according to a desired link configuration.
  *	- If the PHY can auto-negotiate first decide what to advertise, then
@@ -3707,22 +3910,46 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
 		  struct link_config *lc)
 {
 	struct fw_port_cmd c;
-	unsigned int mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO);
-	unsigned int fc = 0, fec = 0, fw_fec = 0;
+	unsigned int fw_mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO);
+	unsigned int fw_fc, cc_fec, fw_fec;
+	unsigned int rcap;
 
 	lc->link_ok = 0;
-	if (lc->requested_fc & PAUSE_RX)
-		fc |= FW_PORT_CAP_FC_RX;
-	if (lc->requested_fc & PAUSE_TX)
-		fc |= FW_PORT_CAP_FC_TX;
 
-	fec = lc->requested_fec & FEC_AUTO ? lc->auto_fec : lc->requested_fec;
+	/* Convert driver coding of Pause Frame Flow Control settings into the
+	 * Firmware's API.
+	 */
+	fw_fc = cc_to_fwcap_pause(lc->requested_fc);
+
+	/* Convert Common Code Forward Error Control settings into the
+	 * Firmware's API.  If the current Requested FEC has "Automatic"
+	 * (IEEE 802.3) specified, then we use whatever the Firmware
+	 * sent us as part of it's IEEE 802.3-based interpratation of
+	 * the Transceiver Module EPROM FEC parameters.  Otherwise we
+	 * use whatever is in the current Requested FEC settings.
+	 */
+	if (lc->requested_fec & FEC_AUTO)
+		cc_fec = lc->auto_fec;
+	else
+		cc_fec = lc->requested_fec;
+	fw_fec = cc_to_fwcap_fec(cc_fec);
 
-	if (fec & FEC_RS)
-		fw_fec |= FW_PORT_CAP_FEC_RS;
-	if (fec & FEC_BASER_RS)
-		fw_fec |= FW_PORT_CAP_FEC_BASER_RS;
+	/* Figure out what our Requested Port Capabilities are going to be.
+	 */
+	if (!(lc->supported & FW_PORT_CAP_ANEG)) {
+		rcap = (lc->supported & ADVERT_MASK) | fw_fc | fw_fec;
+		lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
+		lc->fec = cc_fec;
+	} else if (lc->autoneg == AUTONEG_DISABLE) {
+		rcap = lc->requested_speed | fw_fc | fw_fec | fw_mdi;
+		lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
+		lc->fec = cc_fec;
+	} else {
+		rcap = lc->advertising | fw_fc | fw_fec | fw_mdi;
+	}
 
+	/* And send that on to the Firmware ...
+	 */
 	memset(&c, 0, sizeof(c));
 	c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 				     FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
@@ -3730,19 +3957,7 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
 	c.action_to_len16 =
 		cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
 			    FW_LEN16(c));
-
-	if (!(lc->supported & FW_PORT_CAP_ANEG)) {
-		c.u.l1cfg.rcap = cpu_to_be32((lc->supported & ADVERT_MASK) |
-					     fc | fw_fec);
-		lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
-	} else if (lc->autoneg == AUTONEG_DISABLE) {
-		c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc |
-					     fw_fec | mdi);
-		lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
-	} else
-		c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc |
-					     fw_fec | mdi);
-
+	c.u.l1cfg.rcap = cpu_to_be32(rcap);
 	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
 }
 
@@ -7480,19 +7695,28 @@ static const char *t4_link_down_rc_str(unsigned char link_down_rc)
 void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 {
 	const struct fw_port_cmd *p = (const void *)rpl;
+	unsigned int acaps = be16_to_cpu(p->u.info.acap);
 	struct adapter *adap = pi->adapter;
 
 	/* link/module state change message */
-	int speed = 0, fc = 0;
+	int speed = 0, fc, fec;
 	struct link_config *lc;
 	u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype);
 	int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0;
 	u32 mod = FW_PORT_CMD_MODTYPE_G(stat);
 
+	/* Unfortunately the format of the Link Status returned by the
+	 * Firmware isn't the same as the Firmware Port Capabilities bitfield
+	 * used everywhere else ...
+	 */
+	fc = 0;
 	if (stat & FW_PORT_CMD_RXPAUSE_F)
 		fc |= PAUSE_RX;
 	if (stat & FW_PORT_CMD_TXPAUSE_F)
 		fc |= PAUSE_TX;
+
+	fec = fwcap_to_cc_fec(acaps);
+
 	if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
 		speed = 100;
 	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
@@ -7509,11 +7733,20 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 	lc = &pi->link_cfg;
 
 	if (mod != pi->mod_type) {
+		/* When a new Transceiver Module is inserted, the Firmware
+		 * will examine any Forward Error Correction parameters
+		 * present in the Transceiver Module i2c EPROM and determine
+		 * the supported and recommended FEC settings from those
+		 * based on IEEE 802.3 standards.  We always record the
+		 * IEEE 802.3 recommended "automatic" settings.
+		 */
+		lc->auto_fec = fec;
+
 		pi->mod_type = mod;
 		t4_os_portmod_changed(adap, pi->port_id);
 	}
 	if (link_ok != lc->link_ok || speed != lc->speed ||
-	    fc != lc->fc) {	/* something changed */
+	    fc != lc->fc || fec != lc->fec) {	/* something changed */
 		if (!link_ok && lc->link_ok) {
 			unsigned char rc = FW_PORT_CMD_LINKDNRC_G(stat);
 
@@ -7525,6 +7758,8 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 		lc->link_ok = link_ok;
 		lc->speed = speed;
 		lc->fc = fc;
+		lc->fec = fec;
+
 		lc->supported = be16_to_cpu(p->u.info.pcap);
 		lc->lp_advertising = be16_to_cpu(p->u.info.lpacap);
 
@@ -7614,7 +7849,8 @@ static void get_pci_mode(struct adapter *adapter, struct pci_params *p)
 /**
  *	init_link_config - initialize a link's SW state
  *	@lc: structure holding the link state
- *	@caps: link capabilities
+ *	@pcaps: link Port Capabilities
+ *	@acaps: link current Advertised Port Capabilities
  *
  *	Initializes the SW state maintained for each link, including the link's
  *	capabilities and default speed/flow-control/autonegotiation settings.
@@ -7627,15 +7863,11 @@ static void init_link_config(struct link_config *lc, unsigned int pcaps,
 	lc->requested_speed = 0;
 	lc->speed = 0;
 	lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
-	lc->auto_fec = 0;
 
 	/* For Forward Error Control, we default to whatever the Firmware
 	 * tells us the Link is currently advertising.
 	 */
-	if (acaps & FW_PORT_CAP_FEC_RS)
-		lc->auto_fec |= FEC_RS;
-	if (acaps & FW_PORT_CAP_FEC_BASER_RS)
-		lc->auto_fec |= FEC_BASER_RS;
+	lc->auto_fec = fwcap_to_cc_fec(acaps);
 	lc->requested_fec = FEC_AUTO;
 	lc->fec = lc->auto_fec;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 0ebed64d62d3..ad825fbc21a5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1124,6 +1124,8 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */
 	FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17,
 	FW_PARAMS_PARAM_DEV_FWCACHE = 0x18,
+	FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A,
+	FW_PARAMS_PARAM_DEV_VPDREV = 0x1B,
 	FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR	= 0x1C,
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
 };
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 17e566a8b345..84394b43c0a1 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1303,7 +1303,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		0x00, 'L', 'i', 'n', 'u', 'x'
 	};
 	static int last_irq;
-	static int multiport_cnt;	/* For four-port boards w/one EEPROM */
 	int i, irq;
 	unsigned short sum;
 	unsigned char *ee_data;
@@ -1557,7 +1556,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		} else if (ee_data[0] == 0xff  &&  ee_data[1] == 0xff &&
 				   ee_data[2] == 0) {
 			sa_offset = 2;		/* Grrr, damn Matrox boards. */
-			multiport_cnt = 4;
 		}
 #ifdef CONFIG_MIPS_COBALT
                if ((pdev->bus->number == 0) &&
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 4ee042c034a1..1b79a6defd56 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -73,7 +73,7 @@
 
 #define ETHERCAT_MASTER_ID	0x14
 
-static struct pci_device_id ids[] = {
+static const struct pci_device_id ids[] = {
 	{ PCI_DEVICE(0x15ec, 0x5000), },
 	{ 0, }
 };
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 2b62841c4c63..05989aafaf32 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -139,10 +139,7 @@ int be_roce_register_driver(struct ocrdma_driver *drv)
 	}
 	ocrdma_drv = drv;
 	list_for_each_entry(dev, &be_adapter_list, entry) {
-		struct net_device *netdev;
-
 		_be_roce_dev_add(dev);
-		netdev = dev->netdev;
 	}
 	mutex_unlock(&be_adapter_list_lock);
 	return 0;
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 757b873735a5..733d54caabb6 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -342,18 +342,19 @@ static void dpaa_get_stats64(struct net_device *net_dev,
 	}
 }
 
-static int dpaa_setup_tc(struct net_device *net_dev, u32 handle,
-			 u32 chain_index, __be16 proto, struct tc_to_netdev *tc)
+static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+			 void *type_data)
 {
 	struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct tc_mqprio_qopt *mqprio = type_data;
 	u8 num_tc;
 	int i;
 
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-	num_tc = tc->mqprio->num_tc;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	num_tc = mqprio->num_tc;
 
 	if (num_tc == priv->num_tc)
 		return 0;
@@ -398,8 +399,8 @@ static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
 
 	of_dev = of_find_device_by_node(mac_node);
 	if (!of_dev) {
-		dev_err(dpaa_dev, "of_find_device_by_node(%s) failed\n",
-			mac_node->full_name);
+		dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n",
+			mac_node);
 		of_node_put(mac_node);
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a6e323f15637..df09b254553d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -173,10 +173,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #endif /* CONFIG_M5272 */
 
 /* The FEC stores dest/src/type/vlan, data, and checksum for receive packets.
+ *
+ * 2048 byte skbufs are allocated. However, alignment requirements
+ * varies between FEC variants. Worst case is 64, so round down by 64.
  */
-#define PKT_MAXBUF_SIZE		1522
+#define PKT_MAXBUF_SIZE		(round_down(2048 - 64, 64))
 #define PKT_MINBUF_SIZE		64
-#define PKT_MAXBLR_SIZE		1536
 
 /* FEC receive acceleration */
 #define FEC_RACC_IPDIS		(1 << 1)
@@ -851,7 +853,7 @@ static void fec_enet_enable_ring(struct net_device *ndev)
 	for (i = 0; i < fep->num_rx_queues; i++) {
 		rxq = fep->rx_queue[i];
 		writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));
-		writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
+		writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
 
 		/* enable DMA1/2 */
 		if (i)
@@ -1904,8 +1906,10 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 		phy_dev = of_phy_connect(ndev, fep->phy_node,
 					 &fec_enet_adjust_link, 0,
 					 fep->phy_interface);
-		if (!phy_dev)
+		if (!phy_dev) {
+			netdev_err(ndev, "Unable to connect to phy\n");
 			return -ENODEV;
+		}
 	} else {
 		/* check for attached phy */
 		for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) {
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index aa8cf5d2a53c..6d7269d87a85 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -960,8 +960,8 @@ static int mpc52xx_fec_probe(struct platform_device *op)
 
 	/* We're done ! */
 	platform_set_drvdata(op, ndev);
-	netdev_info(ndev, "%s MAC %pM\n",
-		    op->dev.of_node->full_name, ndev->dev_addr);
+	netdev_info(ndev, "%pOF MAC %pM\n",
+		    op->dev.of_node, ndev->dev_addr);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 4aefe2438969..e714b8fa55eb 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -1925,8 +1925,8 @@ static int fman_reset(struct fman *fman)
 
 		guts_regs = of_iomap(guts_node, 0);
 		if (!guts_regs) {
-			dev_err(fman->dev, "%s: Couldn't map %s regs\n",
-				__func__, guts_node->full_name);
+			dev_err(fman->dev, "%s: Couldn't map %pOF regs\n",
+				__func__, guts_node);
 			goto guts_regs;
 		}
 #define FMAN1_ALL_MACS_MASK	0xFCC00000
@@ -2780,8 +2780,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
 
 	err = of_property_read_u32(fm_node, "cell-index", &val);
 	if (err) {
-		dev_err(&of_dev->dev, "%s: failed to read cell-index for %s\n",
-			__func__, fm_node->full_name);
+		dev_err(&of_dev->dev, "%s: failed to read cell-index for %pOF\n",
+			__func__, fm_node);
 		goto fman_node_put;
 	}
 	fman->dts_params.id = (u8)val;
@@ -2834,8 +2834,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
 	err = of_property_read_u32_array(fm_node, "fsl,qman-channel-range",
 					 &range[0], 2);
 	if (err) {
-		dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %s\n",
-			__func__, fm_node->full_name);
+		dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %pOF\n",
+			__func__, fm_node);
 		goto fman_node_put;
 	}
 	fman->dts_params.qman_channel_base = range[0];
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index 57bf44fa16a1..49bfa11f2d20 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1720,8 +1720,8 @@ static int fman_port_probe(struct platform_device *of_dev)
 
 	err = of_property_read_u32(port_node, "cell-index", &val);
 	if (err) {
-		dev_err(port->dev, "%s: reading cell-index for %s failed\n",
-			__func__, port_node->full_name);
+		dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
+			__func__, port_node);
 		err = -EINVAL;
 		goto return_err;
 	}
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 6e67d22fd0d5..14cd2c8b0024 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -698,8 +698,8 @@ static int mac_probe(struct platform_device *_of_dev)
 		priv->internal_phy_node = of_parse_phandle(mac_node,
 							  "pcsphy-handle", 0);
 	} else {
-		dev_err(dev, "MAC node (%s) contains unsupported MAC\n",
-			mac_node->full_name);
+		dev_err(dev, "MAC node (%pOF) contains unsupported MAC\n",
+			mac_node);
 		err = -EINVAL;
 		goto _return;
 	}
@@ -712,16 +712,15 @@ static int mac_probe(struct platform_device *_of_dev)
 	/* Get the FM node */
 	dev_node = of_get_parent(mac_node);
 	if (!dev_node) {
-		dev_err(dev, "of_get_parent(%s) failed\n",
-			mac_node->full_name);
+		dev_err(dev, "of_get_parent(%pOF) failed\n",
+			mac_node);
 		err = -EINVAL;
 		goto _return_dev_set_drvdata;
 	}
 
 	of_dev = of_find_device_by_node(dev_node);
 	if (!of_dev) {
-		dev_err(dev, "of_find_device_by_node(%s) failed\n",
-			dev_node->full_name);
+		dev_err(dev, "of_find_device_by_node(%pOF) failed\n", dev_node);
 		err = -EINVAL;
 		goto _return_of_node_put;
 	}
@@ -729,8 +728,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	/* Get the FMan cell-index */
 	err = of_property_read_u32(dev_node, "cell-index", &val);
 	if (err) {
-		dev_err(dev, "failed to read cell-index for %s\n",
-			dev_node->full_name);
+		dev_err(dev, "failed to read cell-index for %pOF\n", dev_node);
 		err = -EINVAL;
 		goto _return_of_node_put;
 	}
@@ -739,7 +737,7 @@ static int mac_probe(struct platform_device *_of_dev)
 
 	priv->fman = fman_bind(&of_dev->dev);
 	if (!priv->fman) {
-		dev_err(dev, "fman_bind(%s) failed\n", dev_node->full_name);
+		dev_err(dev, "fman_bind(%pOF) failed\n", dev_node);
 		err = -ENODEV;
 		goto _return_of_node_put;
 	}
@@ -749,8 +747,8 @@ static int mac_probe(struct platform_device *_of_dev)
 	/* Get the address of the memory mapped registers */
 	err = of_address_to_resource(mac_node, 0, &res);
 	if (err < 0) {
-		dev_err(dev, "of_address_to_resource(%s) = %d\n",
-			mac_node->full_name, err);
+		dev_err(dev, "of_address_to_resource(%pOF) = %d\n",
+			mac_node, err);
 		goto _return_dev_set_drvdata;
 	}
 
@@ -784,8 +782,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	/* Get the cell-index */
 	err = of_property_read_u32(mac_node, "cell-index", &val);
 	if (err) {
-		dev_err(dev, "failed to read cell-index for %s\n",
-			mac_node->full_name);
+		dev_err(dev, "failed to read cell-index for %pOF\n", mac_node);
 		err = -EINVAL;
 		goto _return_dev_set_drvdata;
 	}
@@ -794,8 +791,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	/* Get the MAC address */
 	mac_addr = of_get_mac_address(mac_node);
 	if (!mac_addr) {
-		dev_err(dev, "of_get_mac_address(%s) failed\n",
-			mac_node->full_name);
+		dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
 		err = -EINVAL;
 		goto _return_dev_set_drvdata;
 	}
@@ -804,15 +800,15 @@ static int mac_probe(struct platform_device *_of_dev)
 	/* Get the port handles */
 	nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL);
 	if (unlikely(nph < 0)) {
-		dev_err(dev, "of_count_phandle_with_args(%s, fsl,fman-ports) failed\n",
-			mac_node->full_name);
+		dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n",
+			mac_node);
 		err = nph;
 		goto _return_dev_set_drvdata;
 	}
 
 	if (nph != ARRAY_SIZE(mac_dev->port)) {
-		dev_err(dev, "Not supported number of fman-ports handles of mac node %s from device tree\n",
-			mac_node->full_name);
+		dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n",
+			mac_node);
 		err = -EINVAL;
 		goto _return_dev_set_drvdata;
 	}
@@ -821,24 +817,24 @@ static int mac_probe(struct platform_device *_of_dev)
 		/* Find the port node */
 		dev_node = of_parse_phandle(mac_node, "fsl,fman-ports", i);
 		if (!dev_node) {
-			dev_err(dev, "of_parse_phandle(%s, fsl,fman-ports) failed\n",
-				mac_node->full_name);
+			dev_err(dev, "of_parse_phandle(%pOF, fsl,fman-ports) failed\n",
+				mac_node);
 			err = -EINVAL;
 			goto _return_of_node_put;
 		}
 
 		of_dev = of_find_device_by_node(dev_node);
 		if (!of_dev) {
-			dev_err(dev, "of_find_device_by_node(%s) failed\n",
-				dev_node->full_name);
+			dev_err(dev, "of_find_device_by_node(%pOF) failed\n",
+				dev_node);
 			err = -EINVAL;
 			goto _return_of_node_put;
 		}
 
 		mac_dev->port[i] = fman_port_bind(&of_dev->dev);
 		if (!mac_dev->port[i]) {
-			dev_err(dev, "dev_get_drvdata(%s) failed\n",
-				dev_node->full_name);
+			dev_err(dev, "dev_get_drvdata(%pOF) failed\n",
+				dev_node);
 			err = -EINVAL;
 			goto _return_of_node_put;
 		}
@@ -849,8 +845,8 @@ static int mac_probe(struct platform_device *_of_dev)
 	phy_if = of_get_phy_mode(mac_node);
 	if (phy_if < 0) {
 		dev_warn(dev,
-			 "of_get_phy_mode() for %s failed. Defaulting to SGMII\n",
-			 mac_node->full_name);
+			 "of_get_phy_mode() for %pOF failed. Defaulting to SGMII\n",
+			 mac_node);
 		phy_if = PHY_INTERFACE_MODE_SGMII;
 	}
 	priv->phy_if = phy_if;
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index a10de1e9c157..80ad16acf0f1 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -267,8 +267,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
 
 		ret = of_address_to_resource(np, 0, &res);
 		if (ret < 0) {
-			pr_debug("fsl-pq-mdio: no address range in node %s\n",
-				 np->full_name);
+			pr_debug("fsl-pq-mdio: no address range in node %pOF\n",
+				 np);
 			continue;
 		}
 
@@ -280,8 +280,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
 		if (!iprop) {
 			iprop = of_get_property(np, "device-id", NULL);
 			if (!iprop) {
-				pr_debug("fsl-pq-mdio: no UCC ID in node %s\n",
-					 np->full_name);
+				pr_debug("fsl-pq-mdio: no UCC ID in node %pOF\n",
+					 np);
 				continue;
 			}
 		}
@@ -293,8 +293,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
 		 * numbered from 1, not 0.
 		 */
 		if (ucc_set_qe_mux_mii_mng(id - 1) < 0) {
-			pr_debug("fsl-pq-mdio: invalid UCC ID in node %s\n",
-				 np->full_name);
+			pr_debug("fsl-pq-mdio: invalid UCC ID in node %pOF\n",
+				 np);
 			continue;
 		}
 
@@ -442,8 +442,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 	if (data->get_tbipa) {
 		for_each_child_of_node(np, tbi) {
 			if (strcmp(tbi->type, "tbi-phy") == 0) {
-				dev_dbg(&pdev->dev, "found TBI PHY node %s\n",
-					strrchr(tbi->full_name, '/') + 1);
+				dev_dbg(&pdev->dev, "found TBI PHY node %pOFP\n",
+					tbi);
 				break;
 			}
 		}
@@ -454,8 +454,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 
 			if (!prop) {
 				dev_err(&pdev->dev,
-					"missing 'reg' property in node %s\n",
-					tbi->full_name);
+					"missing 'reg' property in node %pOF\n",
+					tbi);
 				err = -EBUSY;
 				goto error;
 			}
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index d11287e11371..91c7bdb9b43c 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -76,4 +76,31 @@ config HNS_ENET
 	  This selects the general ethernet driver for HNS.  This module make
 	  use of any HNS AE driver, such as HNS_DSAF
 
+config HNS3
+	tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
+    depends on PCI
+	---help---
+	  This selects the framework support for Hisilicon Network Subsystem 3.
+	  This layer facilitates clients like ENET, RoCE and user-space ethernet
+	  drivers(like ODP)to register with HNAE devices and their associated
+	  operations.
+
+config HNS3_HCLGE
+	tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
+    depends on PCI_MSI
+	depends on HNS3
+	---help---
+	  This selects the HNS3_HCLGE network acceleration engine & its hardware
+	  compatibility layer. The engine would be used in Hisilicon hip08 family of
+	  SoCs and further upcoming SoCs.
+
+config HNS3_ENET
+	tristate "Hisilicon HNS3 Ethernet Device Support"
+    depends on 64BIT && PCI
+	depends on HNS3 && HNS3_HCLGE
+	---help---
+	  This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
+	  family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
+	  devices and their associated operations.
+
 endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 8661695024dc..3828c435c18f 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
 obj-$(CONFIG_HIP04_ETH) += hip04_eth.o
 obj-$(CONFIG_HNS_MDIO) += hns_mdio.o
 obj-$(CONFIG_HNS) += hns/
+obj-$(CONFIG_HNS3) += hns3/
 obj-$(CONFIG_HISI_FEMAC) += hisi_femac.o
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index 9d9b6e6dd988..a051e582d541 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -202,6 +202,7 @@ hnae_init_ring(struct hnae_queue *q, struct hnae_ring *ring, int flags)
 	ring->q = q;
 	ring->flags = flags;
 	spin_lock_init(&ring->lock);
+	ring->coal_param = q->handle->coal_param;
 	assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr);
 
 	/* not matter for tx or rx ring, the ntc and ntc start from 0 */
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 7ba653af19cb..3e62692af011 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -89,6 +89,10 @@ do { \
 
 #define RCB_RING_NAME_LEN 16
 
+#define HNAE_LOWEST_LATENCY_COAL_PARAM	30
+#define HNAE_LOW_LATENCY_COAL_PARAM	80
+#define HNAE_BULK_LATENCY_COAL_PARAM	150
+
 enum hnae_led_state {
 	HNAE_LED_INACTIVE,
 	HNAE_LED_ACTIVE,
@@ -292,6 +296,12 @@ struct hnae_ring {
 
 	int flags;          /* ring attribute */
 	int irq_init_flag;
+
+	/* total rx bytes after last rx rate calucated */
+	u64 coal_last_rx_bytes;
+	unsigned long coal_last_jiffies;
+	u32 coal_param;
+	u32 coal_rx_rate;	/* rx rate in MB */
 };
 
 #define ring_ptr_move_fw(ring, p) \
@@ -548,8 +558,13 @@ struct hnae_handle {
 	u32 if_support;
 	int q_num;
 	int vf_id;
+	unsigned long coal_last_jiffies;
+	u32 coal_param;		/* self adapt coalesce param */
+	/* the ring index of last ring that set coal param */
+	u32 coal_ring_idx;
 	u32 eport_id;
 	u32 dport_id;	/* v2 tx bd should fill the dport_id */
+	bool coal_adapt_en;
 	enum hnae_port_type port_type;
 	enum hnae_media_type media_type;
 	struct list_head node;    /* list to hnae_ae_dev->handle_list */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index a37166ee577b..bd68379d2bea 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -99,6 +99,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
 	ae_handle->owner_dev = dsaf_dev->dev;
 	ae_handle->dev = dev;
 	ae_handle->q_num = qnum_per_vf;
+	ae_handle->coal_param = HNAE_LOWEST_LATENCY_COAL_PARAM;
 
 	/* find ring pair, and set vf id*/
 	for (ae_handle->vf_id = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 3987699f8fe6..36520634c96a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -812,6 +812,113 @@ static int hns_desc_unused(struct hnae_ring *ring)
 	return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
 }
 
+#define HNS_LOWEST_LATENCY_RATE		27	/* 27 MB/s */
+#define HNS_LOW_LATENCY_RATE			80	/* 80 MB/s */
+
+#define HNS_COAL_BDNUM			3
+
+static u32 hns_coal_rx_bdnum(struct hnae_ring *ring)
+{
+	bool coal_enable = ring->q->handle->coal_adapt_en;
+
+	if (coal_enable &&
+	    ring->coal_last_rx_bytes > HNS_LOWEST_LATENCY_RATE)
+		return HNS_COAL_BDNUM;
+	else
+		return 0;
+}
+
+static void hns_update_rx_rate(struct hnae_ring *ring)
+{
+	bool coal_enable = ring->q->handle->coal_adapt_en;
+	u32 time_passed_ms;
+	u64 total_bytes;
+
+	if (!coal_enable ||
+	    time_before(jiffies, ring->coal_last_jiffies + (HZ >> 4)))
+		return;
+
+	/* ring->stats.rx_bytes overflowed */
+	if (ring->coal_last_rx_bytes > ring->stats.rx_bytes) {
+		ring->coal_last_rx_bytes = ring->stats.rx_bytes;
+		ring->coal_last_jiffies = jiffies;
+		return;
+	}
+
+	total_bytes = ring->stats.rx_bytes - ring->coal_last_rx_bytes;
+	time_passed_ms = jiffies_to_msecs(jiffies - ring->coal_last_jiffies);
+	do_div(total_bytes, time_passed_ms);
+	ring->coal_rx_rate = total_bytes >> 10;
+
+	ring->coal_last_rx_bytes = ring->stats.rx_bytes;
+	ring->coal_last_jiffies = jiffies;
+}
+
+/**
+ * smooth_alg - smoothing algrithm for adjusting coalesce parameter
+ **/
+static u32 smooth_alg(u32 new_param, u32 old_param)
+{
+	u32 gap = (new_param > old_param) ? new_param - old_param
+					  : old_param - new_param;
+
+	if (gap > 8)
+		gap >>= 3;
+
+	if (new_param > old_param)
+		return old_param + gap;
+	else
+		return old_param - gap;
+}
+
+/**
+ * hns_nic_adp_coalesce - self adapte coalesce according to rx rate
+ * @ring_data: pointer to hns_nic_ring_data
+ **/
+static void hns_nic_adpt_coalesce(struct hns_nic_ring_data *ring_data)
+{
+	struct hnae_ring *ring = ring_data->ring;
+	struct hnae_handle *handle = ring->q->handle;
+	u32 new_coal_param, old_coal_param = ring->coal_param;
+
+	if (ring->coal_rx_rate < HNS_LOWEST_LATENCY_RATE)
+		new_coal_param = HNAE_LOWEST_LATENCY_COAL_PARAM;
+	else if (ring->coal_rx_rate < HNS_LOW_LATENCY_RATE)
+		new_coal_param = HNAE_LOW_LATENCY_COAL_PARAM;
+	else
+		new_coal_param = HNAE_BULK_LATENCY_COAL_PARAM;
+
+	if (new_coal_param == old_coal_param &&
+	    new_coal_param == handle->coal_param)
+		return;
+
+	new_coal_param = smooth_alg(new_coal_param, old_coal_param);
+	ring->coal_param = new_coal_param;
+
+	/**
+	 * Because all ring in one port has one coalesce param, when one ring
+	 * calculate its own coalesce param, it cannot write to hardware at
+	 * once. There are three conditions as follows:
+	 *       1. current ring's coalesce param is larger than the hardware.
+	 *       2. or ring which adapt last time can change again.
+	 *       3. timeout.
+	 */
+	if (new_coal_param == handle->coal_param) {
+		handle->coal_last_jiffies = jiffies;
+		handle->coal_ring_idx = ring_data->queue_index;
+	} else if (new_coal_param > handle->coal_param ||
+		   handle->coal_ring_idx == ring_data->queue_index ||
+		   time_after(jiffies, handle->coal_last_jiffies + (HZ >> 4))) {
+		handle->dev->ops->set_coalesce_usecs(handle,
+					new_coal_param);
+		handle->dev->ops->set_coalesce_frames(handle,
+					1, new_coal_param);
+		handle->coal_param = new_coal_param;
+		handle->coal_ring_idx = ring_data->queue_index;
+		handle->coal_last_jiffies = jiffies;
+	}
+}
+
 static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data,
 			       int budget, void *v)
 {
@@ -868,20 +975,27 @@ static bool hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
 	struct hnae_ring *ring = ring_data->ring;
 	int num = 0;
+	bool rx_stopped;
 
-	ring_data->ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
+	hns_update_rx_rate(ring);
 
 	/* for hardware bug fixed */
+	ring_data->ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
 	num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
 
-	if (num > 0) {
+	if (num <= hns_coal_rx_bdnum(ring)) {
+		if (ring->q->handle->coal_adapt_en)
+			hns_nic_adpt_coalesce(ring_data);
+
+		rx_stopped = true;
+	} else {
 		ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
 			ring_data->ring, 1);
 
-		return false;
-	} else {
-		return true;
+		rx_stopped = false;
 	}
+
+	return rx_stopped;
 }
 
 static bool hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
@@ -889,12 +1003,17 @@ static bool hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
 	struct hnae_ring *ring = ring_data->ring;
 	int num;
 
+	hns_update_rx_rate(ring);
 	num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
 
-	if (!num)
+	if (num <= hns_coal_rx_bdnum(ring)) {
+		if (ring->q->handle->coal_adapt_en)
+			hns_nic_adpt_coalesce(ring_data);
+
 		return true;
-	else
-		return false;
+	}
+
+	return false;
 }
 
 static inline void hns_nic_reclaim_one_desc(struct hnae_ring *ring,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 9cb4c7884201..26e9afcbdd50 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -38,7 +38,7 @@ struct hns_nic_ring_data {
 	struct hnae_ring *ring;
 	struct napi_struct napi;
 	cpumask_t mask; /* affinity mask */
-	int queue_index;
+	u32 queue_index;
 	int (*poll_one)(struct hns_nic_ring_data *, int, void *);
 	void (*ex_process)(struct hns_nic_ring_data *, struct sk_buff *);
 	bool (*fini_process)(struct hns_nic_ring_data *);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index a8db27e86a11..7ea7f8a4aa2a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -595,7 +595,7 @@ static void hns_nic_self_test(struct net_device *ndev,
 		set_bit(NIC_STATE_TESTING, &priv->state);
 
 		if (if_running)
-			(void)dev_close(ndev);
+			dev_close(ndev);
 
 		for (i = 0; i < SELF_TEST_TPYE_NUM; i++) {
 			if (!st_param[i][1])
@@ -735,8 +735,8 @@ static int hns_get_coalesce(struct net_device *net_dev,
 
 	ops = priv->ae_handle->dev->ops;
 
-	ec->use_adaptive_rx_coalesce = 1;
-	ec->use_adaptive_tx_coalesce = 1;
+	ec->use_adaptive_rx_coalesce = priv->ae_handle->coal_adapt_en;
+	ec->use_adaptive_tx_coalesce = priv->ae_handle->coal_adapt_en;
 
 	if ((!ops->get_coalesce_usecs) ||
 	    (!ops->get_max_coalesced_frames))
@@ -787,6 +787,9 @@ static int hns_set_coalesce(struct net_device *net_dev,
 	    (!ops->set_coalesce_frames))
 		return -ESRCH;
 
+	if (ec->use_adaptive_rx_coalesce != priv->ae_handle->coal_adapt_en)
+		priv->ae_handle->coal_adapt_en = ec->use_adaptive_rx_coalesce;
+
 	rc1 = ops->set_coalesce_usecs(priv->ae_handle,
 				      ec->rx_coalesce_usecs);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile
new file mode 100644
index 000000000000..a9349e1f3e51
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the HISILICON network device drivers.
+#
+
+obj-$(CONFIG_HNS3) += hns3pf/
+
+obj-$(CONFIG_HNS3) += hnae3.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
new file mode 100644
index 000000000000..59efbd605416
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "hnae3.h"
+
+static LIST_HEAD(hnae3_ae_algo_list);
+static LIST_HEAD(hnae3_client_list);
+static LIST_HEAD(hnae3_ae_dev_list);
+
+/* we are keeping things simple and using single lock for all the
+ * list. This is a non-critical code so other updations, if happen
+ * in parallel, can wait.
+ */
+static DEFINE_MUTEX(hnae3_common_lock);
+
+static bool hnae3_client_match(enum hnae3_client_type client_type,
+			       enum hnae3_dev_type dev_type)
+{
+	if ((dev_type == HNAE3_DEV_KNIC) && (client_type == HNAE3_CLIENT_KNIC ||
+					     client_type == HNAE3_CLIENT_ROCE))
+		return true;
+
+	if (dev_type == HNAE3_DEV_UNIC && client_type == HNAE3_CLIENT_UNIC)
+		return true;
+
+	return false;
+}
+
+static int hnae3_match_n_instantiate(struct hnae3_client *client,
+				     struct hnae3_ae_dev *ae_dev,
+				     bool is_reg, bool *matched)
+{
+	int ret;
+
+	*matched = false;
+
+	/* check if this client matches the type of ae_dev */
+	if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
+	      hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
+		return 0;
+	}
+	/* there is a match of client and dev */
+	*matched = true;
+
+	/* now, (un-)instantiate client by calling lower layer */
+	if (is_reg) {
+		ret = ae_dev->ops->init_client_instance(client, ae_dev);
+		if (ret)
+			dev_err(&ae_dev->pdev->dev,
+				"fail to instantiate client\n");
+		return ret;
+	}
+
+	ae_dev->ops->uninit_client_instance(client, ae_dev);
+	return 0;
+}
+
+int hnae3_register_client(struct hnae3_client *client)
+{
+	struct hnae3_client *client_tmp;
+	struct hnae3_ae_dev *ae_dev;
+	bool matched;
+	int ret = 0;
+
+	mutex_lock(&hnae3_common_lock);
+	/* one system should only have one client for every type */
+	list_for_each_entry(client_tmp, &hnae3_client_list, node) {
+		if (client_tmp->type == client->type)
+			goto exit;
+	}
+
+	list_add_tail(&client->node, &hnae3_client_list);
+
+	/* initialize the client on every matched port */
+	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+		/* if the client could not be initialized on current port, for
+		 * any error reasons, move on to next available port
+		 */
+		ret = hnae3_match_n_instantiate(client, ae_dev, true, &matched);
+		if (ret)
+			dev_err(&ae_dev->pdev->dev,
+				"match and instantiation failed for port\n");
+	}
+
+exit:
+	mutex_unlock(&hnae3_common_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(hnae3_register_client);
+
+void hnae3_unregister_client(struct hnae3_client *client)
+{
+	struct hnae3_ae_dev *ae_dev;
+	bool matched;
+
+	mutex_lock(&hnae3_common_lock);
+	/* un-initialize the client on every matched port */
+	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+		hnae3_match_n_instantiate(client, ae_dev, false, &matched);
+	}
+
+	list_del(&client->node);
+	mutex_unlock(&hnae3_common_lock);
+}
+EXPORT_SYMBOL(hnae3_unregister_client);
+
+/* hnae3_register_ae_algo - register a AE algorithm to hnae3 framework
+ * @ae_algo: AE algorithm
+ * NOTE: the duplicated name will not be checked
+ */
+int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
+{
+	const struct pci_device_id *id;
+	struct hnae3_ae_dev *ae_dev;
+	struct hnae3_client *client;
+	bool matched;
+	int ret = 0;
+
+	mutex_lock(&hnae3_common_lock);
+
+	list_add_tail(&ae_algo->node, &hnae3_ae_algo_list);
+
+	/* Check if this algo/ops matches the list of ae_devs */
+	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+		id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+		if (!id)
+			continue;
+
+		/* ae_dev init should set flag */
+		ae_dev->ops = ae_algo->ops;
+		ret = ae_algo->ops->init_ae_dev(ae_dev);
+		if (ret) {
+			dev_err(&ae_dev->pdev->dev, "init ae_dev error.\n");
+			continue;
+		}
+
+		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
+
+		/* check the client list for the match with this ae_dev type and
+		 * initialize the figure out client instance
+		 */
+		list_for_each_entry(client, &hnae3_client_list, node) {
+			ret = hnae3_match_n_instantiate(client, ae_dev, true,
+							&matched);
+			if (ret)
+				dev_err(&ae_dev->pdev->dev,
+					"match and instantiation failed\n");
+			if (matched)
+				break;
+		}
+	}
+
+	mutex_unlock(&hnae3_common_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(hnae3_register_ae_algo);
+
+/* hnae3_unregister_ae_algo - unregisters a AE algorithm
+ * @ae_algo: the AE algorithm to unregister
+ */
+void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
+{
+	const struct pci_device_id *id;
+	struct hnae3_ae_dev *ae_dev;
+	struct hnae3_client *client;
+	bool matched;
+
+	mutex_lock(&hnae3_common_lock);
+	/* Check if there are matched ae_dev */
+	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+		id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+		if (!id)
+			continue;
+
+		/* check the client list for the match with this ae_dev type and
+		 * un-initialize the figure out client instance
+		 */
+		list_for_each_entry(client, &hnae3_client_list, node) {
+			hnae3_match_n_instantiate(client, ae_dev, false,
+						  &matched);
+			if (matched)
+				break;
+		}
+
+		ae_algo->ops->uninit_ae_dev(ae_dev);
+		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+	}
+
+	list_del(&ae_algo->node);
+	mutex_unlock(&hnae3_common_lock);
+}
+EXPORT_SYMBOL(hnae3_unregister_ae_algo);
+
+/* hnae3_register_ae_dev - registers a AE device to hnae3 framework
+ * @ae_dev: the AE device
+ * NOTE: the duplicated name will not be checked
+ */
+int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	const struct pci_device_id *id;
+	struct hnae3_ae_algo *ae_algo;
+	struct hnae3_client *client;
+	bool matched;
+	int ret = 0;
+
+	mutex_lock(&hnae3_common_lock);
+	list_add_tail(&ae_dev->node, &hnae3_ae_dev_list);
+
+	/* Check if there are matched ae_algo */
+	list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
+		id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+		if (!id)
+			continue;
+
+		ae_dev->ops = ae_algo->ops;
+
+		if (!ae_dev->ops) {
+			dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n");
+			goto out_err;
+		}
+
+		/* ae_dev init should set flag */
+		ret = ae_dev->ops->init_ae_dev(ae_dev);
+		if (ret) {
+			dev_err(&ae_dev->pdev->dev, "init ae_dev error\n");
+			goto out_err;
+		}
+
+		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
+		break;
+	}
+
+	/* check the client list for the match with this ae_dev type and
+	 * initialize the figure out client instance
+	 */
+	list_for_each_entry(client, &hnae3_client_list, node) {
+		ret = hnae3_match_n_instantiate(client, ae_dev, true,
+						&matched);
+		if (ret)
+			dev_err(&ae_dev->pdev->dev,
+				"match and instantiation failed\n");
+		if (matched)
+			break;
+	}
+
+out_err:
+	mutex_unlock(&hnae3_common_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(hnae3_register_ae_dev);
+
+/* hnae3_unregister_ae_dev - unregisters a AE device
+ * @ae_dev: the AE device to unregister
+ */
+void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	const struct pci_device_id *id;
+	struct hnae3_ae_algo *ae_algo;
+	struct hnae3_client *client;
+	bool matched;
+
+	mutex_lock(&hnae3_common_lock);
+	/* Check if there are matched ae_algo */
+	list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
+		id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+		if (!id)
+			continue;
+
+		list_for_each_entry(client, &hnae3_client_list, node) {
+			hnae3_match_n_instantiate(client, ae_dev, false,
+						  &matched);
+			if (matched)
+				break;
+		}
+
+		ae_algo->ops->uninit_ae_dev(ae_dev);
+		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+	}
+
+	list_del(&ae_dev->node);
+	mutex_unlock(&hnae3_common_lock);
+}
+EXPORT_SYMBOL(hnae3_unregister_ae_dev);
+
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HNAE3(Hisilicon Network Acceleration Engine) Framework");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
new file mode 100644
index 000000000000..b2f28ae81273
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HNAE3_H
+#define __HNAE3_H
+
+/* Names used in this framework:
+ *      ae handle (handle):
+ *        a set of queues provided by AE
+ *      ring buffer queue (rbq):
+ *        the channel between upper layer and the AE, can do tx and rx
+ *      ring:
+ *        a tx or rx channel within a rbq
+ *      ring description (desc):
+ *        an element in the ring with packet information
+ *      buffer:
+ *        a memory region referred by desc with the full packet payload
+ *
+ * "num" means a static number set as a parameter, "count" mean a dynamic
+ *   number set while running
+ * "cb" means control block
+ */
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+/* Device IDs */
+#define HNAE3_DEV_ID_GE				0xA220
+#define HNAE3_DEV_ID_25GE			0xA221
+#define HNAE3_DEV_ID_25GE_RDMA			0xA222
+#define HNAE3_DEV_ID_25GE_RDMA_MACSEC		0xA223
+#define HNAE3_DEV_ID_50GE_RDMA			0xA224
+#define HNAE3_DEV_ID_50GE_RDMA_MACSEC		0xA225
+#define HNAE3_DEV_ID_100G_RDMA_MACSEC		0xA226
+#define HNAE3_DEV_ID_100G_VF			0xA22E
+#define HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF	0xA22F
+
+#define HNAE3_CLASS_NAME_SIZE 16
+
+#define HNAE3_DEV_INITED_B			0x0
+#define HNAE_DEV_SUPPORT_ROCE_B			0x1
+
+#define ring_ptr_move_fw(ring, p) \
+	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
+#define ring_ptr_move_bw(ring, p) \
+	((ring)->p = ((ring)->p - 1 + (ring)->desc_num) % (ring)->desc_num)
+
+enum hns_desc_type {
+	DESC_TYPE_SKB,
+	DESC_TYPE_PAGE,
+};
+
+struct hnae3_handle;
+
+struct hnae3_queue {
+	void __iomem *io_base;
+	struct hnae3_ae_algo *ae_algo;
+	struct hnae3_handle *handle;
+	int tqp_index;	/* index in a handle */
+	u32 buf_size;	/* size for hnae_desc->addr, preset by AE */
+	u16 desc_num;	/* total number of desc */
+};
+
+/*hnae3 loop mode*/
+enum hnae3_loop {
+	HNAE3_MAC_INTER_LOOP_MAC,
+	HNAE3_MAC_INTER_LOOP_SERDES,
+	HNAE3_MAC_INTER_LOOP_PHY,
+	HNAE3_MAC_LOOP_NONE,
+};
+
+enum hnae3_client_type {
+	HNAE3_CLIENT_KNIC,
+	HNAE3_CLIENT_UNIC,
+	HNAE3_CLIENT_ROCE,
+};
+
+enum hnae3_dev_type {
+	HNAE3_DEV_KNIC,
+	HNAE3_DEV_UNIC,
+};
+
+/* mac media type */
+enum hnae3_media_type {
+	HNAE3_MEDIA_TYPE_UNKNOWN,
+	HNAE3_MEDIA_TYPE_FIBER,
+	HNAE3_MEDIA_TYPE_COPPER,
+	HNAE3_MEDIA_TYPE_BACKPLANE,
+};
+
+struct hnae3_vector_info {
+	u8 __iomem *io_addr;
+	int vector;
+};
+
+#define HNAE3_RING_TYPE_B 0
+#define HNAE3_RING_TYPE_TX 0
+#define HNAE3_RING_TYPE_RX 1
+
+struct hnae3_ring_chain_node {
+	struct hnae3_ring_chain_node *next;
+	u32 tqp_index;
+	u32 flag;
+};
+
+#define HNAE3_IS_TX_RING(node) \
+	(((node)->flag & (1 << HNAE3_RING_TYPE_B)) == HNAE3_RING_TYPE_TX)
+
+struct hnae3_client_ops {
+	int (*init_instance)(struct hnae3_handle *handle);
+	void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
+	void (*link_status_change)(struct hnae3_handle *handle, bool state);
+};
+
+#define HNAE3_CLIENT_NAME_LENGTH 16
+struct hnae3_client {
+	char name[HNAE3_CLIENT_NAME_LENGTH];
+	u16 version;
+	unsigned long state;
+	enum hnae3_client_type type;
+	const struct hnae3_client_ops *ops;
+	struct list_head node;
+};
+
+struct hnae3_ae_dev {
+	struct pci_dev *pdev;
+	const struct hnae3_ae_ops *ops;
+	struct list_head node;
+	u32 flag;
+	enum hnae3_dev_type dev_type;
+	void *priv;
+};
+
+/* This struct defines the operation on the handle.
+ *
+ * init_ae_dev(): (mandatory)
+ *   Get PF configure from pci_dev and initialize PF hardware
+ * uninit_ae_dev()
+ *   Disable PF device and release PF resource
+ * register_client
+ *   Register client to ae_dev
+ * unregister_client()
+ *   Unregister client from ae_dev
+ * start()
+ *   Enable the hardware
+ * stop()
+ *   Disable the hardware
+ * get_status()
+ *   Get the carrier state of the back channel of the handle, 1 for ok, 0 for
+ *   non-ok
+ * get_ksettings_an_result()
+ *   Get negotiation status,speed and duplex
+ * update_speed_duplex_h()
+ *   Update hardware speed and duplex
+ * get_media_type()
+ *   Get media type of MAC
+ * adjust_link()
+ *   Adjust link status
+ * set_loopback()
+ *   Set loopback
+ * set_promisc_mode
+ *   Set promisc mode
+ * set_mtu()
+ *   set mtu
+ * get_pauseparam()
+ *   get tx and rx of pause frame use
+ * set_pauseparam()
+ *   set tx and rx of pause frame use
+ * set_autoneg()
+ *   set auto autonegotiation of pause frame use
+ * get_autoneg()
+ *   get auto autonegotiation of pause frame use
+ * get_coalesce_usecs()
+ *   get usecs to delay a TX interrupt after a packet is sent
+ * get_rx_max_coalesced_frames()
+ *   get Maximum number of packets to be sent before a TX interrupt.
+ * set_coalesce_usecs()
+ *   set usecs to delay a TX interrupt after a packet is sent
+ * set_coalesce_frames()
+ *   set Maximum number of packets to be sent before a TX interrupt.
+ * get_mac_addr()
+ *   get mac address
+ * set_mac_addr()
+ *   set mac address
+ * add_uc_addr
+ *   Add unicast addr to mac table
+ * rm_uc_addr
+ *   Remove unicast addr from mac table
+ * set_mc_addr()
+ *   Set multicast address
+ * add_mc_addr
+ *   Add multicast address to mac table
+ * rm_mc_addr
+ *   Remove multicast address from mac table
+ * update_stats()
+ *   Update Old network device statistics
+ * get_ethtool_stats()
+ *   Get ethtool network device statistics
+ * get_strings()
+ *   Get a set of strings that describe the requested objects
+ * get_sset_count()
+ *   Get number of strings that @get_strings will write
+ * update_led_status()
+ *   Update the led status
+ * set_led_id()
+ *   Set led id
+ * get_regs()
+ *   Get regs dump
+ * get_regs_len()
+ *   Get the len of the regs dump
+ * get_rss_key_size()
+ *   Get rss key size
+ * get_rss_indir_size()
+ *   Get rss indirection table size
+ * get_rss()
+ *   Get rss table
+ * set_rss()
+ *   Set rss table
+ * get_tc_size()
+ *   Get tc size of handle
+ * get_vector()
+ *   Get vector number and vector information
+ * map_ring_to_vector()
+ *   Map rings to vector
+ * unmap_ring_from_vector()
+ *   Unmap rings from vector
+ * add_tunnel_udp()
+ *   Add tunnel information to hardware
+ * del_tunnel_udp()
+ *   Delete tunnel information from hardware
+ * reset_queue()
+ *   Reset queue
+ * get_fw_version()
+ *   Get firmware version
+ * get_mdix_mode()
+ *   Get media typr of phy
+ * set_vlan_filter()
+ *   Set vlan filter config of Ports
+ * set_vf_vlan_filter()
+ *   Set vlan filter config of vf
+ */
+struct hnae3_ae_ops {
+	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
+	void (*uninit_ae_dev)(struct hnae3_ae_dev *ae_dev);
+
+	int (*init_client_instance)(struct hnae3_client *client,
+				    struct hnae3_ae_dev *ae_dev);
+	void (*uninit_client_instance)(struct hnae3_client *client,
+				       struct hnae3_ae_dev *ae_dev);
+	int (*start)(struct hnae3_handle *handle);
+	void (*stop)(struct hnae3_handle *handle);
+	int (*get_status)(struct hnae3_handle *handle);
+	void (*get_ksettings_an_result)(struct hnae3_handle *handle,
+					u8 *auto_neg, u32 *speed, u8 *duplex);
+
+	int (*update_speed_duplex_h)(struct hnae3_handle *handle);
+	int (*cfg_mac_speed_dup_h)(struct hnae3_handle *handle, int speed,
+				   u8 duplex);
+
+	void (*get_media_type)(struct hnae3_handle *handle, u8 *media_type);
+	void (*adjust_link)(struct hnae3_handle *handle, int speed, int duplex);
+	int (*set_loopback)(struct hnae3_handle *handle,
+			    enum hnae3_loop loop_mode, bool en);
+
+	void (*set_promisc_mode)(struct hnae3_handle *handle, u32 en);
+	int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
+
+	void (*get_pauseparam)(struct hnae3_handle *handle,
+			       u32 *auto_neg, u32 *rx_en, u32 *tx_en);
+	int (*set_pauseparam)(struct hnae3_handle *handle,
+			      u32 auto_neg, u32 rx_en, u32 tx_en);
+
+	int (*set_autoneg)(struct hnae3_handle *handle, bool enable);
+	int (*get_autoneg)(struct hnae3_handle *handle);
+
+	void (*get_coalesce_usecs)(struct hnae3_handle *handle,
+				   u32 *tx_usecs, u32 *rx_usecs);
+	void (*get_rx_max_coalesced_frames)(struct hnae3_handle *handle,
+					    u32 *tx_frames, u32 *rx_frames);
+	int (*set_coalesce_usecs)(struct hnae3_handle *handle, u32 timeout);
+	int (*set_coalesce_frames)(struct hnae3_handle *handle,
+				   u32 coalesce_frames);
+	void (*get_coalesce_range)(struct hnae3_handle *handle,
+				   u32 *tx_frames_low, u32 *rx_frames_low,
+				   u32 *tx_frames_high, u32 *rx_frames_high,
+				   u32 *tx_usecs_low, u32 *rx_usecs_low,
+				   u32 *tx_usecs_high, u32 *rx_usecs_high);
+
+	void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
+	int (*set_mac_addr)(struct hnae3_handle *handle, void *p);
+	int (*add_uc_addr)(struct hnae3_handle *handle,
+			   const unsigned char *addr);
+	int (*rm_uc_addr)(struct hnae3_handle *handle,
+			  const unsigned char *addr);
+	int (*set_mc_addr)(struct hnae3_handle *handle, void *addr);
+	int (*add_mc_addr)(struct hnae3_handle *handle,
+			   const unsigned char *addr);
+	int (*rm_mc_addr)(struct hnae3_handle *handle,
+			  const unsigned char *addr);
+
+	void (*set_tso_stats)(struct hnae3_handle *handle, int enable);
+	void (*update_stats)(struct hnae3_handle *handle,
+			     struct net_device_stats *net_stats);
+	void (*get_stats)(struct hnae3_handle *handle, u64 *data);
+
+	void (*get_strings)(struct hnae3_handle *handle,
+			    u32 stringset, u8 *data);
+	int (*get_sset_count)(struct hnae3_handle *handle, int stringset);
+
+	void (*get_regs)(struct hnae3_handle *handle, void *data);
+	int (*get_regs_len)(struct hnae3_handle *handle);
+
+	u32 (*get_rss_key_size)(struct hnae3_handle *handle);
+	u32 (*get_rss_indir_size)(struct hnae3_handle *handle);
+	int (*get_rss)(struct hnae3_handle *handle, u32 *indir, u8 *key,
+		       u8 *hfunc);
+	int (*set_rss)(struct hnae3_handle *handle, const u32 *indir,
+		       const u8 *key, const u8 hfunc);
+
+	int (*get_tc_size)(struct hnae3_handle *handle);
+
+	int (*get_vector)(struct hnae3_handle *handle, u16 vector_num,
+			  struct hnae3_vector_info *vector_info);
+	int (*map_ring_to_vector)(struct hnae3_handle *handle,
+				  int vector_num,
+				  struct hnae3_ring_chain_node *vr_chain);
+	int (*unmap_ring_from_vector)(struct hnae3_handle *handle,
+				      int vector_num,
+				      struct hnae3_ring_chain_node *vr_chain);
+
+	int (*add_tunnel_udp)(struct hnae3_handle *handle, u16 port_num);
+	int (*del_tunnel_udp)(struct hnae3_handle *handle, u16 port_num);
+
+	void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+	u32 (*get_fw_version)(struct hnae3_handle *handle);
+	void (*get_mdix_mode)(struct hnae3_handle *handle,
+			      u8 *tp_mdix_ctrl, u8 *tp_mdix);
+
+	int (*set_vlan_filter)(struct hnae3_handle *handle, __be16 proto,
+			       u16 vlan_id, bool is_kill);
+	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
+				  u16 vlan, u8 qos, __be16 proto);
+};
+
+struct hnae3_ae_algo {
+	const struct hnae3_ae_ops *ops;
+	struct list_head node;
+	char name[HNAE3_CLASS_NAME_SIZE];
+	const struct pci_device_id *pdev_id_table;
+};
+
+#define HNAE3_INT_NAME_LEN        (IFNAMSIZ + 16)
+#define HNAE3_ITR_COUNTDOWN_START 100
+
+struct hnae3_tc_info {
+	u16	tqp_offset;	/* TQP offset from base TQP */
+	u16	tqp_count;	/* Total TQPs */
+	u8	up;		/* user priority */
+	u8	tc;		/* TC index */
+	bool	enable;		/* If this TC is enable or not */
+};
+
+#define HNAE3_MAX_TC		8
+struct hnae3_knic_private_info {
+	struct net_device *netdev; /* Set by KNIC client when init instance */
+	u16 rss_size;		   /* Allocated RSS queues */
+	u16 rx_buf_len;
+	u16 num_desc;
+
+	u8 num_tc;		   /* Total number of enabled TCs */
+	struct hnae3_tc_info tc_info[HNAE3_MAX_TC]; /* Idx of array is HW TC */
+
+	u16 num_tqps;		  /* total number of TQPs in this handle */
+	struct hnae3_queue **tqp;  /* array base of all TQPs in this instance */
+};
+
+struct hnae3_roce_private_info {
+	struct net_device *netdev;
+	void __iomem *roce_io_base;
+	int base_vector;
+	int num_vectors;
+};
+
+struct hnae3_unic_private_info {
+	struct net_device *netdev;
+	u16 rx_buf_len;
+	u16 num_desc;
+	u16 num_tqps;	/* total number of tqps in this handle */
+	struct hnae3_queue **tqp;  /* array base of all TQPs of this instance */
+};
+
+#define HNAE3_SUPPORT_MAC_LOOPBACK    1
+#define HNAE3_SUPPORT_PHY_LOOPBACK    2
+#define HNAE3_SUPPORT_SERDES_LOOPBACK 4
+
+struct hnae3_handle {
+	struct hnae3_client *client;
+	struct pci_dev *pdev;
+	void *priv;
+	struct hnae3_ae_algo *ae_algo;  /* the class who provides this handle */
+	u64 flags; /* Indicate the capabilities for this handle*/
+
+	union {
+		struct net_device *netdev; /* first member */
+		struct hnae3_knic_private_info kinfo;
+		struct hnae3_unic_private_info uinfo;
+		struct hnae3_roce_private_info rinfo;
+	};
+
+	u32 numa_node_mask;	/* for multi-chip support */
+};
+
+#define hnae_set_field(origin, mask, shift, val) \
+	do { \
+		(origin) &= (~(mask)); \
+		(origin) |= ((val) << (shift)) & (mask); \
+	} while (0)
+#define hnae_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))
+
+#define hnae_set_bit(origin, shift, val) \
+	hnae_set_field((origin), (0x1 << (shift)), (shift), (val))
+#define hnae_get_bit(origin, shift) \
+	hnae_get_field((origin), (0x1 << (shift)), (shift))
+
+int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
+void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
+
+void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo);
+int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo);
+
+void hnae3_unregister_client(struct hnae3_client *client);
+int hnae3_register_client(struct hnae3_client *client);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
new file mode 100644
index 000000000000..162e8a42acd0
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the HISILICON network device drivers.
+#
+
+ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+
+obj-$(CONFIG_HNS3_HCLGE) += hclge.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
+
+obj-$(CONFIG_HNS3_ENET) += hns3.o
+hns3-objs = hns3_enet.o hns3_ethtool.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
new file mode 100644
index 000000000000..bc869842728f
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/dma-direction.h>
+#include "hclge_cmd.h"
+#include "hnae3.h"
+#include "hclge_main.h"
+
+#define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ)
+#define hclge_ring_to_dma_dir(ring) (hclge_is_csq(ring) ? \
+	DMA_TO_DEVICE : DMA_FROM_DEVICE)
+#define cmq_ring_to_dev(ring)   (&(ring)->dev->pdev->dev)
+
+static int hclge_ring_space(struct hclge_cmq_ring *ring)
+{
+	int ntu = ring->next_to_use;
+	int ntc = ring->next_to_clean;
+	int used = (ntu - ntc + ring->desc_num) % ring->desc_num;
+
+	return ring->desc_num - used - 1;
+}
+
+static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
+{
+	int size  = ring->desc_num * sizeof(struct hclge_desc);
+
+	ring->desc = kzalloc(size, GFP_KERNEL);
+	if (!ring->desc)
+		return -ENOMEM;
+
+	ring->desc_dma_addr = dma_map_single(cmq_ring_to_dev(ring), ring->desc,
+					     size, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(cmq_ring_to_dev(ring), ring->desc_dma_addr)) {
+		ring->desc_dma_addr = 0;
+		kfree(ring->desc);
+		ring->desc = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring)
+{
+	dma_unmap_single(cmq_ring_to_dev(ring), ring->desc_dma_addr,
+			 ring->desc_num * sizeof(ring->desc[0]),
+			 DMA_BIDIRECTIONAL);
+
+	ring->desc_dma_addr = 0;
+	kfree(ring->desc);
+	ring->desc = NULL;
+}
+
+static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type)
+{
+	struct hclge_hw *hw = &hdev->hw;
+	struct hclge_cmq_ring *ring =
+		(ring_type == HCLGE_TYPE_CSQ) ? &hw->cmq.csq : &hw->cmq.crq;
+	int ret;
+
+	ring->flag = ring_type;
+	ring->dev = hdev;
+
+	ret = hclge_alloc_cmd_desc(ring);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "descriptor %s alloc error %d\n",
+			(ring_type == HCLGE_TYPE_CSQ) ? "CSQ" : "CRQ", ret);
+		return ret;
+	}
+
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+
+	return 0;
+}
+
+void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
+				enum hclge_opcode_type opcode, bool is_read)
+{
+	memset((void *)desc, 0, sizeof(struct hclge_desc));
+	desc->opcode = cpu_to_le16(opcode);
+	desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+
+	if (is_read)
+		desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR);
+	else
+		desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+}
+
+static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
+{
+	dma_addr_t dma = ring->desc_dma_addr;
+	struct hclge_dev *hdev = ring->dev;
+	struct hclge_hw *hw = &hdev->hw;
+
+	if (ring->flag == HCLGE_TYPE_CSQ) {
+		hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_L_REG,
+				(u32)dma);
+		hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG,
+				(u32)((dma >> 31) >> 1));
+		hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG,
+				(ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
+				HCLGE_NIC_CMQ_ENABLE);
+		hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
+		hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
+	} else {
+		hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_L_REG,
+				(u32)dma);
+		hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG,
+				(u32)((dma >> 31) >> 1));
+		hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG,
+				(ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
+				HCLGE_NIC_CMQ_ENABLE);
+		hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
+		hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
+	}
+}
+
+static void hclge_cmd_init_regs(struct hclge_hw *hw)
+{
+	hclge_cmd_config_regs(&hw->cmq.csq);
+	hclge_cmd_config_regs(&hw->cmq.crq);
+}
+
+static int hclge_cmd_csq_clean(struct hclge_hw *hw)
+{
+	struct hclge_cmq_ring *csq = &hw->cmq.csq;
+	u16 ntc = csq->next_to_clean;
+	struct hclge_desc *desc;
+	int clean = 0;
+	u32 head;
+
+	desc = &csq->desc[ntc];
+	head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
+
+	while (head != ntc) {
+		memset(desc, 0, sizeof(*desc));
+		ntc++;
+		if (ntc == csq->desc_num)
+			ntc = 0;
+		desc = &csq->desc[ntc];
+		clean++;
+	}
+	csq->next_to_clean = ntc;
+
+	return clean;
+}
+
+static int hclge_cmd_csq_done(struct hclge_hw *hw)
+{
+	u32 head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
+	return head == hw->cmq.csq.next_to_use;
+}
+
+static bool hclge_is_special_opcode(u16 opcode)
+{
+	u16 spec_opcode[3] = {0x0030, 0x0031, 0x0032};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
+		if (spec_opcode[i] == opcode)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * hclge_cmd_send - send command to command queue
+ * @hw: pointer to the hw struct
+ * @desc: prefilled descriptor for describing the command
+ * @num : the number of descriptors to be sent
+ *
+ * This is the main send command for command queue, it
+ * sends the queue, cleans the queue, etc
+ **/
+int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
+{
+	struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
+	struct hclge_desc *desc_to_use;
+	bool complete = false;
+	u32 timeout = 0;
+	int handle = 0;
+	int retval = 0;
+	u16 opcode, desc_ret;
+	int ntc;
+
+	spin_lock_bh(&hw->cmq.csq.lock);
+
+	if (num > hclge_ring_space(&hw->cmq.csq)) {
+		spin_unlock_bh(&hw->cmq.csq.lock);
+		return -EBUSY;
+	}
+
+	/**
+	 * Record the location of desc in the ring for this time
+	 * which will be use for hardware to write back
+	 */
+	ntc = hw->cmq.csq.next_to_use;
+	opcode = desc[0].opcode;
+	while (handle < num) {
+		desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use];
+		*desc_to_use = desc[handle];
+		(hw->cmq.csq.next_to_use)++;
+		if (hw->cmq.csq.next_to_use == hw->cmq.csq.desc_num)
+			hw->cmq.csq.next_to_use = 0;
+		handle++;
+	}
+
+	/* Write to hardware */
+	hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, hw->cmq.csq.next_to_use);
+
+	/**
+	 * If the command is sync, wait for the firmware to write back,
+	 * if multi descriptors to be sent, use the first one to check
+	 */
+	if (HCLGE_SEND_SYNC(desc->flag)) {
+		do {
+			if (hclge_cmd_csq_done(hw))
+				break;
+			udelay(1);
+			timeout++;
+		} while (timeout < hw->cmq.tx_timeout);
+	}
+
+	if (hclge_cmd_csq_done(hw)) {
+		complete = true;
+		handle = 0;
+		while (handle < num) {
+			/* Get the result of hardware write back */
+			desc_to_use = &hw->cmq.csq.desc[ntc];
+			desc[handle] = *desc_to_use;
+			pr_debug("Get cmd desc:\n");
+
+			if (likely(!hclge_is_special_opcode(opcode)))
+				desc_ret = desc[handle].retval;
+			else
+				desc_ret = desc[0].retval;
+
+			if ((enum hclge_cmd_return_status)desc_ret ==
+			    HCLGE_CMD_EXEC_SUCCESS)
+				retval = 0;
+			else
+				retval = -EIO;
+			hw->cmq.last_status = (enum hclge_cmd_status)desc_ret;
+			ntc++;
+			handle++;
+			if (ntc == hw->cmq.csq.desc_num)
+				ntc = 0;
+		}
+	}
+
+	if (!complete)
+		retval = -EAGAIN;
+
+	/* Clean the command send queue */
+	handle = hclge_cmd_csq_clean(hw);
+	if (handle != num) {
+		dev_warn(&hdev->pdev->dev,
+			 "cleaned %d, need to clean %d\n", handle, num);
+	}
+
+	spin_unlock_bh(&hw->cmq.csq.lock);
+
+	return retval;
+}
+
+enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw,
+						       u32 *version)
+{
+	struct hclge_query_version *resp;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1);
+	resp = (struct hclge_query_version *)desc.data;
+
+	ret = hclge_cmd_send(hw, &desc, 1);
+	if (!ret)
+		*version = le32_to_cpu(resp->firmware);
+
+	return ret;
+}
+
+int hclge_cmd_init(struct hclge_dev *hdev)
+{
+	u32 version;
+	int ret;
+
+	/* Setup the queue entries for use cmd queue */
+	hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
+	hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
+
+	/* Setup the lock for command queue */
+	spin_lock_init(&hdev->hw.cmq.csq.lock);
+	spin_lock_init(&hdev->hw.cmq.crq.lock);
+
+	/* Setup Tx write back timeout */
+	hdev->hw.cmq.tx_timeout = HCLGE_CMDQ_TX_TIMEOUT;
+
+	/* Setup queue rings */
+	ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CSQ);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"CSQ ring setup error %d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CRQ);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"CRQ ring setup error %d\n", ret);
+		goto err_csq;
+	}
+
+	hclge_cmd_init_regs(&hdev->hw);
+
+	ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"firmware version query failed %d\n", ret);
+		return ret;
+	}
+	hdev->fw_version = version;
+
+	dev_info(&hdev->pdev->dev, "The firware version is %08x\n", version);
+
+	return 0;
+err_csq:
+	hclge_free_cmd_desc(&hdev->hw.cmq.csq);
+	return ret;
+}
+
+static void hclge_destroy_queue(struct hclge_cmq_ring *ring)
+{
+	spin_lock_bh(&ring->lock);
+	hclge_free_cmd_desc(ring);
+	spin_unlock_bh(&ring->lock);
+}
+
+void hclge_destroy_cmd_queue(struct hclge_hw *hw)
+{
+	hclge_destroy_queue(&hw->cmq.csq);
+	hclge_destroy_queue(&hw->cmq.crq);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
new file mode 100644
index 000000000000..91ae0135ee50
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_CMD_H
+#define __HCLGE_CMD_H
+#include <linux/types.h>
+#include <linux/io.h>
+
+#define HCLGE_CMDQ_TX_TIMEOUT		1000
+
+struct hclge_dev;
+struct hclge_desc {
+	__le16 opcode;
+
+#define HCLGE_CMDQ_RX_INVLD_B		0
+#define HCLGE_CMDQ_RX_OUTVLD_B		1
+
+	__le16 flag;
+	__le16 retval;
+	__le16 rsv;
+	__le32 data[6];
+};
+
+struct hclge_desc_cb {
+	dma_addr_t dma;
+	void *va;
+	u32 length;
+};
+
+struct hclge_cmq_ring {
+	dma_addr_t desc_dma_addr;
+	struct hclge_desc *desc;
+	struct hclge_desc_cb *desc_cb;
+	struct hclge_dev  *dev;
+	u32 head;
+	u32 tail;
+
+	u16 buf_size;
+	u16 desc_num;
+	int next_to_use;
+	int next_to_clean;
+	u8 flag;
+	spinlock_t lock; /* Command queue lock */
+};
+
+enum hclge_cmd_return_status {
+	HCLGE_CMD_EXEC_SUCCESS	= 0,
+	HCLGE_CMD_NO_AUTH	= 1,
+	HCLGE_CMD_NOT_EXEC	= 2,
+	HCLGE_CMD_QUEUE_FULL	= 3,
+};
+
+enum hclge_cmd_status {
+	HCLGE_STATUS_SUCCESS	= 0,
+	HCLGE_ERR_CSQ_FULL	= -1,
+	HCLGE_ERR_CSQ_TIMEOUT	= -2,
+	HCLGE_ERR_CSQ_ERROR	= -3,
+};
+
+struct hclge_cmq {
+	struct hclge_cmq_ring csq;
+	struct hclge_cmq_ring crq;
+	u16 tx_timeout; /* Tx timeout */
+	enum hclge_cmd_status last_status;
+};
+
+#define HCLGE_CMD_FLAG_IN_VALID_SHIFT	0
+#define HCLGE_CMD_FLAG_OUT_VALID_SHIFT	1
+#define HCLGE_CMD_FLAG_NEXT_SHIFT	2
+#define HCLGE_CMD_FLAG_WR_OR_RD_SHIFT	3
+#define HCLGE_CMD_FLAG_NO_INTR_SHIFT	4
+#define HCLGE_CMD_FLAG_ERR_INTR_SHIFT	5
+
+#define HCLGE_CMD_FLAG_IN	BIT(HCLGE_CMD_FLAG_IN_VALID_SHIFT)
+#define HCLGE_CMD_FLAG_OUT	BIT(HCLGE_CMD_FLAG_OUT_VALID_SHIFT)
+#define HCLGE_CMD_FLAG_NEXT	BIT(HCLGE_CMD_FLAG_NEXT_SHIFT)
+#define HCLGE_CMD_FLAG_WR	BIT(HCLGE_CMD_FLAG_WR_OR_RD_SHIFT)
+#define HCLGE_CMD_FLAG_NO_INTR	BIT(HCLGE_CMD_FLAG_NO_INTR_SHIFT)
+#define HCLGE_CMD_FLAG_ERR_INTR	BIT(HCLGE_CMD_FLAG_ERR_INTR_SHIFT)
+
+enum hclge_opcode_type {
+	/* Generic command */
+	HCLGE_OPC_QUERY_FW_VER		= 0x0001,
+	HCLGE_OPC_CFG_RST_TRIGGER	= 0x0020,
+	HCLGE_OPC_GBL_RST_STATUS	= 0x0021,
+	HCLGE_OPC_QUERY_FUNC_STATUS	= 0x0022,
+	HCLGE_OPC_QUERY_PF_RSRC		= 0x0023,
+	HCLGE_OPC_QUERY_VF_RSRC		= 0x0024,
+	HCLGE_OPC_GET_CFG_PARAM		= 0x0025,
+
+	HCLGE_OPC_STATS_64_BIT		= 0x0030,
+	HCLGE_OPC_STATS_32_BIT		= 0x0031,
+	HCLGE_OPC_STATS_MAC		= 0x0032,
+	/* Device management command */
+
+	/* MAC commond */
+	HCLGE_OPC_CONFIG_MAC_MODE	= 0x0301,
+	HCLGE_OPC_CONFIG_AN_MODE	= 0x0304,
+	HCLGE_OPC_QUERY_AN_RESULT	= 0x0306,
+	HCLGE_OPC_QUERY_LINK_STATUS	= 0x0307,
+	HCLGE_OPC_CONFIG_MAX_FRM_SIZE	= 0x0308,
+	HCLGE_OPC_CONFIG_SPEED_DUP	= 0x0309,
+	/* MACSEC command */
+
+	/* PFC/Pause CMD*/
+	HCLGE_OPC_CFG_MAC_PAUSE_EN      = 0x0701,
+	HCLGE_OPC_CFG_PFC_PAUSE_EN      = 0x0702,
+	HCLGE_OPC_CFG_MAC_PARA          = 0x0703,
+	HCLGE_OPC_CFG_PFC_PARA          = 0x0704,
+	HCLGE_OPC_QUERY_MAC_TX_PKT_CNT  = 0x0705,
+	HCLGE_OPC_QUERY_MAC_RX_PKT_CNT  = 0x0706,
+	HCLGE_OPC_QUERY_PFC_TX_PKT_CNT  = 0x0707,
+	HCLGE_OPC_QUERY_PFC_RX_PKT_CNT  = 0x0708,
+	HCLGE_OPC_PRI_TO_TC_MAPPING     = 0x0709,
+	HCLGE_OPC_QOS_MAP               = 0x070A,
+
+	/* ETS/scheduler commands */
+	HCLGE_OPC_TM_PG_TO_PRI_LINK	= 0x0804,
+	HCLGE_OPC_TM_QS_TO_PRI_LINK     = 0x0805,
+	HCLGE_OPC_TM_NQ_TO_QS_LINK      = 0x0806,
+	HCLGE_OPC_TM_RQ_TO_QS_LINK      = 0x0807,
+	HCLGE_OPC_TM_PORT_WEIGHT        = 0x0808,
+	HCLGE_OPC_TM_PG_WEIGHT          = 0x0809,
+	HCLGE_OPC_TM_QS_WEIGHT          = 0x080A,
+	HCLGE_OPC_TM_PRI_WEIGHT         = 0x080B,
+	HCLGE_OPC_TM_PRI_C_SHAPPING     = 0x080C,
+	HCLGE_OPC_TM_PRI_P_SHAPPING     = 0x080D,
+	HCLGE_OPC_TM_PG_C_SHAPPING      = 0x080E,
+	HCLGE_OPC_TM_PG_P_SHAPPING      = 0x080F,
+	HCLGE_OPC_TM_PORT_SHAPPING      = 0x0810,
+	HCLGE_OPC_TM_PG_SCH_MODE_CFG    = 0x0812,
+	HCLGE_OPC_TM_PRI_SCH_MODE_CFG   = 0x0813,
+	HCLGE_OPC_TM_QS_SCH_MODE_CFG    = 0x0814,
+	HCLGE_OPC_TM_BP_TO_QSET_MAPPING = 0x0815,
+
+	/* Packet buffer allocate command */
+	HCLGE_OPC_TX_BUFF_ALLOC		= 0x0901,
+	HCLGE_OPC_RX_PRIV_BUFF_ALLOC	= 0x0902,
+	HCLGE_OPC_RX_PRIV_WL_ALLOC	= 0x0903,
+	HCLGE_OPC_RX_COM_THRD_ALLOC	= 0x0904,
+	HCLGE_OPC_RX_COM_WL_ALLOC	= 0x0905,
+	HCLGE_OPC_RX_GBL_PKT_CNT	= 0x0906,
+
+	/* PTP command */
+	/* TQP management command */
+	HCLGE_OPC_SET_TQP_MAP		= 0x0A01,
+
+	/* TQP command */
+	HCLGE_OPC_CFG_TX_QUEUE		= 0x0B01,
+	HCLGE_OPC_QUERY_TX_POINTER	= 0x0B02,
+	HCLGE_OPC_QUERY_TX_STATUS	= 0x0B03,
+	HCLGE_OPC_CFG_RX_QUEUE		= 0x0B11,
+	HCLGE_OPC_QUERY_RX_POINTER	= 0x0B12,
+	HCLGE_OPC_QUERY_RX_STATUS	= 0x0B13,
+	HCLGE_OPC_STASH_RX_QUEUE_LRO	= 0x0B16,
+	HCLGE_OPC_CFG_RX_QUEUE_LRO	= 0x0B17,
+	HCLGE_OPC_CFG_COM_TQP_QUEUE	= 0x0B20,
+	HCLGE_OPC_RESET_TQP_QUEUE	= 0x0B22,
+
+	/* TSO cmd */
+	HCLGE_OPC_TSO_GENERIC_CONFIG	= 0x0C01,
+
+	/* RSS cmd */
+	HCLGE_OPC_RSS_GENERIC_CONFIG	= 0x0D01,
+	HCLGE_OPC_RSS_INDIR_TABLE	= 0x0D07,
+	HCLGE_OPC_RSS_TC_MODE		= 0x0D08,
+	HCLGE_OPC_RSS_INPUT_TUPLE	= 0x0D02,
+
+	/* Promisuous mode command */
+	HCLGE_OPC_CFG_PROMISC_MODE	= 0x0E01,
+
+	/* Interrupts cmd */
+	HCLGE_OPC_ADD_RING_TO_VECTOR	= 0x1503,
+	HCLGE_OPC_DEL_RING_TO_VECTOR	= 0x1504,
+
+	/* MAC command */
+	HCLGE_OPC_MAC_VLAN_ADD		    = 0x1000,
+	HCLGE_OPC_MAC_VLAN_REMOVE	    = 0x1001,
+	HCLGE_OPC_MAC_VLAN_TYPE_ID	    = 0x1002,
+	HCLGE_OPC_MAC_VLAN_INSERT	    = 0x1003,
+	HCLGE_OPC_MAC_ETHTYPE_ADD	    = 0x1010,
+	HCLGE_OPC_MAC_ETHTYPE_REMOVE	= 0x1011,
+
+	/* Multicast linear table cmd */
+	HCLGE_OPC_MTA_MAC_MODE_CFG	    = 0x1020,
+	HCLGE_OPC_MTA_MAC_FUNC_CFG	    = 0x1021,
+	HCLGE_OPC_MTA_TBL_ITEM_CFG	    = 0x1022,
+	HCLGE_OPC_MTA_TBL_ITEM_QUERY	= 0x1023,
+
+	/* VLAN command */
+	HCLGE_OPC_VLAN_FILTER_CTRL	    = 0x1100,
+	HCLGE_OPC_VLAN_FILTER_PF_CFG	= 0x1101,
+	HCLGE_OPC_VLAN_FILTER_VF_CFG	= 0x1102,
+
+	/* MDIO command */
+	HCLGE_OPC_MDIO_CONFIG		= 0x1900,
+
+	/* QCN command */
+	HCLGE_OPC_QCN_MOD_CFG		= 0x1A01,
+	HCLGE_OPC_QCN_GRP_TMPLT_CFG	= 0x1A02,
+	HCLGE_OPC_QCN_SHAPPING_IR_CFG	= 0x1A03,
+	HCLGE_OPC_QCN_SHAPPING_BS_CFG	= 0x1A04,
+	HCLGE_OPC_QCN_QSET_LINK_CFG	= 0x1A05,
+	HCLGE_OPC_QCN_RP_STATUS_GET	= 0x1A06,
+	HCLGE_OPC_QCN_AJUST_INIT	= 0x1A07,
+	HCLGE_OPC_QCN_DFX_CNT_STATUS    = 0x1A08,
+
+	/* Mailbox cmd */
+	HCLGEVF_OPC_MBX_PF_TO_VF	= 0x2000,
+};
+
+#define HCLGE_TQP_REG_OFFSET		0x80000
+#define HCLGE_TQP_REG_SIZE		0x200
+
+#define HCLGE_RCB_INIT_QUERY_TIMEOUT	10
+#define HCLGE_RCB_INIT_FLAG_EN_B	0
+#define HCLGE_RCB_INIT_FLAG_FINI_B	8
+struct hclge_config_rcb_init {
+	__le16 rcb_init_flag;
+	u8 rsv[22];
+};
+
+struct hclge_tqp_map {
+	__le16 tqp_id;	/* Absolute tqp id for in this pf */
+	u8 tqp_vf;	/* VF id */
+#define HCLGE_TQP_MAP_TYPE_PF		0
+#define HCLGE_TQP_MAP_TYPE_VF		1
+#define HCLGE_TQP_MAP_TYPE_B		0
+#define HCLGE_TQP_MAP_EN_B		1
+	u8 tqp_flag;	/* Indicate it's pf or vf tqp */
+	__le16 tqp_vid; /* Virtual id in this pf/vf */
+	u8 rsv[18];
+};
+
+#define HCLGE_VECTOR_ELEMENTS_PER_CMD	11
+
+enum hclge_int_type {
+	HCLGE_INT_TX,
+	HCLGE_INT_RX,
+	HCLGE_INT_EVENT,
+};
+
+struct hclge_ctrl_vector_chain {
+	u8 int_vector_id;
+	u8 int_cause_num;
+#define HCLGE_INT_TYPE_S	0
+#define HCLGE_INT_TYPE_M	0x3
+#define HCLGE_TQP_ID_S		2
+#define HCLGE_TQP_ID_M		(0x3fff << HCLGE_TQP_ID_S)
+	__le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
+};
+
+#define HCLGE_TC_NUM		8
+#define HCLGE_TC0_PRI_BUF_EN_B	15 /* Bit 15 indicate enable or not */
+#define HCLGE_BUF_UNIT_S	7  /* Buf size is united by 128 bytes */
+struct hclge_tx_buff_alloc {
+	__le16 tx_pkt_buff[HCLGE_TC_NUM];
+	u8 tx_buff_rsv[8];
+};
+
+struct hclge_rx_priv_buff {
+	__le16 buf_num[HCLGE_TC_NUM];
+	u8 rsv[8];
+};
+
+struct hclge_query_version {
+	__le32 firmware;
+	__le32 firmware_rsv[5];
+};
+
+#define HCLGE_RX_PRIV_EN_B	15
+#define HCLGE_TC_NUM_ONE_DESC	4
+struct hclge_priv_wl {
+	__le16 high;
+	__le16 low;
+};
+
+struct hclge_rx_priv_wl_buf {
+	struct hclge_priv_wl tc_wl[HCLGE_TC_NUM_ONE_DESC];
+};
+
+struct hclge_rx_com_thrd {
+	struct hclge_priv_wl com_thrd[HCLGE_TC_NUM_ONE_DESC];
+};
+
+struct hclge_rx_com_wl {
+	struct hclge_priv_wl com_wl;
+};
+
+struct hclge_waterline {
+	u32 low;
+	u32 high;
+};
+
+struct hclge_tc_thrd {
+	u32 low;
+	u32 high;
+};
+
+struct hclge_priv_buf {
+	struct hclge_waterline wl;	/* Waterline for low and high*/
+	u32 buf_size;	/* TC private buffer size */
+	u32 enable;	/* Enable TC private buffer or not */
+};
+
+#define HCLGE_MAX_TC_NUM	8
+struct hclge_shared_buf {
+	struct hclge_waterline self;
+	struct hclge_tc_thrd tc_thrd[HCLGE_MAX_TC_NUM];
+	u32 buf_size;
+};
+
+#define HCLGE_RX_COM_WL_EN_B	15
+struct hclge_rx_com_wl_buf {
+	__le16 high_wl;
+	__le16 low_wl;
+	u8 rsv[20];
+};
+
+#define HCLGE_RX_PKT_EN_B	15
+struct hclge_rx_pkt_buf {
+	__le16 high_pkt;
+	__le16 low_pkt;
+	u8 rsv[20];
+};
+
+#define HCLGE_PF_STATE_DONE_B	0
+#define HCLGE_PF_STATE_MAIN_B	1
+#define HCLGE_PF_STATE_BOND_B	2
+#define HCLGE_PF_STATE_MAC_N_B	6
+#define HCLGE_PF_MAC_NUM_MASK	0x3
+#define HCLGE_PF_STATE_MAIN	BIT(HCLGE_PF_STATE_MAIN_B)
+#define HCLGE_PF_STATE_DONE	BIT(HCLGE_PF_STATE_DONE_B)
+struct hclge_func_status {
+	__le32  vf_rst_state[4];
+	u8 pf_state;
+	u8 mac_id;
+	u8 rsv1;
+	u8 pf_cnt_in_mac;
+	u8 pf_num;
+	u8 vf_num;
+	u8 rsv[2];
+};
+
+struct hclge_pf_res {
+	__le16 tqp_num;
+	__le16 buf_size;
+	__le16 msixcap_localid_ba_nic;
+	__le16 msixcap_localid_ba_rocee;
+#define HCLGE_PF_VEC_NUM_S		0
+#define HCLGE_PF_VEC_NUM_M		(0xff << HCLGE_PF_VEC_NUM_S)
+	__le16 pf_intr_vector_number;
+	__le16 pf_own_fun_number;
+	__le32 rsv[3];
+};
+
+#define HCLGE_CFG_OFFSET_S	0
+#define HCLGE_CFG_OFFSET_M	0xfffff /* Byte (8-10.3) */
+#define HCLGE_CFG_RD_LEN_S	24
+#define HCLGE_CFG_RD_LEN_M	(0xf << HCLGE_CFG_RD_LEN_S)
+#define HCLGE_CFG_RD_LEN_BYTES	16
+#define HCLGE_CFG_RD_LEN_UNIT	4
+
+#define HCLGE_CFG_VMDQ_S	0
+#define HCLGE_CFG_VMDQ_M	(0xff << HCLGE_CFG_VMDQ_S)
+#define HCLGE_CFG_TC_NUM_S	8
+#define HCLGE_CFG_TC_NUM_M	(0xff << HCLGE_CFG_TC_NUM_S)
+#define HCLGE_CFG_TQP_DESC_N_S	16
+#define HCLGE_CFG_TQP_DESC_N_M	(0xffff << HCLGE_CFG_TQP_DESC_N_S)
+#define HCLGE_CFG_PHY_ADDR_S	0
+#define HCLGE_CFG_PHY_ADDR_M	(0x1f << HCLGE_CFG_PHY_ADDR_S)
+#define HCLGE_CFG_MEDIA_TP_S	8
+#define HCLGE_CFG_MEDIA_TP_M	(0xff << HCLGE_CFG_MEDIA_TP_S)
+#define HCLGE_CFG_RX_BUF_LEN_S	16
+#define HCLGE_CFG_RX_BUF_LEN_M	(0xffff << HCLGE_CFG_RX_BUF_LEN_S)
+#define HCLGE_CFG_MAC_ADDR_H_S	0
+#define HCLGE_CFG_MAC_ADDR_H_M	(0xffff << HCLGE_CFG_MAC_ADDR_H_S)
+#define HCLGE_CFG_DEFAULT_SPEED_S	16
+#define HCLGE_CFG_DEFAULT_SPEED_M	(0xff << HCLGE_CFG_DEFAULT_SPEED_S)
+
+struct hclge_cfg_param {
+	__le32 offset;
+	__le32 rsv;
+	__le32 param[4];
+};
+
+#define HCLGE_MAC_MODE		0x0
+#define HCLGE_DESC_NUM		0x40
+
+#define HCLGE_ALLOC_VALID_B	0
+struct hclge_vf_num {
+	u8 alloc_valid;
+	u8 rsv[23];
+};
+
+#define HCLGE_RSS_DEFAULT_OUTPORT_B	4
+#define HCLGE_RSS_HASH_KEY_OFFSET_B	4
+#define HCLGE_RSS_HASH_KEY_NUM		16
+struct hclge_rss_config {
+	u8 hash_config;
+	u8 rsv[7];
+	u8 hash_key[HCLGE_RSS_HASH_KEY_NUM];
+};
+
+struct hclge_rss_input_tuple {
+	u8 ipv4_tcp_en;
+	u8 ipv4_udp_en;
+	u8 ipv4_sctp_en;
+	u8 ipv4_fragment_en;
+	u8 ipv6_tcp_en;
+	u8 ipv6_udp_en;
+	u8 ipv6_sctp_en;
+	u8 ipv6_fragment_en;
+	u8 rsv[16];
+};
+
+#define HCLGE_RSS_CFG_TBL_SIZE	16
+
+struct hclge_rss_indirection_table {
+	u16 start_table_index;
+	u16 rss_set_bitmap;
+	u8 rsv[4];
+	u8 rss_result[HCLGE_RSS_CFG_TBL_SIZE];
+};
+
+#define HCLGE_RSS_TC_OFFSET_S		0
+#define HCLGE_RSS_TC_OFFSET_M		(0x3ff << HCLGE_RSS_TC_OFFSET_S)
+#define HCLGE_RSS_TC_SIZE_S		12
+#define HCLGE_RSS_TC_SIZE_M		(0x7 << HCLGE_RSS_TC_SIZE_S)
+#define HCLGE_RSS_TC_VALID_B		15
+struct hclge_rss_tc_mode {
+	u16 rss_tc_mode[HCLGE_MAX_TC_NUM];
+	u8 rsv[8];
+};
+
+#define HCLGE_LINK_STS_B	0
+#define HCLGE_LINK_STATUS	BIT(HCLGE_LINK_STS_B)
+struct hclge_link_status {
+	u8 status;
+	u8 rsv[23];
+};
+
+struct hclge_promisc_param {
+	u8 vf_id;
+	u8 enable;
+};
+
+#define HCLGE_PROMISC_EN_B	1
+#define HCLGE_PROMISC_EN_ALL	0x7
+#define HCLGE_PROMISC_EN_UC	0x1
+#define HCLGE_PROMISC_EN_MC	0x2
+#define HCLGE_PROMISC_EN_BC	0x4
+struct hclge_promisc_cfg {
+	u8 flag;
+	u8 vf_id;
+	__le16 rsv0;
+	u8 rsv1[20];
+};
+
+enum hclge_promisc_type {
+	HCLGE_UNICAST	= 1,
+	HCLGE_MULTICAST	= 2,
+	HCLGE_BROADCAST	= 3,
+};
+
+#define HCLGE_MAC_TX_EN_B	6
+#define HCLGE_MAC_RX_EN_B	7
+#define HCLGE_MAC_PAD_TX_B	11
+#define HCLGE_MAC_PAD_RX_B	12
+#define HCLGE_MAC_1588_TX_B	13
+#define HCLGE_MAC_1588_RX_B	14
+#define HCLGE_MAC_APP_LP_B	15
+#define HCLGE_MAC_LINE_LP_B	16
+#define HCLGE_MAC_FCS_TX_B	17
+#define HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B	18
+#define HCLGE_MAC_RX_FCS_STRIP_B	19
+#define HCLGE_MAC_RX_FCS_B	20
+#define HCLGE_MAC_TX_UNDER_MIN_ERR_B		21
+#define HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B	22
+
+struct hclge_config_mac_mode {
+	__le32 txrx_pad_fcs_loop_en;
+	u8 rsv[20];
+};
+
+#define HCLGE_CFG_SPEED_S		0
+#define HCLGE_CFG_SPEED_M		(0x3f << HCLGE_CFG_SPEED_S)
+
+#define HCLGE_CFG_DUPLEX_B		7
+#define HCLGE_CFG_DUPLEX_M		BIT(HCLGE_CFG_DUPLEX_B)
+
+struct hclge_config_mac_speed_dup {
+	u8 speed_dup;
+
+#define HCLGE_CFG_MAC_SPEED_CHANGE_EN_B	0
+	u8 mac_change_fec_en;
+	u8 rsv[22];
+};
+
+#define HCLGE_QUERY_SPEED_S		3
+#define HCLGE_QUERY_AN_B		0
+#define HCLGE_QUERY_DUPLEX_B		2
+
+#define HCLGE_QUERY_SPEED_M		(0x1f << HCLGE_QUERY_SPEED_S)
+#define HCLGE_QUERY_AN_M		BIT(HCLGE_QUERY_AN_B)
+#define HCLGE_QUERY_DUPLEX_M		BIT(HCLGE_QUERY_DUPLEX_B)
+
+struct hclge_query_an_speed_dup {
+	u8 an_syn_dup_speed;
+	u8 pause;
+	u8 rsv[23];
+};
+
+#define HCLGE_RING_ID_MASK		0x3ff
+#define HCLGE_TQP_ENABLE_B		0
+
+#define HCLGE_MAC_CFG_AN_EN_B		0
+#define HCLGE_MAC_CFG_AN_INT_EN_B	1
+#define HCLGE_MAC_CFG_AN_INT_MSK_B	2
+#define HCLGE_MAC_CFG_AN_INT_CLR_B	3
+#define HCLGE_MAC_CFG_AN_RST_B		4
+
+#define HCLGE_MAC_CFG_AN_EN	BIT(HCLGE_MAC_CFG_AN_EN_B)
+
+struct hclge_config_auto_neg {
+	__le32  cfg_an_cmd_flag;
+	u8      rsv[20];
+};
+
+#define HCLGE_MAC_MIN_MTU		64
+#define HCLGE_MAC_MAX_MTU		9728
+#define HCLGE_MAC_UPLINK_PORT		0x100
+
+struct hclge_config_max_frm_size {
+	__le16  max_frm_size;
+	u8      rsv[22];
+};
+
+enum hclge_mac_vlan_tbl_opcode {
+	HCLGE_MAC_VLAN_ADD,	/* Add new or modify mac_vlan */
+	HCLGE_MAC_VLAN_UPDATE,  /* Modify other fields of this table */
+	HCLGE_MAC_VLAN_REMOVE,  /* Remove a entry through mac_vlan key */
+	HCLGE_MAC_VLAN_LKUP,    /* Lookup a entry through mac_vlan key */
+};
+
+#define HCLGE_MAC_VLAN_BIT0_EN_B	0x0
+#define HCLGE_MAC_VLAN_BIT1_EN_B	0x1
+#define HCLGE_MAC_EPORT_SW_EN_B		0xc
+#define HCLGE_MAC_EPORT_TYPE_B		0xb
+#define HCLGE_MAC_EPORT_VFID_S		0x3
+#define HCLGE_MAC_EPORT_VFID_M		(0xff << HCLGE_MAC_EPORT_VFID_S)
+#define HCLGE_MAC_EPORT_PFID_S		0x0
+#define HCLGE_MAC_EPORT_PFID_M		(0x7 << HCLGE_MAC_EPORT_PFID_S)
+struct hclge_mac_vlan_tbl_entry {
+	u8	flags;
+	u8      resp_code;
+	__le16  vlan_tag;
+	__le32  mac_addr_hi32;
+	__le16  mac_addr_lo16;
+	__le16  rsv1;
+	u8      entry_type;
+	u8      mc_mac_en;
+	__le16  egress_port;
+	__le16  egress_queue;
+	u8      rsv2[6];
+};
+
+#define HCLGE_CFG_MTA_MAC_SEL_S		0x0
+#define HCLGE_CFG_MTA_MAC_SEL_M		(0x3 << HCLGE_CFG_MTA_MAC_SEL_S)
+#define HCLGE_CFG_MTA_MAC_EN_B		0x7
+struct hclge_mta_filter_mode {
+	u8	dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */
+	u8      rsv[23];
+};
+
+#define HCLGE_CFG_FUNC_MTA_ACCEPT_B	0x0
+struct hclge_cfg_func_mta_filter {
+	u8	accept; /* Only used lowest 1 bit */
+	u8      function_id;
+	u8      rsv[22];
+};
+
+#define HCLGE_CFG_MTA_ITEM_ACCEPT_B	0x0
+#define HCLGE_CFG_MTA_ITEM_IDX_S	0x0
+#define HCLGE_CFG_MTA_ITEM_IDX_M	(0xfff << HCLGE_CFG_MTA_ITEM_IDX_S)
+struct hclge_cfg_func_mta_item {
+	u16	item_idx; /* Only used lowest 12 bit */
+	u8      accept;   /* Only used lowest 1 bit */
+	u8      rsv[21];
+};
+
+struct hclge_mac_vlan_add {
+	__le16  flags;
+	__le16  mac_addr_hi16;
+	__le32  mac_addr_lo32;
+	__le32  mac_addr_msk_hi32;
+	__le16  mac_addr_msk_lo16;
+	__le16  vlan_tag;
+	__le16  ingress_port;
+	__le16  egress_port;
+	u8      rsv[4];
+};
+
+#define HNS3_MAC_VLAN_CFG_FLAG_BIT 0
+struct hclge_mac_vlan_remove {
+	__le16  flags;
+	__le16  mac_addr_hi16;
+	__le32  mac_addr_lo32;
+	__le32  mac_addr_msk_hi32;
+	__le16  mac_addr_msk_lo16;
+	__le16  vlan_tag;
+	__le16  ingress_port;
+	__le16  egress_port;
+	u8      rsv[4];
+};
+
+struct hclge_vlan_filter_ctrl {
+	u8 vlan_type;
+	u8 vlan_fe;
+	u8 rsv[22];
+};
+
+struct hclge_vlan_filter_pf_cfg {
+	u8 vlan_offset;
+	u8 vlan_cfg;
+	u8 rsv[2];
+	u8 vlan_offset_bitmap[20];
+};
+
+struct hclge_vlan_filter_vf_cfg {
+	u16 vlan_id;
+	u8  resp_code;
+	u8  rsv;
+	u8  vlan_cfg;
+	u8  rsv1[3];
+	u8  vf_bitmap[16];
+};
+
+struct hclge_cfg_com_tqp_queue {
+	__le16 tqp_id;
+	__le16 stream_id;
+	u8 enable;
+	u8 rsv[19];
+};
+
+struct hclge_cfg_tx_queue_pointer {
+	__le16 tqp_id;
+	__le16 tx_tail;
+	__le16 tx_head;
+	__le16 fbd_num;
+	__le16 ring_offset;
+	u8 rsv[14];
+};
+
+#define HCLGE_TSO_MSS_MIN_S	0
+#define HCLGE_TSO_MSS_MIN_M	(0x3FFF << HCLGE_TSO_MSS_MIN_S)
+
+#define HCLGE_TSO_MSS_MAX_S	16
+#define HCLGE_TSO_MSS_MAX_M	(0x3FFF << HCLGE_TSO_MSS_MAX_S)
+
+struct hclge_cfg_tso_status {
+	__le16 tso_mss_min;
+	__le16 tso_mss_max;
+	u8 rsv[20];
+};
+
+#define HCLGE_TSO_MSS_MIN	256
+#define HCLGE_TSO_MSS_MAX	9668
+
+#define HCLGE_TQP_RESET_B	0
+struct hclge_reset_tqp_queue {
+	__le16 tqp_id;
+	u8 reset_req;
+	u8 ready_to_reset;
+	u8 rsv[20];
+};
+
+#define HCLGE_DEFAULT_TX_BUF		0x4000	 /* 16k  bytes */
+#define HCLGE_TOTAL_PKT_BUF		0x108000 /* 1.03125M bytes */
+#define HCLGE_DEFAULT_DV		0xA000	 /* 40k byte */
+
+#define HCLGE_TYPE_CRQ			0
+#define HCLGE_TYPE_CSQ			1
+#define HCLGE_NIC_CSQ_BASEADDR_L_REG	0x27000
+#define HCLGE_NIC_CSQ_BASEADDR_H_REG	0x27004
+#define HCLGE_NIC_CSQ_DEPTH_REG		0x27008
+#define HCLGE_NIC_CSQ_TAIL_REG		0x27010
+#define HCLGE_NIC_CSQ_HEAD_REG		0x27014
+#define HCLGE_NIC_CRQ_BASEADDR_L_REG	0x27018
+#define HCLGE_NIC_CRQ_BASEADDR_H_REG	0x2701c
+#define HCLGE_NIC_CRQ_DEPTH_REG		0x27020
+#define HCLGE_NIC_CRQ_TAIL_REG		0x27024
+#define HCLGE_NIC_CRQ_HEAD_REG		0x27028
+#define HCLGE_NIC_CMQ_EN_B		16
+#define HCLGE_NIC_CMQ_ENABLE		BIT(HCLGE_NIC_CMQ_EN_B)
+#define HCLGE_NIC_CMQ_DESC_NUM		1024
+#define HCLGE_NIC_CMQ_DESC_NUM_S	3
+
+int hclge_cmd_init(struct hclge_dev *hdev);
+static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
+{
+	writel(value, base + reg);
+}
+
+#define hclge_write_dev(a, reg, value) \
+	hclge_write_reg((a)->io_base, (reg), (value))
+#define hclge_read_dev(a, reg) \
+	hclge_read_reg((a)->io_base, (reg))
+
+static inline u32 hclge_read_reg(u8 __iomem *base, u32 reg)
+{
+	u8 __iomem *reg_addr = READ_ONCE(base);
+
+	return readl(reg_addr + reg);
+}
+
+#define HCLGE_SEND_SYNC(flag) \
+	((flag) & HCLGE_CMD_FLAG_NO_INTR)
+
+struct hclge_hw;
+int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num);
+void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
+				enum hclge_opcode_type opcode, bool is_read);
+
+int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+			       struct hclge_promisc_param *param);
+
+enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw,
+					   struct hclge_desc *desc);
+enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw,
+					  struct hclge_desc *desc);
+
+void hclge_destroy_cmd_queue(struct hclge_hw *hw);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
new file mode 100644
index 000000000000..3611991689bc
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -0,0 +1,4267 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "hclge_cmd.h"
+#include "hclge_main.h"
+#include "hclge_mdio.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+#define HCLGE_NAME			"hclge"
+#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
+#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
+#define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
+#define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
+
+static int hclge_rss_init_hw(struct hclge_dev *hdev);
+static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
+				     enum hclge_mta_dmac_sel_type mta_mac_sel,
+				     bool enable);
+static int hclge_init_vlan_config(struct hclge_dev *hdev);
+
+static struct hnae3_ae_algo ae_algo;
+
+static const struct pci_device_id ae_algo_pci_tbl[] = {
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+	/* Required last entry */
+	{0, }
+};
+
+static const struct pci_device_id roce_pci_tbl[] = {
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+	/* Required last entry */
+	{0, }
+};
+
+static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
+	"Mac    Loopback test",
+	"Serdes Loopback test",
+	"Phy    Loopback test"
+};
+
+static const struct hclge_comm_stats_str g_all_64bit_stats_string[] = {
+	{"igu_rx_oversize_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_oversize_pkt)},
+	{"igu_rx_undersize_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_undersize_pkt)},
+	{"igu_rx_out_all_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_out_all_pkt)},
+	{"igu_rx_uni_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_uni_pkt)},
+	{"igu_rx_multi_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_multi_pkt)},
+	{"igu_rx_broad_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_broad_pkt)},
+	{"egu_tx_out_all_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_out_all_pkt)},
+	{"egu_tx_uni_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_uni_pkt)},
+	{"egu_tx_multi_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_multi_pkt)},
+	{"egu_tx_broad_pkt",
+		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_broad_pkt)},
+	{"ssu_ppp_mac_key_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ssu_ppp_mac_key_num)},
+	{"ssu_ppp_host_key_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ssu_ppp_host_key_num)},
+	{"ppp_ssu_mac_rlt_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ppp_ssu_mac_rlt_num)},
+	{"ppp_ssu_host_rlt_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ppp_ssu_host_rlt_num)},
+	{"ssu_tx_in_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ssu_tx_in_num)},
+	{"ssu_tx_out_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ssu_tx_out_num)},
+	{"ssu_rx_in_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ssu_rx_in_num)},
+	{"ssu_rx_out_num",
+		HCLGE_64BIT_STATS_FIELD_OFF(ssu_rx_out_num)}
+};
+
+static const struct hclge_comm_stats_str g_all_32bit_stats_string[] = {
+	{"igu_rx_err_pkt",
+		HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_err_pkt)},
+	{"igu_rx_no_eof_pkt",
+		HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_no_eof_pkt)},
+	{"igu_rx_no_sof_pkt",
+		HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_no_sof_pkt)},
+	{"egu_tx_1588_pkt",
+		HCLGE_32BIT_STATS_FIELD_OFF(egu_tx_1588_pkt)},
+	{"ssu_full_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(ssu_full_drop_num)},
+	{"ssu_part_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(ssu_part_drop_num)},
+	{"ppp_key_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(ppp_key_drop_num)},
+	{"ppp_rlt_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(ppp_rlt_drop_num)},
+	{"ssu_key_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(ssu_key_drop_num)},
+	{"pkt_curr_buf_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_cnt)},
+	{"qcn_fb_rcv_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_rcv_cnt)},
+	{"qcn_fb_drop_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_drop_cnt)},
+	{"qcn_fb_invaild_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_invaild_cnt)},
+	{"rx_packet_tc0_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc0_in_cnt)},
+	{"rx_packet_tc1_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc1_in_cnt)},
+	{"rx_packet_tc2_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc2_in_cnt)},
+	{"rx_packet_tc3_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc3_in_cnt)},
+	{"rx_packet_tc4_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc4_in_cnt)},
+	{"rx_packet_tc5_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc5_in_cnt)},
+	{"rx_packet_tc6_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc6_in_cnt)},
+	{"rx_packet_tc7_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc7_in_cnt)},
+	{"rx_packet_tc0_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc0_out_cnt)},
+	{"rx_packet_tc1_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc1_out_cnt)},
+	{"rx_packet_tc2_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc2_out_cnt)},
+	{"rx_packet_tc3_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc3_out_cnt)},
+	{"rx_packet_tc4_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc4_out_cnt)},
+	{"rx_packet_tc5_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc5_out_cnt)},
+	{"rx_packet_tc6_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc6_out_cnt)},
+	{"rx_packet_tc7_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc7_out_cnt)},
+	{"tx_packet_tc0_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc0_in_cnt)},
+	{"tx_packet_tc1_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc1_in_cnt)},
+	{"tx_packet_tc2_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc2_in_cnt)},
+	{"tx_packet_tc3_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc3_in_cnt)},
+	{"tx_packet_tc4_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc4_in_cnt)},
+	{"tx_packet_tc5_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc5_in_cnt)},
+	{"tx_packet_tc6_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc6_in_cnt)},
+	{"tx_packet_tc7_in_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc7_in_cnt)},
+	{"tx_packet_tc0_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc0_out_cnt)},
+	{"tx_packet_tc1_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc1_out_cnt)},
+	{"tx_packet_tc2_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc2_out_cnt)},
+	{"tx_packet_tc3_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc3_out_cnt)},
+	{"tx_packet_tc4_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc4_out_cnt)},
+	{"tx_packet_tc5_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc5_out_cnt)},
+	{"tx_packet_tc6_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc6_out_cnt)},
+	{"tx_packet_tc7_out_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc7_out_cnt)},
+	{"pkt_curr_buf_tc0_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc0_cnt)},
+	{"pkt_curr_buf_tc1_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc1_cnt)},
+	{"pkt_curr_buf_tc2_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc2_cnt)},
+	{"pkt_curr_buf_tc3_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc3_cnt)},
+	{"pkt_curr_buf_tc4_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc4_cnt)},
+	{"pkt_curr_buf_tc5_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc5_cnt)},
+	{"pkt_curr_buf_tc6_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc6_cnt)},
+	{"pkt_curr_buf_tc7_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc7_cnt)},
+	{"mb_uncopy_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(mb_uncopy_num)},
+	{"lo_pri_unicast_rlt_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(lo_pri_unicast_rlt_drop_num)},
+	{"hi_pri_multicast_rlt_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(hi_pri_multicast_rlt_drop_num)},
+	{"lo_pri_multicast_rlt_drop_num",
+		HCLGE_32BIT_STATS_FIELD_OFF(lo_pri_multicast_rlt_drop_num)},
+	{"rx_oq_drop_pkt_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(rx_oq_drop_pkt_cnt)},
+	{"tx_oq_drop_pkt_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(tx_oq_drop_pkt_cnt)},
+	{"nic_l2_err_drop_pkt_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(nic_l2_err_drop_pkt_cnt)},
+	{"roc_l2_err_drop_pkt_cnt",
+		HCLGE_32BIT_STATS_FIELD_OFF(roc_l2_err_drop_pkt_cnt)}
+};
+
+static const struct hclge_comm_stats_str g_mac_stats_string[] = {
+	{"mac_tx_mac_pause_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_mac_pause_num)},
+	{"mac_rx_mac_pause_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_mac_pause_num)},
+	{"mac_tx_pfc_pri0_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri0_pkt_num)},
+	{"mac_tx_pfc_pri1_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri1_pkt_num)},
+	{"mac_tx_pfc_pri2_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri2_pkt_num)},
+	{"mac_tx_pfc_pri3_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri3_pkt_num)},
+	{"mac_tx_pfc_pri4_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri4_pkt_num)},
+	{"mac_tx_pfc_pri5_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri5_pkt_num)},
+	{"mac_tx_pfc_pri6_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri6_pkt_num)},
+	{"mac_tx_pfc_pri7_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri7_pkt_num)},
+	{"mac_rx_pfc_pri0_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri0_pkt_num)},
+	{"mac_rx_pfc_pri1_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri1_pkt_num)},
+	{"mac_rx_pfc_pri2_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri2_pkt_num)},
+	{"mac_rx_pfc_pri3_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri3_pkt_num)},
+	{"mac_rx_pfc_pri4_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri4_pkt_num)},
+	{"mac_rx_pfc_pri5_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri5_pkt_num)},
+	{"mac_rx_pfc_pri6_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri6_pkt_num)},
+	{"mac_rx_pfc_pri7_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri7_pkt_num)},
+	{"mac_tx_total_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_total_pkt_num)},
+	{"mac_tx_total_oct_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_total_oct_num)},
+	{"mac_tx_good_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_good_pkt_num)},
+	{"mac_tx_bad_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_bad_pkt_num)},
+	{"mac_tx_good_oct_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_good_oct_num)},
+	{"mac_tx_bad_oct_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_bad_oct_num)},
+	{"mac_tx_uni_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_uni_pkt_num)},
+	{"mac_tx_multi_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_multi_pkt_num)},
+	{"mac_tx_broad_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_broad_pkt_num)},
+	{"mac_tx_undersize_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_undersize_pkt_num)},
+	{"mac_tx_overrsize_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_overrsize_pkt_num)},
+	{"mac_tx_64_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_64_oct_pkt_num)},
+	{"mac_tx_65_127_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_65_127_oct_pkt_num)},
+	{"mac_tx_128_255_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_128_255_oct_pkt_num)},
+	{"mac_tx_256_511_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_256_511_oct_pkt_num)},
+	{"mac_tx_512_1023_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_512_1023_oct_pkt_num)},
+	{"mac_tx_1024_1518_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_1024_1518_oct_pkt_num)},
+	{"mac_tx_1519_max_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_1519_max_oct_pkt_num)},
+	{"mac_rx_total_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_total_pkt_num)},
+	{"mac_rx_total_oct_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_total_oct_num)},
+	{"mac_rx_good_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_good_pkt_num)},
+	{"mac_rx_bad_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_bad_pkt_num)},
+	{"mac_rx_good_oct_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_good_oct_num)},
+	{"mac_rx_bad_oct_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_bad_oct_num)},
+	{"mac_rx_uni_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_uni_pkt_num)},
+	{"mac_rx_multi_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_multi_pkt_num)},
+	{"mac_rx_broad_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_broad_pkt_num)},
+	{"mac_rx_undersize_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_undersize_pkt_num)},
+	{"mac_rx_overrsize_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_overrsize_pkt_num)},
+	{"mac_rx_64_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_64_oct_pkt_num)},
+	{"mac_rx_65_127_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_65_127_oct_pkt_num)},
+	{"mac_rx_128_255_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_128_255_oct_pkt_num)},
+	{"mac_rx_256_511_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_256_511_oct_pkt_num)},
+	{"mac_rx_512_1023_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_512_1023_oct_pkt_num)},
+	{"mac_rx_1024_1518_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_1024_1518_oct_pkt_num)},
+	{"mac_rx_1519_max_oct_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_1519_max_oct_pkt_num)},
+
+	{"mac_trans_fragment_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_trans_fragment_pkt_num)},
+	{"mac_trans_undermin_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_trans_undermin_pkt_num)},
+	{"mac_trans_jabber_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_trans_jabber_pkt_num)},
+	{"mac_trans_err_all_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_trans_err_all_pkt_num)},
+	{"mac_trans_from_app_good_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_trans_from_app_good_pkt_num)},
+	{"mac_trans_from_app_bad_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_trans_from_app_bad_pkt_num)},
+	{"mac_rcv_fragment_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_fragment_pkt_num)},
+	{"mac_rcv_undermin_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_undermin_pkt_num)},
+	{"mac_rcv_jabber_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_jabber_pkt_num)},
+	{"mac_rcv_fcs_err_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_fcs_err_pkt_num)},
+	{"mac_rcv_send_app_good_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_send_app_good_pkt_num)},
+	{"mac_rcv_send_app_bad_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_send_app_bad_pkt_num)}
+};
+
+static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
+{
+#define HCLGE_64_BIT_CMD_NUM 5
+#define HCLGE_64_BIT_RTN_DATANUM 4
+	u64 *data = (u64 *)(&hdev->hw_stats.all_64_bit_stats);
+	struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM];
+	u64 *desc_data;
+	int i, k, n;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_64_BIT, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_64_BIT_CMD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get 64 bit pkt stats fail, status = %d.\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) {
+		if (unlikely(i == 0)) {
+			desc_data = (u64 *)(&desc[i].data[0]);
+			n = HCLGE_64_BIT_RTN_DATANUM - 1;
+		} else {
+			desc_data = (u64 *)(&desc[i]);
+			n = HCLGE_64_BIT_RTN_DATANUM;
+		}
+		for (k = 0; k < n; k++) {
+			*data++ += cpu_to_le64(*desc_data);
+			desc_data++;
+		}
+	}
+
+	return 0;
+}
+
+static void hclge_reset_partial_32bit_counter(struct hclge_32_bit_stats *stats)
+{
+	stats->pkt_curr_buf_cnt     = 0;
+	stats->pkt_curr_buf_tc0_cnt = 0;
+	stats->pkt_curr_buf_tc1_cnt = 0;
+	stats->pkt_curr_buf_tc2_cnt = 0;
+	stats->pkt_curr_buf_tc3_cnt = 0;
+	stats->pkt_curr_buf_tc4_cnt = 0;
+	stats->pkt_curr_buf_tc5_cnt = 0;
+	stats->pkt_curr_buf_tc6_cnt = 0;
+	stats->pkt_curr_buf_tc7_cnt = 0;
+}
+
+static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
+{
+#define HCLGE_32_BIT_CMD_NUM 8
+#define HCLGE_32_BIT_RTN_DATANUM 8
+
+	struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM];
+	struct hclge_32_bit_stats *all_32_bit_stats;
+	u32 *desc_data;
+	int i, k, n;
+	u64 *data;
+	int ret;
+
+	all_32_bit_stats = &hdev->hw_stats.all_32_bit_stats;
+	data = (u64 *)(&all_32_bit_stats->egu_tx_1588_pkt);
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_32_BIT, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_32_BIT_CMD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get 32 bit pkt stats fail, status = %d.\n", ret);
+
+		return ret;
+	}
+
+	hclge_reset_partial_32bit_counter(all_32_bit_stats);
+	for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) {
+		if (unlikely(i == 0)) {
+			all_32_bit_stats->igu_rx_err_pkt +=
+				cpu_to_le32(desc[i].data[0]);
+			all_32_bit_stats->igu_rx_no_eof_pkt +=
+				cpu_to_le32(desc[i].data[1] & 0xffff);
+			all_32_bit_stats->igu_rx_no_sof_pkt +=
+				cpu_to_le32((desc[i].data[1] >> 16) & 0xffff);
+
+			desc_data = (u32 *)(&desc[i].data[2]);
+			n = HCLGE_32_BIT_RTN_DATANUM - 4;
+		} else {
+			desc_data = (u32 *)(&desc[i]);
+			n = HCLGE_32_BIT_RTN_DATANUM;
+		}
+		for (k = 0; k < n; k++) {
+			*data++ += cpu_to_le32(*desc_data);
+			desc_data++;
+		}
+	}
+
+	return 0;
+}
+
+static int hclge_mac_update_stats(struct hclge_dev *hdev)
+{
+#define HCLGE_MAC_CMD_NUM 17
+#define HCLGE_RTN_DATA_NUM 4
+
+	u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
+	struct hclge_desc desc[HCLGE_MAC_CMD_NUM];
+	u64 *desc_data;
+	int i, k, n;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_MAC, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_MAC_CMD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get MAC pkt stats fail, status = %d.\n", ret);
+
+		return ret;
+	}
+
+	for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) {
+		if (unlikely(i == 0)) {
+			desc_data = (u64 *)(&desc[i].data[0]);
+			n = HCLGE_RTN_DATA_NUM - 2;
+		} else {
+			desc_data = (u64 *)(&desc[i]);
+			n = HCLGE_RTN_DATA_NUM;
+		}
+		for (k = 0; k < n; k++) {
+			*data++ += cpu_to_le64(*desc_data);
+			desc_data++;
+		}
+	}
+
+	return 0;
+}
+
+static int hclge_tqps_update_stats(struct hnae3_handle *handle)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hnae3_queue *queue;
+	struct hclge_desc desc[1];
+	struct hclge_tqp *tqp;
+	int ret, i;
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		queue = handle->kinfo.tqp[i];
+		tqp = container_of(queue, struct hclge_tqp, q);
+		/* command : HCLGE_OPC_QUERY_IGU_STAT */
+		hclge_cmd_setup_basic_desc(&desc[0],
+					   HCLGE_OPC_QUERY_RX_STATUS,
+					   true);
+
+		desc[0].data[0] = (tqp->index & 0x1ff);
+		ret = hclge_cmd_send(&hdev->hw, desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Query tqp stat fail, status = %d,queue = %d\n",
+				ret,	i);
+			return ret;
+		}
+		tqp->tqp_stats.rcb_rx_ring_pktnum_rcd +=
+			cpu_to_le32(desc[0].data[4]);
+	}
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		queue = handle->kinfo.tqp[i];
+		tqp = container_of(queue, struct hclge_tqp, q);
+		/* command : HCLGE_OPC_QUERY_IGU_STAT */
+		hclge_cmd_setup_basic_desc(&desc[0],
+					   HCLGE_OPC_QUERY_TX_STATUS,
+					   true);
+
+		desc[0].data[0] = (tqp->index & 0x1ff);
+		ret = hclge_cmd_send(&hdev->hw, desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Query tqp stat fail, status = %d,queue = %d\n",
+				ret, i);
+			return ret;
+		}
+		tqp->tqp_stats.rcb_tx_ring_pktnum_rcd +=
+			cpu_to_le32(desc[0].data[4]);
+	}
+
+	return 0;
+}
+
+static u64 *hclge_tqps_get_stats(struct hnae3_handle *handle, u64 *data)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hclge_tqp *tqp;
+	u64 *buff = data;
+	int i;
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
+		*buff++ = cpu_to_le64(tqp->tqp_stats.rcb_tx_ring_pktnum_rcd);
+	}
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
+		*buff++ = cpu_to_le64(tqp->tqp_stats.rcb_rx_ring_pktnum_rcd);
+	}
+
+	return buff;
+}
+
+static int hclge_tqps_get_sset_count(struct hnae3_handle *handle, int stringset)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+
+	return kinfo->num_tqps * (2);
+}
+
+static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	u8 *buff = data;
+	int i = 0;
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
+			struct hclge_tqp, q);
+		snprintf(buff, ETH_GSTRING_LEN, "rcb_q%d_tx_pktnum_rcd",
+			 tqp->index);
+		buff = buff + ETH_GSTRING_LEN;
+	}
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
+			struct hclge_tqp, q);
+		snprintf(buff, ETH_GSTRING_LEN, "rcb_q%d_rx_pktnum_rcd",
+			 tqp->index);
+		buff = buff + ETH_GSTRING_LEN;
+	}
+
+	return buff;
+}
+
+static u64 *hclge_comm_get_stats(void *comm_stats,
+				 const struct hclge_comm_stats_str strs[],
+				 int size, u64 *data)
+{
+	u64 *buf = data;
+	u32 i;
+
+	for (i = 0; i < size; i++)
+		buf[i] = HCLGE_STATS_READ(comm_stats, strs[i].offset);
+
+	return buf + size;
+}
+
+static u8 *hclge_comm_get_strings(u32 stringset,
+				  const struct hclge_comm_stats_str strs[],
+				  int size, u8 *data)
+{
+	char *buff = (char *)data;
+	u32 i;
+
+	if (stringset != ETH_SS_STATS)
+		return buff;
+
+	for (i = 0; i < size; i++) {
+		snprintf(buff, ETH_GSTRING_LEN,
+			 strs[i].desc);
+		buff = buff + ETH_GSTRING_LEN;
+	}
+
+	return (u8 *)buff;
+}
+
+static void hclge_update_netstat(struct hclge_hw_stats *hw_stats,
+				 struct net_device_stats *net_stats)
+{
+	net_stats->tx_dropped = 0;
+	net_stats->rx_dropped = hw_stats->all_32_bit_stats.ssu_full_drop_num;
+	net_stats->rx_dropped += hw_stats->all_32_bit_stats.ppp_key_drop_num;
+	net_stats->rx_dropped += hw_stats->all_32_bit_stats.ssu_key_drop_num;
+
+	net_stats->rx_errors = hw_stats->mac_stats.mac_rx_overrsize_pkt_num;
+	net_stats->rx_errors += hw_stats->mac_stats.mac_rx_undersize_pkt_num;
+	net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_err_pkt;
+	net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_no_eof_pkt;
+	net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_no_sof_pkt;
+	net_stats->rx_errors += hw_stats->mac_stats.mac_rcv_fcs_err_pkt_num;
+
+	net_stats->multicast = hw_stats->mac_stats.mac_tx_multi_pkt_num;
+	net_stats->multicast += hw_stats->mac_stats.mac_rx_multi_pkt_num;
+
+	net_stats->rx_crc_errors = hw_stats->mac_stats.mac_rcv_fcs_err_pkt_num;
+	net_stats->rx_length_errors =
+		hw_stats->mac_stats.mac_rx_undersize_pkt_num;
+	net_stats->rx_length_errors +=
+		hw_stats->mac_stats.mac_rx_overrsize_pkt_num;
+	net_stats->rx_over_errors =
+		hw_stats->mac_stats.mac_rx_overrsize_pkt_num;
+}
+
+static void hclge_update_stats_for_all(struct hclge_dev *hdev)
+{
+	struct hnae3_handle *handle;
+	int status;
+
+	handle = &hdev->vport[0].nic;
+	if (handle->client) {
+		status = hclge_tqps_update_stats(handle);
+		if (status) {
+			dev_err(&hdev->pdev->dev,
+				"Update TQPS stats fail, status = %d.\n",
+				status);
+		}
+	}
+
+	status = hclge_mac_update_stats(hdev);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Update MAC stats fail, status = %d.\n", status);
+
+	status = hclge_32_bit_update_stats(hdev);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Update 32 bit stats fail, status = %d.\n",
+			status);
+
+	hclge_update_netstat(&hdev->hw_stats, &handle->kinfo.netdev->stats);
+}
+
+static void hclge_update_stats(struct hnae3_handle *handle,
+			       struct net_device_stats *net_stats)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_hw_stats *hw_stats = &hdev->hw_stats;
+	int status;
+
+	status = hclge_mac_update_stats(hdev);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Update MAC stats fail, status = %d.\n",
+			status);
+
+	status = hclge_32_bit_update_stats(hdev);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Update 32 bit stats fail, status = %d.\n",
+			status);
+
+	status = hclge_64_bit_update_stats(hdev);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Update 64 bit stats fail, status = %d.\n",
+			status);
+
+	status = hclge_tqps_update_stats(handle);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Update TQPS stats fail, status = %d.\n",
+			status);
+
+	hclge_update_netstat(hw_stats, net_stats);
+}
+
+static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
+{
+#define HCLGE_LOOPBACK_TEST_FLAGS 0x7
+
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int count = 0;
+
+	/* Loopback test support rules:
+	 * mac: only GE mode support
+	 * serdes: all mac mode will support include GE/XGE/LGE/CGE
+	 * phy: only support when phy device exist on board
+	 */
+	if (stringset == ETH_SS_TEST) {
+		/* clear loopback bit flags at first */
+		handle->flags = (handle->flags & (~HCLGE_LOOPBACK_TEST_FLAGS));
+		if (hdev->hw.mac.speed == HCLGE_MAC_SPEED_10M ||
+		    hdev->hw.mac.speed == HCLGE_MAC_SPEED_100M ||
+		    hdev->hw.mac.speed == HCLGE_MAC_SPEED_1G) {
+			count += 1;
+			handle->flags |= HNAE3_SUPPORT_MAC_LOOPBACK;
+		} else {
+			count = -EOPNOTSUPP;
+		}
+	} else if (stringset == ETH_SS_STATS) {
+		count = ARRAY_SIZE(g_mac_stats_string) +
+			ARRAY_SIZE(g_all_32bit_stats_string) +
+			ARRAY_SIZE(g_all_64bit_stats_string) +
+			hclge_tqps_get_sset_count(handle, stringset);
+	}
+
+	return count;
+}
+
+static void hclge_get_strings(struct hnae3_handle *handle,
+			      u32 stringset,
+			      u8 *data)
+{
+	u8 *p = (char *)data;
+	int size;
+
+	if (stringset == ETH_SS_STATS) {
+		size = ARRAY_SIZE(g_mac_stats_string);
+		p = hclge_comm_get_strings(stringset,
+					   g_mac_stats_string,
+					   size,
+					   p);
+		size = ARRAY_SIZE(g_all_32bit_stats_string);
+		p = hclge_comm_get_strings(stringset,
+					   g_all_32bit_stats_string,
+					   size,
+					   p);
+		size = ARRAY_SIZE(g_all_64bit_stats_string);
+		p = hclge_comm_get_strings(stringset,
+					   g_all_64bit_stats_string,
+					   size,
+					   p);
+		p = hclge_tqps_get_strings(handle, p);
+	} else if (stringset == ETH_SS_TEST) {
+		if (handle->flags & HNAE3_SUPPORT_MAC_LOOPBACK) {
+			memcpy(p,
+			       hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_MAC],
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		if (handle->flags & HNAE3_SUPPORT_SERDES_LOOPBACK) {
+			memcpy(p,
+			       hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_SERDES],
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		if (handle->flags & HNAE3_SUPPORT_PHY_LOOPBACK) {
+			memcpy(p,
+			       hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_PHY],
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+	}
+}
+
+static void hclge_get_stats(struct hnae3_handle *handle, u64 *data)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u64 *p;
+
+	p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats,
+				 g_mac_stats_string,
+				 ARRAY_SIZE(g_mac_stats_string),
+				 data);
+	p = hclge_comm_get_stats(&hdev->hw_stats.all_32_bit_stats,
+				 g_all_32bit_stats_string,
+				 ARRAY_SIZE(g_all_32bit_stats_string),
+				 p);
+	p = hclge_comm_get_stats(&hdev->hw_stats.all_64_bit_stats,
+				 g_all_64bit_stats_string,
+				 ARRAY_SIZE(g_all_64bit_stats_string),
+				 p);
+	p = hclge_tqps_get_stats(handle, p);
+}
+
+static int hclge_parse_func_status(struct hclge_dev *hdev,
+				   struct hclge_func_status *status)
+{
+	if (!(status->pf_state & HCLGE_PF_STATE_DONE))
+		return -EINVAL;
+
+	/* Set the pf to main pf */
+	if (status->pf_state & HCLGE_PF_STATE_MAIN)
+		hdev->flag |= HCLGE_FLAG_MAIN;
+	else
+		hdev->flag &= ~HCLGE_FLAG_MAIN;
+
+	hdev->num_req_vfs = status->vf_num / status->pf_num;
+	return 0;
+}
+
+static int hclge_query_function_status(struct hclge_dev *hdev)
+{
+	struct hclge_func_status *req;
+	struct hclge_desc desc;
+	int timeout = 0;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FUNC_STATUS, true);
+	req = (struct hclge_func_status *)desc.data;
+
+	do {
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"query function status failed %d.\n",
+				ret);
+
+			return ret;
+		}
+
+		/* Check pf reset is done */
+		if (req->pf_state)
+			break;
+		usleep_range(1000, 2000);
+	} while (timeout++ < 5);
+
+	ret = hclge_parse_func_status(hdev, req);
+
+	return ret;
+}
+
+static int hclge_query_pf_resource(struct hclge_dev *hdev)
+{
+	struct hclge_pf_res *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_PF_RSRC, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"query pf resource failed %d.\n", ret);
+		return ret;
+	}
+
+	req = (struct hclge_pf_res *)desc.data;
+	hdev->num_tqps = __le16_to_cpu(req->tqp_num);
+	hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
+
+	if (hnae_get_bit(hdev->ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B)) {
+		hdev->num_roce_msix =
+		hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+			       HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+		/* PF should have NIC vectors and Roce vectors,
+		 * NIC vectors are queued before Roce vectors.
+		 */
+		hdev->num_msi = hdev->num_roce_msix  + HCLGE_ROCE_VECTOR_OFFSET;
+	} else {
+		hdev->num_msi =
+		hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+			       HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+	}
+
+	return 0;
+}
+
+static int hclge_parse_speed(int speed_cmd, int *speed)
+{
+	switch (speed_cmd) {
+	case 6:
+		*speed = HCLGE_MAC_SPEED_10M;
+		break;
+	case 7:
+		*speed = HCLGE_MAC_SPEED_100M;
+		break;
+	case 0:
+		*speed = HCLGE_MAC_SPEED_1G;
+		break;
+	case 1:
+		*speed = HCLGE_MAC_SPEED_10G;
+		break;
+	case 2:
+		*speed = HCLGE_MAC_SPEED_25G;
+		break;
+	case 3:
+		*speed = HCLGE_MAC_SPEED_40G;
+		break;
+	case 4:
+		*speed = HCLGE_MAC_SPEED_50G;
+		break;
+	case 5:
+		*speed = HCLGE_MAC_SPEED_100G;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
+{
+	struct hclge_cfg_param *req;
+	u64 mac_addr_tmp_high;
+	u64 mac_addr_tmp;
+	int i;
+
+	req = (struct hclge_cfg_param *)desc[0].data;
+
+	/* get the configuration */
+	cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]),
+					     HCLGE_CFG_VMDQ_M,
+					     HCLGE_CFG_VMDQ_S);
+	cfg->tc_num = hnae_get_field(__le32_to_cpu(req->param[0]),
+				     HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S);
+	cfg->tqp_desc_num = hnae_get_field(__le32_to_cpu(req->param[0]),
+					   HCLGE_CFG_TQP_DESC_N_M,
+					   HCLGE_CFG_TQP_DESC_N_S);
+
+	cfg->phy_addr = hnae_get_field(__le32_to_cpu(req->param[1]),
+				       HCLGE_CFG_PHY_ADDR_M,
+				       HCLGE_CFG_PHY_ADDR_S);
+	cfg->media_type = hnae_get_field(__le32_to_cpu(req->param[1]),
+					 HCLGE_CFG_MEDIA_TP_M,
+					 HCLGE_CFG_MEDIA_TP_S);
+	cfg->rx_buf_len = hnae_get_field(__le32_to_cpu(req->param[1]),
+					 HCLGE_CFG_RX_BUF_LEN_M,
+					 HCLGE_CFG_RX_BUF_LEN_S);
+	/* get mac_address */
+	mac_addr_tmp = __le32_to_cpu(req->param[2]);
+	mac_addr_tmp_high = hnae_get_field(__le32_to_cpu(req->param[3]),
+					   HCLGE_CFG_MAC_ADDR_H_M,
+					   HCLGE_CFG_MAC_ADDR_H_S);
+
+	mac_addr_tmp |= (mac_addr_tmp_high << 31) << 1;
+
+	cfg->default_speed = hnae_get_field(__le32_to_cpu(req->param[3]),
+					    HCLGE_CFG_DEFAULT_SPEED_M,
+					    HCLGE_CFG_DEFAULT_SPEED_S);
+	for (i = 0; i < ETH_ALEN; i++)
+		cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff;
+
+	req = (struct hclge_cfg_param *)desc[1].data;
+	cfg->numa_node_map = __le32_to_cpu(req->param[0]);
+}
+
+/* hclge_get_cfg: query the static parameter from flash
+ * @hdev: pointer to struct hclge_dev
+ * @hcfg: the config structure to be getted
+ */
+static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg)
+{
+	struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM];
+	struct hclge_cfg_param *req;
+	int i, ret;
+
+	for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) {
+		req = (struct hclge_cfg_param *)desc[i].data;
+		hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM,
+					   true);
+		hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M,
+			       HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES);
+		/* Len should be united by 4 bytes when send to hardware */
+		hnae_set_field(req->offset, HCLGE_CFG_RD_LEN_M,
+			       HCLGE_CFG_RD_LEN_S,
+			       HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT);
+		req->offset = cpu_to_le32(req->offset);
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"get config failed %d.\n", ret);
+		return ret;
+	}
+
+	hclge_parse_cfg(hcfg, desc);
+	return 0;
+}
+
+static int hclge_get_cap(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_query_function_status(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"query function status error %d.\n", ret);
+		return ret;
+	}
+
+	/* get pf resource */
+	ret = hclge_query_pf_resource(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"query pf resource error %d.\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_configure(struct hclge_dev *hdev)
+{
+	struct hclge_cfg cfg;
+	int ret, i;
+
+	ret = hclge_get_cfg(hdev, &cfg);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "get mac mode error %d.\n", ret);
+		return ret;
+	}
+
+	hdev->num_vmdq_vport = cfg.vmdq_vport_num;
+	hdev->base_tqp_pid = 0;
+	hdev->rss_size_max = 1;
+	hdev->rx_buf_len = cfg.rx_buf_len;
+	for (i = 0; i < ETH_ALEN; i++)
+		hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
+	hdev->hw.mac.media_type = cfg.media_type;
+	hdev->num_desc = cfg.tqp_desc_num;
+	hdev->tm_info.num_pg = 1;
+	hdev->tm_info.num_tc = cfg.tc_num;
+	hdev->tm_info.hw_pfc_map = 0;
+
+	ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "Get wrong speed ret=%d.\n", ret);
+		return ret;
+	}
+
+	if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) ||
+	    (hdev->tm_info.num_tc < 1)) {
+		dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
+			 hdev->tm_info.num_tc);
+		hdev->tm_info.num_tc = 1;
+	}
+
+	/* Currently not support uncontiuous tc */
+	for (i = 0; i < cfg.tc_num; i++)
+		hnae_set_bit(hdev->hw_tc_map, i, 1);
+
+	if (!hdev->num_vmdq_vport && !hdev->num_req_vfs)
+		hdev->tx_sch_mode = HCLGE_FLAG_TC_BASE_SCH_MODE;
+	else
+		hdev->tx_sch_mode = HCLGE_FLAG_VNET_BASE_SCH_MODE;
+
+	return ret;
+}
+
+static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min,
+			    int tso_mss_max)
+{
+	struct hclge_cfg_tso_status *req;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false);
+
+	req = (struct hclge_cfg_tso_status *)desc.data;
+	hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M,
+		       HCLGE_TSO_MSS_MIN_S, tso_mss_min);
+	hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MIN_M,
+		       HCLGE_TSO_MSS_MIN_S, tso_mss_max);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_alloc_tqps(struct hclge_dev *hdev)
+{
+	struct hclge_tqp *tqp;
+	int i;
+
+	hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
+				  sizeof(struct hclge_tqp), GFP_KERNEL);
+	if (!hdev->htqp)
+		return -ENOMEM;
+
+	tqp = hdev->htqp;
+
+	for (i = 0; i < hdev->num_tqps; i++) {
+		tqp->dev = &hdev->pdev->dev;
+		tqp->index = i;
+
+		tqp->q.ae_algo = &ae_algo;
+		tqp->q.buf_size = hdev->rx_buf_len;
+		tqp->q.desc_num = hdev->num_desc;
+		tqp->q.io_base = hdev->hw.io_base + HCLGE_TQP_REG_OFFSET +
+			i * HCLGE_TQP_REG_SIZE;
+
+		tqp++;
+	}
+
+	return 0;
+}
+
+static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
+				  u16 tqp_pid, u16 tqp_vid, bool is_pf)
+{
+	struct hclge_tqp_map *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SET_TQP_MAP, false);
+
+	req = (struct hclge_tqp_map *)desc.data;
+	req->tqp_id = cpu_to_le16(tqp_pid);
+	req->tqp_vf = cpu_to_le16(func_id);
+	req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B |
+			1 << HCLGE_TQP_MAP_EN_B;
+	req->tqp_vid = cpu_to_le16(tqp_vid);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "TQP map failed %d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int  hclge_assign_tqp(struct hclge_vport *vport,
+			     struct hnae3_queue **tqp, u16 num_tqps)
+{
+	struct hclge_dev *hdev = vport->back;
+	int i, alloced, func_id, ret;
+	bool is_pf;
+
+	func_id = vport->vport_id;
+	is_pf = (vport->vport_id == 0) ? true : false;
+
+	for (i = 0, alloced = 0; i < hdev->num_tqps &&
+	     alloced < num_tqps; i++) {
+		if (!hdev->htqp[i].alloced) {
+			hdev->htqp[i].q.handle = &vport->nic;
+			hdev->htqp[i].q.tqp_index = alloced;
+			tqp[alloced] = &hdev->htqp[i].q;
+			hdev->htqp[i].alloced = true;
+			ret = hclge_map_tqps_to_func(hdev, func_id,
+						     hdev->htqp[i].index,
+						     alloced, is_pf);
+			if (ret)
+				return ret;
+
+			alloced++;
+		}
+	}
+	vport->alloc_tqps = num_tqps;
+
+	return 0;
+}
+
+static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps)
+{
+	struct hnae3_handle *nic = &vport->nic;
+	struct hnae3_knic_private_info *kinfo = &nic->kinfo;
+	struct hclge_dev *hdev = vport->back;
+	int i, ret;
+
+	kinfo->num_desc = hdev->num_desc;
+	kinfo->rx_buf_len = hdev->rx_buf_len;
+	kinfo->num_tc = min_t(u16, num_tqps, hdev->tm_info.num_tc);
+	kinfo->rss_size
+		= min_t(u16, hdev->rss_size_max, num_tqps / kinfo->num_tc);
+	kinfo->num_tqps = kinfo->rss_size * kinfo->num_tc;
+
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		if (hdev->hw_tc_map & BIT(i)) {
+			kinfo->tc_info[i].enable = true;
+			kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
+			kinfo->tc_info[i].tqp_count = kinfo->rss_size;
+			kinfo->tc_info[i].tc = i;
+		} else {
+			/* Set to default queue if TC is disable */
+			kinfo->tc_info[i].enable = false;
+			kinfo->tc_info[i].tqp_offset = 0;
+			kinfo->tc_info[i].tqp_count = 1;
+			kinfo->tc_info[i].tc = 0;
+		}
+	}
+
+	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
+				  sizeof(struct hnae3_queue *), GFP_KERNEL);
+	if (!kinfo->tqp)
+		return -ENOMEM;
+
+	ret = hclge_assign_tqp(vport, kinfo->tqp, kinfo->num_tqps);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps)
+{
+	/* this would be initialized later */
+}
+
+static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps)
+{
+	struct hnae3_handle *nic = &vport->nic;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	nic->pdev = hdev->pdev;
+	nic->ae_algo = &ae_algo;
+	nic->numa_node_mask = hdev->numa_node_mask;
+
+	if (hdev->ae_dev->dev_type == HNAE3_DEV_KNIC) {
+		ret = hclge_knic_setup(vport, num_tqps);
+		if (ret) {
+			dev_err(&hdev->pdev->dev, "knic setup failed %d\n",
+				ret);
+			return ret;
+		}
+	} else {
+		hclge_unic_setup(vport, num_tqps);
+	}
+
+	return 0;
+}
+
+static int hclge_alloc_vport(struct hclge_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	struct hclge_vport *vport;
+	u32 tqp_main_vport;
+	u32 tqp_per_vport;
+	int num_vport, i;
+	int ret;
+
+	/* We need to alloc a vport for main NIC of PF */
+	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+
+	if (hdev->num_tqps < num_vport)
+		num_vport = hdev->num_tqps;
+
+	/* Alloc the same number of TQPs for every vport */
+	tqp_per_vport = hdev->num_tqps / num_vport;
+	tqp_main_vport = tqp_per_vport + hdev->num_tqps % num_vport;
+
+	vport = devm_kcalloc(&pdev->dev, num_vport, sizeof(struct hclge_vport),
+			     GFP_KERNEL);
+	if (!vport)
+		return -ENOMEM;
+
+	hdev->vport = vport;
+	hdev->num_alloc_vport = num_vport;
+
+#ifdef CONFIG_PCI_IOV
+	/* Enable SRIOV */
+	if (hdev->num_req_vfs) {
+		dev_info(&pdev->dev, "active VFs(%d) found, enabling SRIOV\n",
+			 hdev->num_req_vfs);
+		ret = pci_enable_sriov(hdev->pdev, hdev->num_req_vfs);
+		if (ret) {
+			hdev->num_alloc_vfs = 0;
+			dev_err(&pdev->dev, "SRIOV enable failed %d\n",
+				ret);
+			return ret;
+		}
+	}
+	hdev->num_alloc_vfs = hdev->num_req_vfs;
+#endif
+
+	for (i = 0; i < num_vport; i++) {
+		vport->back = hdev;
+		vport->vport_id = i;
+
+		if (i == 0)
+			ret = hclge_vport_setup(vport, tqp_main_vport);
+		else
+			ret = hclge_vport_setup(vport, tqp_per_vport);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"vport setup failed for vport %d, %d\n",
+				i, ret);
+			return ret;
+		}
+
+		vport++;
+	}
+
+	return 0;
+}
+
+static int  hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
+{
+/* TX buffer size is unit by 128 byte */
+#define HCLGE_BUF_SIZE_UNIT_SHIFT	7
+#define HCLGE_BUF_SIZE_UPDATE_EN_MSK	BIT(15)
+	struct hclge_tx_buff_alloc *req;
+	struct hclge_desc desc;
+	int ret;
+	u8 i;
+
+	req = (struct hclge_tx_buff_alloc *)desc.data;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
+	for (i = 0; i < HCLGE_TC_NUM; i++)
+		req->tx_pkt_buff[i] =
+			cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
+				     HCLGE_BUF_SIZE_UPDATE_EN_MSK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "tx buffer alloc cmd failed %d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size)
+{
+	int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size);
+
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"tx buffer alloc failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_get_tc_num(struct hclge_dev *hdev)
+{
+	int i, cnt = 0;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+		if (hdev->hw_tc_map & BIT(i))
+			cnt++;
+	return cnt;
+}
+
+static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
+{
+	int i, cnt = 0;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+		if (hdev->hw_tc_map & BIT(i) &&
+		    hdev->tm_info.hw_pfc_map & BIT(i))
+			cnt++;
+	return cnt;
+}
+
+/* Get the number of pfc enabled TCs, which have private buffer */
+static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
+{
+	struct hclge_priv_buf *priv;
+	int i, cnt = 0;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		priv = &hdev->priv_buf[i];
+		if ((hdev->tm_info.hw_pfc_map & BIT(i)) &&
+		    priv->enable)
+			cnt++;
+	}
+
+	return cnt;
+}
+
+/* Get the number of pfc disabled TCs, which have private buffer */
+static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
+{
+	struct hclge_priv_buf *priv;
+	int i, cnt = 0;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		priv = &hdev->priv_buf[i];
+		if (hdev->hw_tc_map & BIT(i) &&
+		    !(hdev->tm_info.hw_pfc_map & BIT(i)) &&
+		    priv->enable)
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
+{
+	struct hclge_priv_buf *priv;
+	u32 rx_priv = 0;
+	int i;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		priv = &hdev->priv_buf[i];
+		if (priv->enable)
+			rx_priv += priv->buf_size;
+	}
+	return rx_priv;
+}
+
+static bool  hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
+{
+	u32 shared_buf_min, shared_buf_tc, shared_std;
+	int tc_num, pfc_enable_num;
+	u32 shared_buf;
+	u32 rx_priv;
+	int i;
+
+	tc_num = hclge_get_tc_num(hdev);
+	pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
+
+	shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+	shared_buf_tc = pfc_enable_num * hdev->mps +
+			(tc_num - pfc_enable_num) * hdev->mps / 2 +
+			hdev->mps;
+	shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
+
+	rx_priv = hclge_get_rx_priv_buff_alloced(hdev);
+	if (rx_all <= rx_priv + shared_std)
+		return false;
+
+	shared_buf = rx_all - rx_priv;
+	hdev->s_buf.buf_size = shared_buf;
+	hdev->s_buf.self.high = shared_buf;
+	hdev->s_buf.self.low =  2 * hdev->mps;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		if ((hdev->hw_tc_map & BIT(i)) &&
+		    (hdev->tm_info.hw_pfc_map & BIT(i))) {
+			hdev->s_buf.tc_thrd[i].low = hdev->mps;
+			hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+		} else {
+			hdev->s_buf.tc_thrd[i].low = 0;
+			hdev->s_buf.tc_thrd[i].high = hdev->mps;
+		}
+	}
+
+	return true;
+}
+
+/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
+ * @hdev: pointer to struct hclge_dev
+ * @tx_size: the allocated tx buffer for all TCs
+ * @return: 0: calculate sucessful, negative: fail
+ */
+int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
+{
+	u32 rx_all = hdev->pkt_buf_size - tx_size;
+	int no_pfc_priv_num, pfc_priv_num;
+	struct hclge_priv_buf *priv;
+	int i;
+
+	/* step 1, try to alloc private buffer for all enabled tc */
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		priv = &hdev->priv_buf[i];
+		if (hdev->hw_tc_map & BIT(i)) {
+			priv->enable = 1;
+			if (hdev->tm_info.hw_pfc_map & BIT(i)) {
+				priv->wl.low = hdev->mps;
+				priv->wl.high = priv->wl.low + hdev->mps;
+				priv->buf_size = priv->wl.high +
+						HCLGE_DEFAULT_DV;
+			} else {
+				priv->wl.low = 0;
+				priv->wl.high = 2 * hdev->mps;
+				priv->buf_size = priv->wl.high;
+			}
+		}
+	}
+
+	if (hclge_is_rx_buf_ok(hdev, rx_all))
+		return 0;
+
+	/* step 2, try to decrease the buffer size of
+	 * no pfc TC's private buffer
+	 */
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		priv = &hdev->priv_buf[i];
+
+		if (hdev->hw_tc_map & BIT(i))
+			priv->enable = 1;
+
+		if (hdev->tm_info.hw_pfc_map & BIT(i)) {
+			priv->wl.low = 128;
+			priv->wl.high = priv->wl.low + hdev->mps;
+			priv->buf_size = priv->wl.high + HCLGE_DEFAULT_DV;
+		} else {
+			priv->wl.low = 0;
+			priv->wl.high = hdev->mps;
+			priv->buf_size = priv->wl.high;
+		}
+	}
+
+	if (hclge_is_rx_buf_ok(hdev, rx_all))
+		return 0;
+
+	/* step 3, try to reduce the number of pfc disabled TCs,
+	 * which have private buffer
+	 */
+	/* get the total no pfc enable TC number, which have private buffer */
+	no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev);
+
+	/* let the last to be cleared first */
+	for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
+		priv = &hdev->priv_buf[i];
+
+		if (hdev->hw_tc_map & BIT(i) &&
+		    !(hdev->tm_info.hw_pfc_map & BIT(i))) {
+			/* Clear the no pfc TC private buffer */
+			priv->wl.low = 0;
+			priv->wl.high = 0;
+			priv->buf_size = 0;
+			priv->enable = 0;
+			no_pfc_priv_num--;
+		}
+
+		if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+		    no_pfc_priv_num == 0)
+			break;
+	}
+
+	if (hclge_is_rx_buf_ok(hdev, rx_all))
+		return 0;
+
+	/* step 4, try to reduce the number of pfc enabled TCs
+	 * which have private buffer.
+	 */
+	pfc_priv_num = hclge_get_pfc_priv_num(hdev);
+
+	/* let the last to be cleared first */
+	for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
+		priv = &hdev->priv_buf[i];
+
+		if (hdev->hw_tc_map & BIT(i) &&
+		    hdev->tm_info.hw_pfc_map & BIT(i)) {
+			/* Reduce the number of pfc TC with private buffer */
+			priv->wl.low = 0;
+			priv->enable = 0;
+			priv->wl.high = 0;
+			priv->buf_size = 0;
+			pfc_priv_num--;
+		}
+
+		if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+		    pfc_priv_num == 0)
+			break;
+	}
+	if (hclge_is_rx_buf_ok(hdev, rx_all))
+		return 0;
+
+	return -ENOMEM;
+}
+
+static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
+{
+	struct hclge_rx_priv_buff *req;
+	struct hclge_desc desc;
+	int ret;
+	int i;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, false);
+	req = (struct hclge_rx_priv_buff *)desc.data;
+
+	/* Alloc private buffer TCs */
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		struct hclge_priv_buf *priv = &hdev->priv_buf[i];
+
+		req->buf_num[i] =
+			cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S);
+		req->buf_num[i] |=
+			cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B);
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"rx private buffer alloc cmd failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
+
+static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
+{
+	struct hclge_rx_priv_wl_buf *req;
+	struct hclge_priv_buf *priv;
+	struct hclge_desc desc[2];
+	int i, j;
+	int ret;
+
+	for (i = 0; i < 2; i++) {
+		hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_RX_PRIV_WL_ALLOC,
+					   false);
+		req = (struct hclge_rx_priv_wl_buf *)desc[i].data;
+
+		/* The first descriptor set the NEXT bit to 1 */
+		if (i == 0)
+			desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		else
+			desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+		for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
+			priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j];
+			req->tc_wl[j].high =
+				cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
+			req->tc_wl[j].high |=
+				cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.high) <<
+					    HCLGE_RX_PRIV_EN_B);
+			req->tc_wl[j].low =
+				cpu_to_le16(priv->wl.low >> HCLGE_BUF_UNIT_S);
+			req->tc_wl[j].low |=
+				cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.low) <<
+					    HCLGE_RX_PRIV_EN_B);
+		}
+	}
+
+	/* Send 2 descriptor at one time */
+	ret = hclge_cmd_send(&hdev->hw, desc, 2);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"rx private waterline config cmd failed %d\n",
+			ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int hclge_common_thrd_config(struct hclge_dev *hdev)
+{
+	struct hclge_shared_buf *s_buf = &hdev->s_buf;
+	struct hclge_rx_com_thrd *req;
+	struct hclge_desc desc[2];
+	struct hclge_tc_thrd *tc;
+	int i, j;
+	int ret;
+
+	for (i = 0; i < 2; i++) {
+		hclge_cmd_setup_basic_desc(&desc[i],
+					   HCLGE_OPC_RX_COM_THRD_ALLOC, false);
+		req = (struct hclge_rx_com_thrd *)&desc[i].data;
+
+		/* The first descriptor set the NEXT bit to 1 */
+		if (i == 0)
+			desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		else
+			desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+		for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
+			tc = &s_buf->tc_thrd[i * HCLGE_TC_NUM_ONE_DESC + j];
+
+			req->com_thrd[j].high =
+				cpu_to_le16(tc->high >> HCLGE_BUF_UNIT_S);
+			req->com_thrd[j].high |=
+				cpu_to_le16(HCLGE_PRIV_ENABLE(tc->high) <<
+					    HCLGE_RX_PRIV_EN_B);
+			req->com_thrd[j].low =
+				cpu_to_le16(tc->low >> HCLGE_BUF_UNIT_S);
+			req->com_thrd[j].low |=
+				cpu_to_le16(HCLGE_PRIV_ENABLE(tc->low) <<
+					    HCLGE_RX_PRIV_EN_B);
+		}
+	}
+
+	/* Send 2 descriptors at one time */
+	ret = hclge_cmd_send(&hdev->hw, desc, 2);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"common threshold config cmd failed %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int hclge_common_wl_config(struct hclge_dev *hdev)
+{
+	struct hclge_shared_buf *buf = &hdev->s_buf;
+	struct hclge_rx_com_wl *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_COM_WL_ALLOC, false);
+
+	req = (struct hclge_rx_com_wl *)desc.data;
+	req->com_wl.high = cpu_to_le16(buf->self.high >> HCLGE_BUF_UNIT_S);
+	req->com_wl.high |=
+		cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.high) <<
+			    HCLGE_RX_PRIV_EN_B);
+
+	req->com_wl.low = cpu_to_le16(buf->self.low >> HCLGE_BUF_UNIT_S);
+	req->com_wl.low |=
+		cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.low) <<
+			    HCLGE_RX_PRIV_EN_B);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"common waterline config cmd failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int hclge_buffer_alloc(struct hclge_dev *hdev)
+{
+	u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+	int ret;
+
+	hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
+					    sizeof(struct hclge_priv_buf),
+					    GFP_KERNEL | __GFP_ZERO);
+	if (!hdev->priv_buf)
+		return -ENOMEM;
+
+	ret = hclge_tx_buffer_alloc(hdev, tx_buf_size);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not alloc tx buffers %d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_rx_buffer_calc(hdev, tx_buf_size);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not calc rx priv buffer size for all TCs %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = hclge_rx_priv_buf_alloc(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = hclge_rx_priv_wl_config(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not configure rx private waterline %d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_common_thrd_config(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not configure common threshold %d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_common_wl_config(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not configure common waterline %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_init_roce_base_info(struct hclge_vport *vport)
+{
+	struct hnae3_handle *roce = &vport->roce;
+	struct hnae3_handle *nic = &vport->nic;
+
+	roce->rinfo.num_vectors = vport->back->num_roce_msix;
+
+	if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors ||
+	    vport->back->num_msi_left == 0)
+		return -EINVAL;
+
+	roce->rinfo.base_vector = vport->back->roce_base_vector;
+
+	roce->rinfo.netdev = nic->kinfo.netdev;
+	roce->rinfo.roce_io_base = vport->back->hw.io_base;
+
+	roce->pdev = nic->pdev;
+	roce->ae_algo = nic->ae_algo;
+	roce->numa_node_mask = nic->numa_node_mask;
+
+	return 0;
+}
+
+static int hclge_init_msix(struct hclge_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	int ret, i;
+
+	hdev->msix_entries = devm_kcalloc(&pdev->dev, hdev->num_msi,
+					  sizeof(struct msix_entry),
+					  GFP_KERNEL);
+	if (!hdev->msix_entries)
+		return -ENOMEM;
+
+	hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+					   sizeof(u16), GFP_KERNEL);
+	if (!hdev->vector_status)
+		return -ENOMEM;
+
+	for (i = 0; i < hdev->num_msi; i++) {
+		hdev->msix_entries[i].entry = i;
+		hdev->vector_status[i] = HCLGE_INVALID_VPORT;
+	}
+
+	hdev->num_msi_left = hdev->num_msi;
+	hdev->base_msi_vector = hdev->pdev->irq;
+	hdev->roce_base_vector = hdev->base_msi_vector +
+				HCLGE_ROCE_VECTOR_OFFSET;
+
+	ret = pci_enable_msix_range(hdev->pdev, hdev->msix_entries,
+				    hdev->num_msi, hdev->num_msi);
+	if (ret < 0) {
+		dev_info(&hdev->pdev->dev,
+			 "MSI-X vector alloc failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_init_msi(struct hclge_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	int vectors;
+	int i;
+
+	hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+					   sizeof(u16), GFP_KERNEL);
+	if (!hdev->vector_status)
+		return -ENOMEM;
+
+	for (i = 0; i < hdev->num_msi; i++)
+		hdev->vector_status[i] = HCLGE_INVALID_VPORT;
+
+	vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, PCI_IRQ_MSI);
+	if (vectors < 0) {
+		dev_err(&pdev->dev, "MSI vectors enable failed %d\n", vectors);
+		return -EINVAL;
+	}
+	hdev->num_msi = vectors;
+	hdev->num_msi_left = vectors;
+	hdev->base_msi_vector = pdev->irq;
+	hdev->roce_base_vector = hdev->base_msi_vector +
+				HCLGE_ROCE_VECTOR_OFFSET;
+
+	return 0;
+}
+
+static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, int speed)
+{
+	struct hclge_mac *mac = &hdev->hw.mac;
+
+	if ((speed == HCLGE_MAC_SPEED_10M) || (speed == HCLGE_MAC_SPEED_100M))
+		mac->duplex = (u8)duplex;
+	else
+		mac->duplex = HCLGE_MAC_FULL;
+
+	mac->speed = speed;
+}
+
+int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
+{
+	struct hclge_config_mac_speed_dup *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_config_mac_speed_dup *)desc.data;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false);
+
+	hnae_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex);
+
+	switch (speed) {
+	case HCLGE_MAC_SPEED_10M:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 6);
+		break;
+	case HCLGE_MAC_SPEED_100M:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 7);
+		break;
+	case HCLGE_MAC_SPEED_1G:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 0);
+		break;
+	case HCLGE_MAC_SPEED_10G:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 1);
+		break;
+	case HCLGE_MAC_SPEED_25G:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 2);
+		break;
+	case HCLGE_MAC_SPEED_40G:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 3);
+		break;
+	case HCLGE_MAC_SPEED_50G:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 4);
+		break;
+	case HCLGE_MAC_SPEED_100G:
+		hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+			       HCLGE_CFG_SPEED_S, 5);
+		break;
+	default:
+		dev_err(&hdev->pdev->dev, "invald speed (%d)\n", speed);
+		return -EINVAL;
+	}
+
+	hnae_set_bit(req->mac_change_fec_en, HCLGE_CFG_MAC_SPEED_CHANGE_EN_B,
+		     1);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"mac speed/duplex config cmd failed %d.\n", ret);
+		return ret;
+	}
+
+	hclge_check_speed_dup(hdev, duplex, speed);
+
+	return 0;
+}
+
+static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed,
+				     u8 duplex)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hclge_cfg_mac_speed_dup(hdev, speed, duplex);
+}
+
+static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
+					u8 *duplex)
+{
+	struct hclge_query_an_speed_dup *req;
+	struct hclge_desc desc;
+	int speed_tmp;
+	int ret;
+
+	req = (struct hclge_query_an_speed_dup *)desc.data;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"mac speed/autoneg/duplex query cmd failed %d\n",
+			ret);
+		return ret;
+	}
+
+	*duplex = hnae_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_DUPLEX_B);
+	speed_tmp = hnae_get_field(req->an_syn_dup_speed, HCLGE_QUERY_SPEED_M,
+				   HCLGE_QUERY_SPEED_S);
+
+	ret = hclge_parse_speed(speed_tmp, speed);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not parse speed(=%d), %d\n", speed_tmp, ret);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int hclge_query_autoneg_result(struct hclge_dev *hdev)
+{
+	struct hclge_mac *mac = &hdev->hw.mac;
+	struct hclge_query_an_speed_dup *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_query_an_speed_dup *)desc.data;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"autoneg result query cmd failed %d.\n", ret);
+		return ret;
+	}
+
+	mac->autoneg = hnae_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_AN_B);
+
+	return 0;
+}
+
+static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
+{
+	struct hclge_config_auto_neg *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
+
+	req = (struct hclge_config_auto_neg *)desc.data;
+	hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "auto neg set cmd failed %d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_set_autoneg(struct hnae3_handle *handle, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hclge_set_autoneg_en(hdev, enable);
+}
+
+static int hclge_get_autoneg(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	hclge_query_autoneg_result(hdev);
+
+	return hdev->hw.mac.autoneg;
+}
+
+static int hclge_mac_init(struct hclge_dev *hdev)
+{
+	struct hclge_mac *mac = &hdev->hw.mac;
+	int ret;
+
+	ret = hclge_cfg_mac_speed_dup(hdev, hdev->hw.mac.speed, HCLGE_MAC_FULL);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Config mac speed dup fail ret=%d\n", ret);
+		return ret;
+	}
+
+	mac->link = 0;
+
+	ret = hclge_mac_mdio_config(hdev);
+	if (ret) {
+		dev_warn(&hdev->pdev->dev,
+			 "mdio config fail ret=%d\n", ret);
+		return ret;
+	}
+
+	/* Initialize the MTA table work mode */
+	hdev->accept_mta_mc	= true;
+	hdev->enable_mta	= true;
+	hdev->mta_mac_sel_type	= HCLGE_MAC_ADDR_47_36;
+
+	ret = hclge_set_mta_filter_mode(hdev,
+					hdev->mta_mac_sel_type,
+					hdev->enable_mta);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "set mta filter mode failed %d\n",
+			ret);
+		return ret;
+	}
+
+	return hclge_cfg_func_mta_filter(hdev, 0, hdev->accept_mta_mc);
+}
+
+static void hclge_task_schedule(struct hclge_dev *hdev)
+{
+	if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
+	    !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+	    !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
+		(void)schedule_work(&hdev->service_task);
+}
+
+static int hclge_get_mac_link_status(struct hclge_dev *hdev)
+{
+	struct hclge_link_status *req;
+	struct hclge_desc desc;
+	int link_status;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_LINK_STATUS, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "get link status cmd failed %d\n",
+			ret);
+		return ret;
+	}
+
+	req = (struct hclge_link_status *)desc.data;
+	link_status = req->status & HCLGE_LINK_STATUS;
+
+	return !!link_status;
+}
+
+static int hclge_get_mac_phy_link(struct hclge_dev *hdev)
+{
+	int mac_state;
+	int link_stat;
+
+	mac_state = hclge_get_mac_link_status(hdev);
+
+	if (hdev->hw.mac.phydev) {
+		if (!genphy_read_status(hdev->hw.mac.phydev))
+			link_stat = mac_state &
+				hdev->hw.mac.phydev->link;
+		else
+			link_stat = 0;
+
+	} else {
+		link_stat = mac_state;
+	}
+
+	return !!link_stat;
+}
+
+static void hclge_update_link_status(struct hclge_dev *hdev)
+{
+	struct hnae3_client *client = hdev->nic_client;
+	struct hnae3_handle *handle;
+	int state;
+	int i;
+
+	if (!client)
+		return;
+	state = hclge_get_mac_phy_link(hdev);
+	if (state != hdev->hw.mac.link) {
+		for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+			handle = &hdev->vport[i].nic;
+			client->ops->link_status_change(handle, state);
+		}
+		hdev->hw.mac.link = state;
+	}
+}
+
+static int hclge_update_speed_duplex(struct hclge_dev *hdev)
+{
+	struct hclge_mac mac = hdev->hw.mac;
+	u8 duplex;
+	int speed;
+	int ret;
+
+	/* get the speed and duplex as autoneg'result from mac cmd when phy
+	 * doesn't exit.
+	 */
+	if (mac.phydev)
+		return 0;
+
+	/* update mac->antoneg. */
+	ret = hclge_query_autoneg_result(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"autoneg result query failed %d\n", ret);
+		return ret;
+	}
+
+	if (!mac.autoneg)
+		return 0;
+
+	ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"mac autoneg/speed/duplex query failed %d\n", ret);
+		return ret;
+	}
+
+	if ((mac.speed != speed) || (mac.duplex != duplex)) {
+		ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"mac speed/duplex config failed %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int hclge_update_speed_duplex_h(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hclge_update_speed_duplex(hdev);
+}
+
+static int hclge_get_status(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	hclge_update_link_status(hdev);
+
+	return hdev->hw.mac.link;
+}
+
+static void hclge_service_timer(unsigned long data)
+{
+	struct hclge_dev *hdev = (struct hclge_dev *)data;
+	(void)mod_timer(&hdev->service_timer, jiffies + HZ);
+
+	hclge_task_schedule(hdev);
+}
+
+static void hclge_service_complete(struct hclge_dev *hdev)
+{
+	WARN_ON(!test_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state));
+
+	/* Flush memory before next watchdog */
+	smp_mb__before_atomic();
+	clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+}
+
+static void hclge_service_task(struct work_struct *work)
+{
+	struct hclge_dev *hdev =
+		container_of(work, struct hclge_dev, service_task);
+
+	hclge_update_speed_duplex(hdev);
+	hclge_update_link_status(hdev);
+	hclge_update_stats_for_all(hdev);
+	hclge_service_complete(hdev);
+}
+
+static void hclge_disable_sriov(struct hclge_dev *hdev)
+{
+#ifdef CONFIG_PCI_IOV
+		/* If our VFs are assigned we cannot shut down SR-IOV
+		 * without causing issues, so just leave the hardware
+		 * available but disabled
+		 */
+		if (pci_vfs_assigned(hdev->pdev)) {
+			dev_warn(&hdev->pdev->dev,
+				 "disabling driver while VFs are assigned\n");
+			return;
+		}
+
+		pci_disable_sriov(hdev->pdev);
+#endif
+}
+
+struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle)
+{
+	/* VF handle has no client */
+	if (!handle->client)
+		return container_of(handle, struct hclge_vport, nic);
+	else if (handle->client->type == HNAE3_CLIENT_ROCE)
+		return container_of(handle, struct hclge_vport, roce);
+	else
+		return container_of(handle, struct hclge_vport, nic);
+}
+
+static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num,
+			    struct hnae3_vector_info *vector_info)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hnae3_vector_info *vector = vector_info;
+	struct hclge_dev *hdev = vport->back;
+	int alloc = 0;
+	int i, j;
+
+	vector_num = min(hdev->num_msi_left, vector_num);
+
+	for (j = 0; j < vector_num; j++) {
+		for (i = 1; i < hdev->num_msi; i++) {
+			if (hdev->vector_status[i] == HCLGE_INVALID_VPORT) {
+				vector->vector = pci_irq_vector(hdev->pdev, i);
+				vector->io_addr = hdev->hw.io_base +
+					HCLGE_VECTOR_REG_BASE +
+					(i - 1) * HCLGE_VECTOR_REG_OFFSET +
+					vport->vport_id *
+					HCLGE_VECTOR_VF_OFFSET;
+				hdev->vector_status[i] = vport->vport_id;
+
+				vector++;
+				alloc++;
+
+				break;
+			}
+		}
+	}
+	hdev->num_msi_left -= alloc;
+	hdev->num_msi_used += alloc;
+
+	return alloc;
+}
+
+static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
+{
+	int i;
+
+	for (i = 0; i < hdev->num_msi; i++) {
+		if (hdev->msix_entries) {
+			if (vector == hdev->msix_entries[i].vector)
+				return i;
+		} else {
+			if (vector == (hdev->base_msi_vector + i))
+				return i;
+		}
+	}
+	return -EINVAL;
+}
+
+static u32 hclge_get_rss_key_size(struct hnae3_handle *handle)
+{
+	return HCLGE_RSS_KEY_SIZE;
+}
+
+static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
+{
+	return HCLGE_RSS_IND_TBL_SIZE;
+}
+
+static int hclge_get_rss_algo(struct hclge_dev *hdev)
+{
+	struct hclge_rss_config *req;
+	struct hclge_desc desc;
+	int rss_hash_algo;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get link status error, status =%d\n", ret);
+		return ret;
+	}
+
+	req = (struct hclge_rss_config *)desc.data;
+	rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
+
+	if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
+		return ETH_RSS_HASH_TOP;
+
+	return -EINVAL;
+}
+
+static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
+				  const u8 hfunc, const u8 *key)
+{
+	struct hclge_rss_config *req;
+	struct hclge_desc desc;
+	int key_offset;
+	int key_size;
+	int ret;
+
+	req = (struct hclge_rss_config *)desc.data;
+
+	for (key_offset = 0; key_offset < 3; key_offset++) {
+		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG,
+					   false);
+
+		req->hash_config |= (hfunc & HCLGE_RSS_HASH_ALGO_MASK);
+		req->hash_config |= (key_offset << HCLGE_RSS_HASH_KEY_OFFSET_B);
+
+		if (key_offset == 2)
+			key_size =
+			HCLGE_RSS_KEY_SIZE - HCLGE_RSS_HASH_KEY_NUM * 2;
+		else
+			key_size = HCLGE_RSS_HASH_KEY_NUM;
+
+		memcpy(req->hash_key,
+		       key + key_offset * HCLGE_RSS_HASH_KEY_NUM, key_size);
+
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Configure RSS config fail, status = %d\n",
+				ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
+{
+	struct hclge_rss_indirection_table *req;
+	struct hclge_desc desc;
+	int i, j;
+	int ret;
+
+	req = (struct hclge_rss_indirection_table *)desc.data;
+
+	for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
+		hclge_cmd_setup_basic_desc
+			(&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
+
+		req->start_table_index = i * HCLGE_RSS_CFG_TBL_SIZE;
+		req->rss_set_bitmap = HCLGE_RSS_SET_BITMAP_MSK;
+
+		for (j = 0; j < HCLGE_RSS_CFG_TBL_SIZE; j++)
+			req->rss_result[j] =
+				indir[i * HCLGE_RSS_CFG_TBL_SIZE + j];
+
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Configure rss indir table fail,status = %d\n",
+				ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
+				 u16 *tc_size, u16 *tc_offset)
+{
+	struct hclge_rss_tc_mode *req;
+	struct hclge_desc desc;
+	int ret;
+	int i;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_TC_MODE, false);
+	req = (struct hclge_rss_tc_mode *)desc.data;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B,
+			     (tc_valid[i] & 0x1));
+		hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_SIZE_M,
+			       HCLGE_RSS_TC_SIZE_S, tc_size[i]);
+		hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_OFFSET_M,
+			       HCLGE_RSS_TC_OFFSET_S, tc_offset[i]);
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Configure rss tc mode fail, status = %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
+{
+#define HCLGE_RSS_INPUT_TUPLE_OTHER		0xf
+#define HCLGE_RSS_INPUT_TUPLE_SCTP		0x1f
+	struct hclge_rss_input_tuple *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
+
+	req = (struct hclge_rss_input_tuple *)desc.data;
+	req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
+	req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	req->ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	req->ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	req->ipv6_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
+	req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Configure rss input fail, status = %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
+			 u8 *key, u8 *hfunc)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int i;
+
+	/* Get hash algorithm */
+	if (hfunc)
+		*hfunc = hclge_get_rss_algo(hdev);
+
+	/* Get the RSS Key required by the user */
+	if (key)
+		memcpy(key, vport->rss_hash_key, HCLGE_RSS_KEY_SIZE);
+
+	/* Get indirect table */
+	if (indir)
+		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+			indir[i] =  vport->rss_indirection_tbl[i];
+
+	return 0;
+}
+
+static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
+			 const  u8 *key, const  u8 hfunc)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u8 hash_algo;
+	int ret, i;
+
+	/* Set the RSS Hash Key if specififed by the user */
+	if (key) {
+		/* Update the shadow RSS key with user specified qids */
+		memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
+
+		if (hfunc == ETH_RSS_HASH_TOP ||
+		    hfunc == ETH_RSS_HASH_NO_CHANGE)
+			hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+		else
+			return -EINVAL;
+		ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
+		if (ret)
+			return ret;
+	}
+
+	/* Update the shadow RSS table with user specified qids */
+	for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+		vport->rss_indirection_tbl[i] = indir[i];
+
+	/* Update the hardware */
+	ret = hclge_set_rss_indir_table(hdev, indir);
+	return ret;
+}
+
+static int hclge_get_tc_size(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hdev->rss_size_max;
+}
+
+static int hclge_rss_init_hw(struct hclge_dev *hdev)
+{
+	const  u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+	struct hclge_vport *vport = hdev->vport;
+	u16 tc_offset[HCLGE_MAX_TC_NUM];
+	u8 rss_key[HCLGE_RSS_KEY_SIZE];
+	u16 tc_valid[HCLGE_MAX_TC_NUM];
+	u16 tc_size[HCLGE_MAX_TC_NUM];
+	u32 *rss_indir = NULL;
+	const u8 *key;
+	int i, ret, j;
+
+	rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
+	if (!rss_indir)
+		return -ENOMEM;
+
+	/* Get default RSS key */
+	netdev_rss_key_fill(rss_key, HCLGE_RSS_KEY_SIZE);
+
+	/* Initialize RSS indirect table for each vport */
+	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
+			vport[j].rss_indirection_tbl[i] =
+				i % hdev->rss_size_max;
+			rss_indir[i] = vport[j].rss_indirection_tbl[i];
+		}
+	}
+	ret = hclge_set_rss_indir_table(hdev, rss_indir);
+	if (ret)
+		goto err;
+
+	key = rss_key;
+	ret = hclge_set_rss_algo_key(hdev, hfunc, key);
+	if (ret)
+		goto err;
+
+	ret = hclge_set_rss_input_tuple(hdev);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		if (hdev->hw_tc_map & BIT(i))
+			tc_valid[i] = 1;
+		else
+			tc_valid[i] = 0;
+
+		switch (hdev->rss_size_max) {
+		case HCLGE_RSS_TC_SIZE_0:
+			tc_size[i] = 0;
+			break;
+		case HCLGE_RSS_TC_SIZE_1:
+			tc_size[i] = 1;
+			break;
+		case HCLGE_RSS_TC_SIZE_2:
+			tc_size[i] = 2;
+			break;
+		case HCLGE_RSS_TC_SIZE_3:
+			tc_size[i] = 3;
+			break;
+		case HCLGE_RSS_TC_SIZE_4:
+			tc_size[i] = 4;
+			break;
+		case HCLGE_RSS_TC_SIZE_5:
+			tc_size[i] = 5;
+			break;
+		case HCLGE_RSS_TC_SIZE_6:
+			tc_size[i] = 6;
+			break;
+		case HCLGE_RSS_TC_SIZE_7:
+			tc_size[i] = 7;
+			break;
+		default:
+			break;
+		}
+		tc_offset[i] = hdev->rss_size_max * i;
+	}
+	ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+
+err:
+	kfree(rss_indir);
+
+	return ret;
+}
+
+int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
+				   struct hnae3_ring_chain_node *ring_chain)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_ctrl_vector_chain *req;
+	struct hnae3_ring_chain_node *node;
+	struct hclge_desc desc;
+	int ret;
+	int i;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false);
+
+	req = (struct hclge_ctrl_vector_chain *)desc.data;
+	req->int_vector_id = vector_id;
+
+	i = 0;
+	for (node = ring_chain; node; node = node->next) {
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
+			       HCLGE_INT_TYPE_S,
+			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
+			       HCLGE_TQP_ID_S,	node->tqp_index);
+		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+
+		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
+			req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
+
+			ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+			if (ret) {
+				dev_err(&hdev->pdev->dev,
+					"Map TQP fail, status is %d.\n",
+					ret);
+				return ret;
+			}
+			i = 0;
+
+			hclge_cmd_setup_basic_desc(&desc,
+						   HCLGE_OPC_ADD_RING_TO_VECTOR,
+						   false);
+			req->int_vector_id = vector_id;
+		}
+	}
+
+	if (i > 0) {
+		req->int_cause_num = i;
+
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Map TQP fail, status is %d.\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int hclge_map_handle_ring_to_vector(struct hnae3_handle *handle,
+				    int vector,
+				    struct hnae3_ring_chain_node *ring_chain)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int vector_id;
+
+	vector_id = hclge_get_vector_index(hdev, vector);
+	if (vector_id < 0) {
+		dev_err(&hdev->pdev->dev,
+			"Get vector index fail. ret =%d\n", vector_id);
+		return vector_id;
+	}
+
+	return hclge_map_vport_ring_to_vector(vport, vector_id, ring_chain);
+}
+
+static int hclge_unmap_ring_from_vector(
+	struct hnae3_handle *handle, int vector,
+	struct hnae3_ring_chain_node *ring_chain)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_ctrl_vector_chain *req;
+	struct hnae3_ring_chain_node *node;
+	struct hclge_desc desc;
+	int i, vector_id;
+	int ret;
+
+	vector_id = hclge_get_vector_index(hdev, vector);
+	if (vector_id < 0) {
+		dev_err(&handle->pdev->dev,
+			"Get vector index fail. ret =%d\n", vector_id);
+		return vector_id;
+	}
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false);
+
+	req = (struct hclge_ctrl_vector_chain *)desc.data;
+	req->int_vector_id = vector_id;
+
+	i = 0;
+	for (node = ring_chain; node; node = node->next) {
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
+			       HCLGE_INT_TYPE_S,
+			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
+			       HCLGE_TQP_ID_S,	node->tqp_index);
+
+		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+
+		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
+			req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
+
+			ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+			if (ret) {
+				dev_err(&hdev->pdev->dev,
+					"Unmap TQP fail, status is %d.\n",
+					ret);
+				return ret;
+			}
+			i = 0;
+			hclge_cmd_setup_basic_desc(&desc,
+						   HCLGE_OPC_ADD_RING_TO_VECTOR,
+						   false);
+			req->int_vector_id = vector_id;
+		}
+	}
+
+	if (i > 0) {
+		req->int_cause_num = i;
+
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Unmap TQP fail, status is %d.\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+			       struct hclge_promisc_param *param)
+{
+	struct hclge_promisc_cfg *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PROMISC_MODE, false);
+
+	req = (struct hclge_promisc_cfg *)desc.data;
+	req->vf_id = param->vf_id;
+	req->flag = (param->enable << HCLGE_PROMISC_EN_B);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Set promisc mode fail, status is %d.\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
+			      bool en_mc, bool en_bc, int vport_id)
+{
+	if (!param)
+		return;
+
+	memset(param, 0, sizeof(struct hclge_promisc_param));
+	if (en_uc)
+		param->enable = HCLGE_PROMISC_EN_UC;
+	if (en_mc)
+		param->enable |= HCLGE_PROMISC_EN_MC;
+	if (en_bc)
+		param->enable |= HCLGE_PROMISC_EN_BC;
+	param->vf_id = vport_id;
+}
+
+static void hclge_set_promisc_mode(struct hnae3_handle *handle, u32 en)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_promisc_param param;
+
+	hclge_promisc_param_init(&param, en, en, true, vport->vport_id);
+	hclge_cmd_set_promisc_mode(hdev, &param);
+}
+
+static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+{
+	struct hclge_desc desc;
+	struct hclge_config_mac_mode *req =
+		(struct hclge_config_mac_mode *)desc.data;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_TX_EN_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_EN_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_TX_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_RX_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_TX_B, 0);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_RX_B, 0);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_APP_LP_B, 0);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_LINE_LP_B, 0);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_FCS_TX_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_FCS_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en,
+		     HCLGE_MAC_RX_FCS_STRIP_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en,
+		     HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en,
+		     HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
+	hnae_set_bit(req->txrx_pad_fcs_loop_en,
+		     HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"mac enable fail, ret =%d.\n", ret);
+}
+
+static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id,
+			    int stream_id, bool enable)
+{
+	struct hclge_desc desc;
+	struct hclge_cfg_com_tqp_queue *req =
+		(struct hclge_cfg_com_tqp_queue *)desc.data;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
+	req->tqp_id = cpu_to_le16(tqp_id & HCLGE_RING_ID_MASK);
+	req->stream_id = cpu_to_le16(stream_id);
+	req->enable |= enable << HCLGE_TQP_ENABLE_B;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Tqp enable fail, status =%d.\n", ret);
+	return ret;
+}
+
+static void hclge_reset_tqp_stats(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hnae3_queue *queue;
+	struct hclge_tqp *tqp;
+	int i;
+
+	for (i = 0; i < vport->alloc_tqps; i++) {
+		queue = handle->kinfo.tqp[i];
+		tqp = container_of(queue, struct hclge_tqp, q);
+		memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats));
+	}
+}
+
+static int hclge_ae_start(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int i, queue_id, ret;
+
+	for (i = 0; i < vport->alloc_tqps; i++) {
+		/* todo clear interrupt */
+		/* ring enable */
+		queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
+		if (queue_id < 0) {
+			dev_warn(&hdev->pdev->dev,
+				 "Get invalid queue id, ignore it\n");
+			continue;
+		}
+
+		hclge_tqp_enable(hdev, queue_id, 0, true);
+	}
+	/* mac enable */
+	hclge_cfg_mac_mode(hdev, true);
+	clear_bit(HCLGE_STATE_DOWN, &hdev->state);
+	(void)mod_timer(&hdev->service_timer, jiffies + HZ);
+
+	ret = hclge_mac_start_phy(hdev);
+	if (ret)
+		return ret;
+
+	/* reset tqp stats */
+	hclge_reset_tqp_stats(handle);
+
+	return 0;
+}
+
+static void hclge_ae_stop(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int i, queue_id;
+
+	for (i = 0; i < vport->alloc_tqps; i++) {
+		/* Ring disable */
+		queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
+		if (queue_id < 0) {
+			dev_warn(&hdev->pdev->dev,
+				 "Get invalid queue id, ignore it\n");
+			continue;
+		}
+
+		hclge_tqp_enable(hdev, queue_id, 0, false);
+	}
+	/* Mac disable */
+	hclge_cfg_mac_mode(hdev, false);
+
+	hclge_mac_stop_phy(hdev);
+
+	/* reset tqp stats */
+	hclge_reset_tqp_stats(handle);
+}
+
+static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
+					 u16 cmdq_resp, u8  resp_code,
+					 enum hclge_mac_vlan_tbl_opcode op)
+{
+	struct hclge_dev *hdev = vport->back;
+	int return_status = -EIO;
+
+	if (cmdq_resp) {
+		dev_err(&hdev->pdev->dev,
+			"cmdq execute failed for get_mac_vlan_cmd_status,status=%d.\n",
+			cmdq_resp);
+		return -EIO;
+	}
+
+	if (op == HCLGE_MAC_VLAN_ADD) {
+		if ((!resp_code) || (resp_code == 1)) {
+			return_status = 0;
+		} else if (resp_code == 2) {
+			return_status = -EIO;
+			dev_err(&hdev->pdev->dev,
+				"add mac addr failed for uc_overflow.\n");
+		} else if (resp_code == 3) {
+			return_status = -EIO;
+			dev_err(&hdev->pdev->dev,
+				"add mac addr failed for mc_overflow.\n");
+		} else {
+			dev_err(&hdev->pdev->dev,
+				"add mac addr failed for undefined, code=%d.\n",
+				resp_code);
+		}
+	} else if (op == HCLGE_MAC_VLAN_REMOVE) {
+		if (!resp_code) {
+			return_status = 0;
+		} else if (resp_code == 1) {
+			return_status = -EIO;
+			dev_dbg(&hdev->pdev->dev,
+				"remove mac addr failed for miss.\n");
+		} else {
+			dev_err(&hdev->pdev->dev,
+				"remove mac addr failed for undefined, code=%d.\n",
+				resp_code);
+		}
+	} else if (op == HCLGE_MAC_VLAN_LKUP) {
+		if (!resp_code) {
+			return_status = 0;
+		} else if (resp_code == 1) {
+			return_status = -EIO;
+			dev_dbg(&hdev->pdev->dev,
+				"lookup mac addr failed for miss.\n");
+		} else {
+			dev_err(&hdev->pdev->dev,
+				"lookup mac addr failed for undefined, code=%d.\n",
+				resp_code);
+		}
+	} else {
+		return_status = -EIO;
+		dev_err(&hdev->pdev->dev,
+			"unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
+			op);
+	}
+
+	return return_status;
+}
+
+static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
+{
+	int word_num;
+	int bit_num;
+
+	if (vfid > 255 || vfid < 0)
+		return -EIO;
+
+	if (vfid >= 0 && vfid <= 191) {
+		word_num = vfid / 32;
+		bit_num  = vfid % 32;
+		if (clr)
+			desc[1].data[word_num] &= ~(1 << bit_num);
+		else
+			desc[1].data[word_num] |= (1 << bit_num);
+	} else {
+		word_num = (vfid - 192) / 32;
+		bit_num  = vfid % 32;
+		if (clr)
+			desc[2].data[word_num] &= ~(1 << bit_num);
+		else
+			desc[2].data[word_num] |= (1 << bit_num);
+	}
+
+	return 0;
+}
+
+static bool hclge_is_all_function_id_zero(struct hclge_desc *desc)
+{
+#define HCLGE_DESC_NUMBER 3
+#define HCLGE_FUNC_NUMBER_PER_DESC 6
+	int i, j;
+
+	for (i = 0; i < HCLGE_DESC_NUMBER; i++)
+		for (j = 0; j < HCLGE_FUNC_NUMBER_PER_DESC; j++)
+			if (desc[i].data[j])
+				return false;
+
+	return true;
+}
+
+static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req,
+				   const u8 *addr)
+{
+	const unsigned char *mac_addr = addr;
+	u32 high_val = mac_addr[2] << 16 | (mac_addr[3] << 24) |
+		       (mac_addr[0]) | (mac_addr[1] << 8);
+	u32 low_val  = mac_addr[4] | (mac_addr[5] << 8);
+
+	new_req->mac_addr_hi32 = cpu_to_le32(high_val);
+	new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff);
+}
+
+u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
+				    const u8 *addr)
+{
+	u16 high_val = addr[1] | (addr[0] << 8);
+	struct hclge_dev *hdev = vport->back;
+	u32 rsh = 4 - hdev->mta_mac_sel_type;
+	u16 ret_val = (high_val >> rsh) & 0xfff;
+
+	return ret_val;
+}
+
+static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
+				     enum hclge_mta_dmac_sel_type mta_mac_sel,
+				     bool enable)
+{
+	struct hclge_mta_filter_mode *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_mta_filter_mode *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false);
+
+	hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
+		     enable);
+	hnae_set_field(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_SEL_M,
+		       HCLGE_CFG_MTA_MAC_SEL_S, mta_mac_sel);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Config mat filter mode failed for cmd_send, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
+			      u8 func_id,
+			      bool enable)
+{
+	struct hclge_cfg_func_mta_filter *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_cfg_func_mta_filter *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false);
+
+	hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
+		     enable);
+	req->function_id = func_id;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Config func_id enable failed for cmd_send, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_set_mta_table_item(struct hclge_vport *vport,
+				    u16 idx,
+				    bool enable)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_cfg_func_mta_item *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_cfg_func_mta_item *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false);
+	hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
+
+	hnae_set_field(req->item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
+		       HCLGE_CFG_MTA_ITEM_IDX_S, idx);
+	req->item_idx = cpu_to_le16(req->item_idx);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Config mta table item failed for cmd_send, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
+				     struct hclge_mac_vlan_tbl_entry *req)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	u8 resp_code;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false);
+
+	memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"del mac addr failed for cmd_send, ret =%d.\n",
+			ret);
+		return ret;
+	}
+	resp_code = (desc.data[0] >> 8) & 0xff;
+
+	return hclge_get_mac_vlan_cmd_status(vport, desc.retval, resp_code,
+					     HCLGE_MAC_VLAN_REMOVE);
+}
+
+static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
+				     struct hclge_mac_vlan_tbl_entry *req,
+				     struct hclge_desc *desc,
+				     bool is_mc)
+{
+	struct hclge_dev *hdev = vport->back;
+	u8 resp_code;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_MAC_VLAN_ADD, true);
+	if (is_mc) {
+		desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		memcpy(desc[0].data,
+		       req,
+		       sizeof(struct hclge_mac_vlan_tbl_entry));
+		hclge_cmd_setup_basic_desc(&desc[1],
+					   HCLGE_OPC_MAC_VLAN_ADD,
+					   true);
+		desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		hclge_cmd_setup_basic_desc(&desc[2],
+					   HCLGE_OPC_MAC_VLAN_ADD,
+					   true);
+		ret = hclge_cmd_send(&hdev->hw, desc, 3);
+	} else {
+		memcpy(desc[0].data,
+		       req,
+		       sizeof(struct hclge_mac_vlan_tbl_entry));
+		ret = hclge_cmd_send(&hdev->hw, desc, 1);
+	}
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"lookup mac addr failed for cmd_send, ret =%d.\n",
+			ret);
+		return ret;
+	}
+	resp_code = (desc[0].data[0] >> 8) & 0xff;
+
+	return hclge_get_mac_vlan_cmd_status(vport, desc[0].retval, resp_code,
+					     HCLGE_MAC_VLAN_LKUP);
+}
+
+static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
+				  struct hclge_mac_vlan_tbl_entry *req,
+				  struct hclge_desc *mc_desc)
+{
+	struct hclge_dev *hdev = vport->back;
+	int cfg_status;
+	u8 resp_code;
+	int ret;
+
+	if (!mc_desc) {
+		struct hclge_desc desc;
+
+		hclge_cmd_setup_basic_desc(&desc,
+					   HCLGE_OPC_MAC_VLAN_ADD,
+					   false);
+		memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		resp_code = (desc.data[0] >> 8) & 0xff;
+		cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval,
+							   resp_code,
+							   HCLGE_MAC_VLAN_ADD);
+	} else {
+		mc_desc[0].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+		mc_desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		mc_desc[1].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+		mc_desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+		mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT);
+		memcpy(mc_desc[0].data, req,
+		       sizeof(struct hclge_mac_vlan_tbl_entry));
+		ret = hclge_cmd_send(&hdev->hw, mc_desc, 3);
+		resp_code = (mc_desc[0].data[0] >> 8) & 0xff;
+		cfg_status = hclge_get_mac_vlan_cmd_status(vport,
+							   mc_desc[0].retval,
+							   resp_code,
+							   HCLGE_MAC_VLAN_ADD);
+	}
+
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"add mac addr failed for cmd_send, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	return cfg_status;
+}
+
+static int hclge_add_uc_addr(struct hnae3_handle *handle,
+			     const unsigned char *addr)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	return hclge_add_uc_addr_common(vport, addr);
+}
+
+int hclge_add_uc_addr_common(struct hclge_vport *vport,
+			     const unsigned char *addr)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac_vlan_tbl_entry req;
+	enum hclge_cmd_status status;
+
+	/* mac addr check */
+	if (is_zero_ether_addr(addr) ||
+	    is_broadcast_ether_addr(addr) ||
+	    is_multicast_ether_addr(addr)) {
+		dev_err(&hdev->pdev->dev,
+			"Set_uc mac err! invalid mac:%pM. is_zero:%d,is_br=%d,is_mul=%d\n",
+			 addr,
+			 is_zero_ether_addr(addr),
+			 is_broadcast_ether_addr(addr),
+			 is_multicast_ether_addr(addr));
+		return -EINVAL;
+	}
+
+	memset(&req, 0, sizeof(req));
+	hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0);
+	hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hnae_set_bit(req.egress_port,
+		     HCLGE_MAC_EPORT_SW_EN_B, 0);
+	hnae_set_bit(req.egress_port,
+		     HCLGE_MAC_EPORT_TYPE_B, 0);
+	hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_VFID_M,
+		       HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
+	hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_PFID_M,
+		       HCLGE_MAC_EPORT_PFID_S, 0);
+	req.egress_port = cpu_to_le16(req.egress_port);
+
+	hclge_prepare_mac_addr(&req, addr);
+
+	status = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+
+	return status;
+}
+
+static int hclge_rm_uc_addr(struct hnae3_handle *handle,
+			    const unsigned char *addr)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	return hclge_rm_uc_addr_common(vport, addr);
+}
+
+int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+			    const unsigned char *addr)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac_vlan_tbl_entry req;
+	enum hclge_cmd_status status;
+
+	/* mac addr check */
+	if (is_zero_ether_addr(addr) ||
+	    is_broadcast_ether_addr(addr) ||
+	    is_multicast_ether_addr(addr)) {
+		dev_dbg(&hdev->pdev->dev,
+			"Remove mac err! invalid mac:%pM.\n",
+			 addr);
+		return -EINVAL;
+	}
+
+	memset(&req, 0, sizeof(req));
+	hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hclge_prepare_mac_addr(&req, addr);
+	status = hclge_remove_mac_vlan_tbl(vport, &req);
+
+	return status;
+}
+
+static int hclge_add_mc_addr(struct hnae3_handle *handle,
+			     const unsigned char *addr)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	return	hclge_add_mc_addr_common(vport, addr);
+}
+
+int hclge_add_mc_addr_common(struct hclge_vport *vport,
+			     const unsigned char *addr)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac_vlan_tbl_entry req;
+	struct hclge_desc desc[3];
+	u16 tbl_idx;
+	int status;
+
+	/* mac addr check */
+	if (!is_multicast_ether_addr(addr)) {
+		dev_err(&hdev->pdev->dev,
+			"Add mc mac err! invalid mac:%pM.\n",
+			 addr);
+		return -EINVAL;
+	}
+	memset(&req, 0, sizeof(req));
+	hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+	hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hclge_prepare_mac_addr(&req, addr);
+	status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
+	if (!status) {
+		/* This mac addr exist, update VFID for it */
+		hclge_update_desc_vfid(desc, vport->vport_id, false);
+		status = hclge_add_mac_vlan_tbl(vport, &req, desc);
+	} else {
+		/* This mac addr do not exist, add new entry for it */
+		memset(desc[0].data, 0, sizeof(desc[0].data));
+		memset(desc[1].data, 0, sizeof(desc[0].data));
+		memset(desc[2].data, 0, sizeof(desc[0].data));
+		hclge_update_desc_vfid(desc, vport->vport_id, false);
+		status = hclge_add_mac_vlan_tbl(vport, &req, desc);
+	}
+
+	/* Set MTA table for this MAC address */
+	tbl_idx = hclge_get_mac_addr_to_mta_index(vport, addr);
+	status = hclge_set_mta_table_item(vport, tbl_idx, true);
+
+	return status;
+}
+
+static int hclge_rm_mc_addr(struct hnae3_handle *handle,
+			    const unsigned char *addr)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	return hclge_rm_mc_addr_common(vport, addr);
+}
+
+int hclge_rm_mc_addr_common(struct hclge_vport *vport,
+			    const unsigned char *addr)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac_vlan_tbl_entry req;
+	enum hclge_cmd_status status;
+	struct hclge_desc desc[3];
+	u16 tbl_idx;
+
+	/* mac addr check */
+	if (!is_multicast_ether_addr(addr)) {
+		dev_dbg(&hdev->pdev->dev,
+			"Remove mc mac err! invalid mac:%pM.\n",
+			 addr);
+		return -EINVAL;
+	}
+
+	memset(&req, 0, sizeof(req));
+	hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+	hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+	hclge_prepare_mac_addr(&req, addr);
+	status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
+	if (!status) {
+		/* This mac addr exist, remove this handle's VFID for it */
+		hclge_update_desc_vfid(desc, vport->vport_id, true);
+
+		if (hclge_is_all_function_id_zero(desc))
+			/* All the vfid is zero, so need to delete this entry */
+			status = hclge_remove_mac_vlan_tbl(vport, &req);
+		else
+			/* Not all the vfid is zero, update the vfid */
+			status = hclge_add_mac_vlan_tbl(vport, &req, desc);
+
+	} else {
+		/* This mac addr do not exist, can't delete it */
+		dev_err(&hdev->pdev->dev,
+			"Rm mutilcast mac addr failed, ret = %d.\n",
+			status);
+		return -EIO;
+	}
+
+	/* Set MTB table for this MAC address */
+	tbl_idx = hclge_get_mac_addr_to_mta_index(vport, addr);
+	status = hclge_set_mta_table_item(vport, tbl_idx, false);
+
+	return status;
+}
+
+static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	ether_addr_copy(p, hdev->hw.mac.mac_addr);
+}
+
+static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
+{
+	const unsigned char *new_addr = (const unsigned char *)p;
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	/* mac addr check */
+	if (is_zero_ether_addr(new_addr) ||
+	    is_broadcast_ether_addr(new_addr) ||
+	    is_multicast_ether_addr(new_addr)) {
+		dev_err(&hdev->pdev->dev,
+			"Change uc mac err! invalid mac:%p.\n",
+			 new_addr);
+		return -EINVAL;
+	}
+
+	hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr);
+
+	if (!hclge_add_uc_addr(handle, new_addr)) {
+		ether_addr_copy(hdev->hw.mac.mac_addr, new_addr);
+		return 0;
+	}
+
+	return -EIO;
+}
+
+static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
+				      bool filter_en)
+{
+	struct hclge_vlan_filter_ctrl *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false);
+
+	req = (struct hclge_vlan_filter_ctrl *)desc.data;
+	req->vlan_type = vlan_type;
+	req->vlan_fe = filter_en;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
+			     bool is_kill, u16 vlan, u8 qos, __be16 proto)
+{
+#define HCLGE_MAX_VF_BYTES  16
+	struct hclge_vlan_filter_vf_cfg *req0;
+	struct hclge_vlan_filter_vf_cfg *req1;
+	struct hclge_desc desc[2];
+	u8 vf_byte_val;
+	u8 vf_byte_off;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0],
+				   HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
+	hclge_cmd_setup_basic_desc(&desc[1],
+				   HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
+
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	vf_byte_off = vfid / 8;
+	vf_byte_val = 1 << (vfid % 8);
+
+	req0 = (struct hclge_vlan_filter_vf_cfg *)desc[0].data;
+	req1 = (struct hclge_vlan_filter_vf_cfg *)desc[1].data;
+
+	req0->vlan_id  = vlan;
+	req0->vlan_cfg = is_kill;
+
+	if (vf_byte_off < HCLGE_MAX_VF_BYTES)
+		req0->vf_bitmap[vf_byte_off] = vf_byte_val;
+	else
+		req1->vf_bitmap[vf_byte_off - HCLGE_MAX_VF_BYTES] = vf_byte_val;
+
+	ret = hclge_cmd_send(&hdev->hw, desc, 2);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Send vf vlan command fail, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	if (!is_kill) {
+		if (!req0->resp_code || req0->resp_code == 1)
+			return 0;
+
+		dev_err(&hdev->pdev->dev,
+			"Add vf vlan filter fail, ret =%d.\n",
+			req0->resp_code);
+	} else {
+		if (!req0->resp_code)
+			return 0;
+
+		dev_err(&hdev->pdev->dev,
+			"Kill vf vlan filter fail, ret =%d.\n",
+			req0->resp_code);
+	}
+
+	return -EIO;
+}
+
+static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
+				      __be16 proto, u16 vlan_id,
+				      bool is_kill)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_vlan_filter_pf_cfg *req;
+	struct hclge_desc desc;
+	u8 vlan_offset_byte_val;
+	u8 vlan_offset_byte;
+	u8 vlan_offset_160;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_PF_CFG, false);
+
+	vlan_offset_160 = vlan_id / 160;
+	vlan_offset_byte = (vlan_id % 160) / 8;
+	vlan_offset_byte_val = 1 << (vlan_id % 8);
+
+	req = (struct hclge_vlan_filter_pf_cfg *)desc.data;
+	req->vlan_offset = vlan_offset_160;
+	req->vlan_cfg = is_kill;
+	req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"port vlan command, send fail, ret =%d.\n",
+			ret);
+		return ret;
+	}
+
+	ret = hclge_set_vf_vlan_common(hdev, 0, is_kill, vlan_id, 0, proto);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Set pf vlan filter config fail, ret =%d.\n",
+			ret);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+				    u16 vlan, u8 qos, __be16 proto)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if ((vfid >= hdev->num_alloc_vfs) || (vlan > 4095) || (qos > 7))
+		return -EINVAL;
+	if (proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
+	return hclge_set_vf_vlan_common(hdev, vfid, false, vlan, qos, proto);
+}
+
+static int hclge_init_vlan_config(struct hclge_dev *hdev)
+{
+#define HCLGE_VLAN_TYPE_VF_TABLE   0
+#define HCLGE_VLAN_TYPE_PORT_TABLE 1
+	int ret;
+
+	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE,
+					 true);
+	if (ret)
+		return ret;
+
+	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE,
+					 true);
+
+	return ret;
+}
+
+static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_config_max_frm_size *req;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	int ret;
+
+	if ((new_mtu < HCLGE_MAC_MIN_MTU) || (new_mtu > HCLGE_MAC_MAX_MTU))
+		return -EINVAL;
+
+	hdev->mps = new_mtu;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false);
+
+	req = (struct hclge_config_max_frm_size *)desc.data;
+	req->max_frm_size = cpu_to_le16(new_mtu);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "set mtu fail, ret =%d.\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
+				    bool enable)
+{
+	struct hclge_reset_tqp_queue *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false);
+
+	req = (struct hclge_reset_tqp_queue *)desc.data;
+	req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
+	hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Send tqp reset cmd error, status =%d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
+{
+	struct hclge_reset_tqp_queue *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true);
+
+	req = (struct hclge_reset_tqp_queue *)desc.data;
+	req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get reset status error, status =%d\n", ret);
+		return ret;
+	}
+
+	return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
+}
+
+static void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int reset_try_times = 0;
+	int reset_status;
+	int ret;
+
+	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
+	if (ret) {
+		dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
+		return;
+	}
+
+	ret = hclge_send_reset_tqp_cmd(hdev, queue_id, true);
+	if (ret) {
+		dev_warn(&hdev->pdev->dev,
+			 "Send reset tqp cmd fail, ret = %d\n", ret);
+		return;
+	}
+
+	reset_try_times = 0;
+	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+		/* Wait for tqp hw reset */
+		msleep(20);
+		reset_status = hclge_get_reset_status(hdev, queue_id);
+		if (reset_status)
+			break;
+	}
+
+	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
+		dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
+		return;
+	}
+
+	ret = hclge_send_reset_tqp_cmd(hdev, queue_id, false);
+	if (ret) {
+		dev_warn(&hdev->pdev->dev,
+			 "Deassert the soft reset fail, ret = %d\n", ret);
+		return;
+	}
+}
+
+static u32 hclge_get_fw_version(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hdev->fw_version;
+}
+
+static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
+				 u32 *rx_en, u32 *tx_en)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	*auto_neg = hclge_get_autoneg(handle);
+
+	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
+		*rx_en = 0;
+		*tx_en = 0;
+		return;
+	}
+
+	if (hdev->tm_info.fc_mode == HCLGE_FC_RX_PAUSE) {
+		*rx_en = 1;
+		*tx_en = 0;
+	} else if (hdev->tm_info.fc_mode == HCLGE_FC_TX_PAUSE) {
+		*tx_en = 1;
+		*rx_en = 0;
+	} else if (hdev->tm_info.fc_mode == HCLGE_FC_FULL) {
+		*rx_en = 1;
+		*tx_en = 1;
+	} else {
+		*rx_en = 0;
+		*tx_en = 0;
+	}
+}
+
+static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
+					  u8 *auto_neg, u32 *speed, u8 *duplex)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (speed)
+		*speed = hdev->hw.mac.speed;
+	if (duplex)
+		*duplex = hdev->hw.mac.duplex;
+	if (auto_neg)
+		*auto_neg = hdev->hw.mac.autoneg;
+}
+
+static void hclge_get_media_type(struct hnae3_handle *handle, u8 *media_type)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (media_type)
+		*media_type = hdev->hw.mac.media_type;
+}
+
+static void hclge_get_mdix_mode(struct hnae3_handle *handle,
+				u8 *tp_mdix_ctrl, u8 *tp_mdix)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+	int mdix_ctrl, mdix, retval, is_resolved;
+
+	if (!phydev) {
+		*tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+		*tp_mdix = ETH_TP_MDI_INVALID;
+		return;
+	}
+
+	phy_write(phydev, HCLGE_PHY_PAGE_REG, HCLGE_PHY_PAGE_MDIX);
+
+	retval = phy_read(phydev, HCLGE_PHY_CSC_REG);
+	mdix_ctrl = hnae_get_field(retval, HCLGE_PHY_MDIX_CTRL_M,
+				   HCLGE_PHY_MDIX_CTRL_S);
+
+	retval = phy_read(phydev, HCLGE_PHY_CSS_REG);
+	mdix = hnae_get_bit(retval, HCLGE_PHY_MDIX_STATUS_B);
+	is_resolved = hnae_get_bit(retval, HCLGE_PHY_SPEED_DUP_RESOLVE_B);
+
+	phy_write(phydev, HCLGE_PHY_PAGE_REG, HCLGE_PHY_PAGE_COPPER);
+
+	switch (mdix_ctrl) {
+	case 0x0:
+		*tp_mdix_ctrl = ETH_TP_MDI;
+		break;
+	case 0x1:
+		*tp_mdix_ctrl = ETH_TP_MDI_X;
+		break;
+	case 0x3:
+		*tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+		break;
+	default:
+		*tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+		break;
+	}
+
+	if (!is_resolved)
+		*tp_mdix = ETH_TP_MDI_INVALID;
+	else if (mdix)
+		*tp_mdix = ETH_TP_MDI_X;
+	else
+		*tp_mdix = ETH_TP_MDI;
+}
+
+static int hclge_init_client_instance(struct hnae3_client *client,
+				      struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct hclge_vport *vport;
+	int i, ret;
+
+	for (i = 0; i <  hdev->num_vmdq_vport + 1; i++) {
+		vport = &hdev->vport[i];
+
+		switch (client->type) {
+		case HNAE3_CLIENT_KNIC:
+
+			hdev->nic_client = client;
+			vport->nic.client = client;
+			ret = client->ops->init_instance(&vport->nic);
+			if (ret)
+				goto err;
+
+			if (hdev->roce_client &&
+			    hnae_get_bit(hdev->ae_dev->flag,
+					 HNAE_DEV_SUPPORT_ROCE_B)) {
+				struct hnae3_client *rc = hdev->roce_client;
+
+				ret = hclge_init_roce_base_info(vport);
+				if (ret)
+					goto err;
+
+				ret = rc->ops->init_instance(&vport->roce);
+				if (ret)
+					goto err;
+			}
+
+			break;
+		case HNAE3_CLIENT_UNIC:
+			hdev->nic_client = client;
+			vport->nic.client = client;
+
+			ret = client->ops->init_instance(&vport->nic);
+			if (ret)
+				goto err;
+
+			break;
+		case HNAE3_CLIENT_ROCE:
+			if (hnae_get_bit(hdev->ae_dev->flag,
+					 HNAE_DEV_SUPPORT_ROCE_B)) {
+				hdev->roce_client = client;
+				vport->roce.client = client;
+			}
+
+			if (hdev->roce_client) {
+				ret = hclge_init_roce_base_info(vport);
+				if (ret)
+					goto err;
+
+				ret = client->ops->init_instance(&vport->roce);
+				if (ret)
+					goto err;
+			}
+		}
+	}
+
+	return 0;
+err:
+	return ret;
+}
+
+static void hclge_uninit_client_instance(struct hnae3_client *client,
+					 struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct hclge_vport *vport;
+	int i;
+
+	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+		vport = &hdev->vport[i];
+		if (hdev->roce_client)
+			hdev->roce_client->ops->uninit_instance(&vport->roce,
+								0);
+		if (client->type == HNAE3_CLIENT_ROCE)
+			return;
+		if (client->ops->uninit_instance)
+			client->ops->uninit_instance(&vport->nic, 0);
+	}
+}
+
+static int hclge_pci_init(struct hclge_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	struct hclge_hw *hw;
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable PCI device\n");
+		goto err_no_drvdata;
+	}
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(&pdev->dev,
+				"can't set consistent PCI DMA");
+			goto err_disable_device;
+		}
+		dev_warn(&pdev->dev, "set DMA mask to 32 bits\n");
+	}
+
+	ret = pci_request_regions(pdev, HCLGE_DRIVER_NAME);
+	if (ret) {
+		dev_err(&pdev->dev, "PCI request regions failed %d\n", ret);
+		goto err_disable_device;
+	}
+
+	pci_set_master(pdev);
+	hw = &hdev->hw;
+	hw->back = hdev;
+	hw->io_base = pcim_iomap(pdev, 2, 0);
+	if (!hw->io_base) {
+		dev_err(&pdev->dev, "Can't map configuration register space\n");
+		ret = -ENOMEM;
+		goto err_clr_master;
+	}
+
+	return 0;
+err_clr_master:
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+err_disable_device:
+	pci_disable_device(pdev);
+err_no_drvdata:
+	pci_set_drvdata(pdev, NULL);
+
+	return ret;
+}
+
+static void hclge_pci_uninit(struct hclge_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+
+	if (hdev->flag & HCLGE_FLAG_USE_MSIX) {
+		pci_disable_msix(pdev);
+		devm_kfree(&pdev->dev, hdev->msix_entries);
+		hdev->msix_entries = NULL;
+	} else {
+		pci_disable_msi(pdev);
+	}
+
+	pci_clear_master(pdev);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	struct pci_dev *pdev = ae_dev->pdev;
+	const struct pci_device_id *id;
+	struct hclge_dev *hdev;
+	int ret;
+
+	hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+	if (!hdev) {
+		ret = -ENOMEM;
+		goto err_hclge_dev;
+	}
+
+	hdev->flag |= HCLGE_FLAG_USE_MSIX;
+	hdev->pdev = pdev;
+	hdev->ae_dev = ae_dev;
+	ae_dev->priv = hdev;
+
+	id = pci_match_id(roce_pci_tbl, ae_dev->pdev);
+	if (id)
+		hnae_set_bit(ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B, 1);
+
+	ret = hclge_pci_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "PCI init failed\n");
+		goto err_pci_init;
+	}
+
+	/* Command queue initialize */
+	ret = hclge_cmd_init(hdev);
+	if (ret)
+		goto err_cmd_init;
+
+	ret = hclge_get_cap(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "get hw capabilty error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_configure(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	if (hdev->flag & HCLGE_FLAG_USE_MSIX)
+		ret = hclge_init_msix(hdev);
+	else
+		ret = hclge_init_msi(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Init msix/msi error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_alloc_tqps(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Allocate TQPs error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_alloc_vport(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Allocate vport error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_mac_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
+		return ret;
+	}
+	ret = hclge_buffer_alloc(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
+		return  ret;
+	}
+
+	ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
+	if (ret) {
+		dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_rss_init_hw(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
+		return  ret;
+	}
+
+	ret = hclge_init_vlan_config(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
+		return  ret;
+	}
+
+	ret = hclge_tm_schd_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	setup_timer(&hdev->service_timer, hclge_service_timer,
+		    (unsigned long)hdev);
+	INIT_WORK(&hdev->service_task, hclge_service_task);
+
+	set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
+	set_bit(HCLGE_STATE_DOWN, &hdev->state);
+
+	pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
+	return 0;
+
+err_cmd_init:
+	pci_release_regions(pdev);
+err_pci_init:
+	pci_set_drvdata(pdev, NULL);
+err_hclge_dev:
+	return ret;
+}
+
+static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct hclge_mac *mac = &hdev->hw.mac;
+
+	set_bit(HCLGE_STATE_DOWN, &hdev->state);
+
+#ifdef CONFIG_PCI_IOV
+	hclge_disable_sriov(hdev);
+#endif
+
+	if (hdev->service_timer.data)
+		del_timer_sync(&hdev->service_timer);
+	if (hdev->service_task.func)
+		cancel_work_sync(&hdev->service_task);
+
+	if (mac->phydev)
+		mdiobus_unregister(mac->mdio_bus);
+
+	hclge_destroy_cmd_queue(&hdev->hw);
+	hclge_pci_uninit(hdev);
+	ae_dev->priv = NULL;
+}
+
+static const struct hnae3_ae_ops hclge_ops = {
+	.init_ae_dev = hclge_init_ae_dev,
+	.uninit_ae_dev = hclge_uninit_ae_dev,
+	.init_client_instance = hclge_init_client_instance,
+	.uninit_client_instance = hclge_uninit_client_instance,
+	.map_ring_to_vector = hclge_map_handle_ring_to_vector,
+	.unmap_ring_from_vector = hclge_unmap_ring_from_vector,
+	.get_vector = hclge_get_vector,
+	.set_promisc_mode = hclge_set_promisc_mode,
+	.start = hclge_ae_start,
+	.stop = hclge_ae_stop,
+	.get_status = hclge_get_status,
+	.get_ksettings_an_result = hclge_get_ksettings_an_result,
+	.update_speed_duplex_h = hclge_update_speed_duplex_h,
+	.cfg_mac_speed_dup_h = hclge_cfg_mac_speed_dup_h,
+	.get_media_type = hclge_get_media_type,
+	.get_rss_key_size = hclge_get_rss_key_size,
+	.get_rss_indir_size = hclge_get_rss_indir_size,
+	.get_rss = hclge_get_rss,
+	.set_rss = hclge_set_rss,
+	.get_tc_size = hclge_get_tc_size,
+	.get_mac_addr = hclge_get_mac_addr,
+	.set_mac_addr = hclge_set_mac_addr,
+	.add_uc_addr = hclge_add_uc_addr,
+	.rm_uc_addr = hclge_rm_uc_addr,
+	.add_mc_addr = hclge_add_mc_addr,
+	.rm_mc_addr = hclge_rm_mc_addr,
+	.set_autoneg = hclge_set_autoneg,
+	.get_autoneg = hclge_get_autoneg,
+	.get_pauseparam = hclge_get_pauseparam,
+	.set_mtu = hclge_set_mtu,
+	.reset_queue = hclge_reset_tqp,
+	.get_stats = hclge_get_stats,
+	.update_stats = hclge_update_stats,
+	.get_strings = hclge_get_strings,
+	.get_sset_count = hclge_get_sset_count,
+	.get_fw_version = hclge_get_fw_version,
+	.get_mdix_mode = hclge_get_mdix_mode,
+	.set_vlan_filter = hclge_set_port_vlan_filter,
+	.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
+};
+
+static struct hnae3_ae_algo ae_algo = {
+	.ops = &hclge_ops,
+	.name = HCLGE_NAME,
+	.pdev_id_table = ae_algo_pci_tbl,
+};
+
+static int hclge_init(void)
+{
+	pr_info("%s is initializing\n", HCLGE_NAME);
+
+	return hnae3_register_ae_algo(&ae_algo);
+}
+
+static void hclge_exit(void)
+{
+	hnae3_unregister_ae_algo(&ae_algo);
+}
+module_init(hclge_init);
+module_exit(hclge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_DESCRIPTION("HCLGE Driver");
+MODULE_VERSION(HCLGE_MOD_VERSION);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
new file mode 100644
index 000000000000..edb10ad075eb
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_MAIN_H
+#define __HCLGE_MAIN_H
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/phy.h>
+#include "hclge_cmd.h"
+#include "hnae3.h"
+
+#define HCLGE_MOD_VERSION "v1.0"
+#define HCLGE_DRIVER_NAME "hclge"
+
+#define HCLGE_INVALID_VPORT 0xffff
+
+#define HCLGE_ROCE_VECTOR_OFFSET	96
+
+#define HCLGE_PF_CFG_BLOCK_SIZE		32
+#define HCLGE_PF_CFG_DESC_NUM \
+	(HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES)
+
+#define HCLGE_VECTOR_REG_BASE		0x20000
+
+#define HCLGE_VECTOR_REG_OFFSET		0x4
+#define HCLGE_VECTOR_VF_OFFSET		0x100000
+
+#define HCLGE_RSS_IND_TBL_SIZE		512
+#define HCLGE_RSS_SET_BITMAP_MSK	0xffff
+#define HCLGE_RSS_KEY_SIZE		40
+#define HCLGE_RSS_HASH_ALGO_TOEPLITZ	0
+#define HCLGE_RSS_HASH_ALGO_SIMPLE	1
+#define HCLGE_RSS_HASH_ALGO_SYMMETRIC	2
+#define HCLGE_RSS_HASH_ALGO_MASK	0xf
+#define HCLGE_RSS_CFG_TBL_NUM \
+	(HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE)
+
+#define HCLGE_RSS_TC_SIZE_0		1
+#define HCLGE_RSS_TC_SIZE_1		2
+#define HCLGE_RSS_TC_SIZE_2		4
+#define HCLGE_RSS_TC_SIZE_3		8
+#define HCLGE_RSS_TC_SIZE_4		16
+#define HCLGE_RSS_TC_SIZE_5		32
+#define HCLGE_RSS_TC_SIZE_6		64
+#define HCLGE_RSS_TC_SIZE_7		128
+
+#define HCLGE_TQP_RESET_TRY_TIMES	10
+
+#define HCLGE_PHY_PAGE_MDIX		0
+#define HCLGE_PHY_PAGE_COPPER		0
+
+/* Page Selection Reg. */
+#define HCLGE_PHY_PAGE_REG		22
+
+/* Copper Specific Control Register */
+#define HCLGE_PHY_CSC_REG		16
+
+/* Copper Specific Status Register */
+#define HCLGE_PHY_CSS_REG		17
+
+#define HCLGE_PHY_MDIX_CTRL_S		(5)
+#define HCLGE_PHY_MDIX_CTRL_M		(3 << HCLGE_PHY_MDIX_CTRL_S)
+
+#define HCLGE_PHY_MDIX_STATUS_B	(6)
+#define HCLGE_PHY_SPEED_DUP_RESOLVE_B	(11)
+
+enum HCLGE_DEV_STATE {
+	HCLGE_STATE_REINITING,
+	HCLGE_STATE_DOWN,
+	HCLGE_STATE_DISABLED,
+	HCLGE_STATE_REMOVING,
+	HCLGE_STATE_SERVICE_INITED,
+	HCLGE_STATE_SERVICE_SCHED,
+	HCLGE_STATE_MBX_HANDLING,
+	HCLGE_STATE_MBX_IRQ,
+	HCLGE_STATE_MAX
+};
+
+#define HCLGE_MPF_ENBALE 1
+struct hclge_caps {
+	u16 num_tqp;
+	u16 num_buffer_cell;
+	u32 flag;
+	u16 vmdq;
+};
+
+enum HCLGE_MAC_SPEED {
+	HCLGE_MAC_SPEED_10M	= 10,		/* 10 Mbps */
+	HCLGE_MAC_SPEED_100M	= 100,		/* 100 Mbps */
+	HCLGE_MAC_SPEED_1G	= 1000,		/* 1000 Mbps   = 1 Gbps */
+	HCLGE_MAC_SPEED_10G	= 10000,	/* 10000 Mbps  = 10 Gbps */
+	HCLGE_MAC_SPEED_25G	= 25000,	/* 25000 Mbps  = 25 Gbps */
+	HCLGE_MAC_SPEED_40G	= 40000,	/* 40000 Mbps  = 40 Gbps */
+	HCLGE_MAC_SPEED_50G	= 50000,	/* 50000 Mbps  = 50 Gbps */
+	HCLGE_MAC_SPEED_100G	= 100000	/* 100000 Mbps = 100 Gbps */
+};
+
+enum HCLGE_MAC_DUPLEX {
+	HCLGE_MAC_HALF,
+	HCLGE_MAC_FULL
+};
+
+enum hclge_mta_dmac_sel_type {
+	HCLGE_MAC_ADDR_47_36,
+	HCLGE_MAC_ADDR_46_35,
+	HCLGE_MAC_ADDR_45_34,
+	HCLGE_MAC_ADDR_44_33,
+};
+
+struct hclge_mac {
+	u8 phy_addr;
+	u8 flag;
+	u8 media_type;
+	u8 mac_addr[ETH_ALEN];
+	u8 autoneg;
+	u8 duplex;
+	u32 speed;
+	int link;	/* store the link status of mac & phy (if phy exit)*/
+	struct phy_device *phydev;
+	struct mii_bus *mdio_bus;
+	phy_interface_t phy_if;
+};
+
+struct hclge_hw {
+	void __iomem *io_base;
+	struct hclge_mac mac;
+	int num_vec;
+	struct hclge_cmq cmq;
+	struct hclge_caps caps;
+	void *back;
+};
+
+/* TQP stats */
+struct hlcge_tqp_stats {
+	/* query_tqp_tx_queue_statistics ,opcode id:  0x0B03 */
+	u64 rcb_tx_ring_pktnum_rcd; /* 32bit */
+	/* query_tqp_rx_queue_statistics ,opcode id:  0x0B13 */
+	u64 rcb_rx_ring_pktnum_rcd; /* 32bit */
+};
+
+struct hclge_tqp {
+	struct device *dev;	/* Device for DMA mapping */
+	struct hnae3_queue q;
+	struct hlcge_tqp_stats tqp_stats;
+	u16 index;	/* Global index in a NIC controller */
+
+	bool alloced;
+};
+
+enum hclge_fc_mode {
+	HCLGE_FC_NONE,
+	HCLGE_FC_RX_PAUSE,
+	HCLGE_FC_TX_PAUSE,
+	HCLGE_FC_FULL,
+	HCLGE_FC_PFC,
+	HCLGE_FC_DEFAULT
+};
+
+#define HCLGE_PG_NUM		4
+#define HCLGE_SCH_MODE_SP	0
+#define HCLGE_SCH_MODE_DWRR	1
+struct hclge_pg_info {
+	u8 pg_id;
+	u8 pg_sch_mode;		/* 0: sp; 1: dwrr */
+	u8 tc_bit_map;
+	u32 bw_limit;
+	u8 tc_dwrr[HNAE3_MAX_TC];
+};
+
+struct hclge_tc_info {
+	u8 tc_id;
+	u8 tc_sch_mode;		/* 0: sp; 1: dwrr */
+	u8 up;
+	u8 pgid;
+	u32 bw_limit;
+};
+
+struct hclge_cfg {
+	u8 vmdq_vport_num;
+	u8 tc_num;
+	u16 tqp_desc_num;
+	u16 rx_buf_len;
+	u8 phy_addr;
+	u8 media_type;
+	u8 mac_addr[ETH_ALEN];
+	u8 default_speed;
+	u32 numa_node_map;
+};
+
+struct hclge_tm_info {
+	u8 num_tc;
+	u8 num_pg;      /* It must be 1 if vNET-Base schd */
+	u8 pg_dwrr[HCLGE_PG_NUM];
+	struct hclge_pg_info pg_info[HCLGE_PG_NUM];
+	struct hclge_tc_info tc_info[HNAE3_MAX_TC];
+	enum hclge_fc_mode fc_mode;
+	u8 hw_pfc_map; /* Allow for packet drop or not on this TC */
+};
+
+struct hclge_comm_stats_str {
+	char desc[ETH_GSTRING_LEN];
+	unsigned long offset;
+};
+
+/* all 64bit stats, opcode id: 0x0030 */
+struct hclge_64_bit_stats {
+	/* query_igu_stat */
+	u64 igu_rx_oversize_pkt;
+	u64 igu_rx_undersize_pkt;
+	u64 igu_rx_out_all_pkt;
+	u64 igu_rx_uni_pkt;
+	u64 igu_rx_multi_pkt;
+	u64 igu_rx_broad_pkt;
+	u64 rsv0;
+
+	/* query_egu_stat */
+	u64 egu_tx_out_all_pkt;
+	u64 egu_tx_uni_pkt;
+	u64 egu_tx_multi_pkt;
+	u64 egu_tx_broad_pkt;
+
+	/* ssu_ppp packet stats */
+	u64 ssu_ppp_mac_key_num;
+	u64 ssu_ppp_host_key_num;
+	u64 ppp_ssu_mac_rlt_num;
+	u64 ppp_ssu_host_rlt_num;
+
+	/* ssu_tx_in_out_dfx_stats */
+	u64 ssu_tx_in_num;
+	u64 ssu_tx_out_num;
+	/* ssu_rx_in_out_dfx_stats */
+	u64 ssu_rx_in_num;
+	u64 ssu_rx_out_num;
+};
+
+/* all 32bit stats, opcode id: 0x0031 */
+struct hclge_32_bit_stats {
+	u64 igu_rx_err_pkt;
+	u64 igu_rx_no_eof_pkt;
+	u64 igu_rx_no_sof_pkt;
+	u64 egu_tx_1588_pkt;
+	u64 egu_tx_err_pkt;
+	u64 ssu_full_drop_num;
+	u64 ssu_part_drop_num;
+	u64 ppp_key_drop_num;
+	u64 ppp_rlt_drop_num;
+	u64 ssu_key_drop_num;
+	u64 pkt_curr_buf_cnt;
+	u64 qcn_fb_rcv_cnt;
+	u64 qcn_fb_drop_cnt;
+	u64 qcn_fb_invaild_cnt;
+	u64 rsv0;
+	u64 rx_packet_tc0_in_cnt;
+	u64 rx_packet_tc1_in_cnt;
+	u64 rx_packet_tc2_in_cnt;
+	u64 rx_packet_tc3_in_cnt;
+	u64 rx_packet_tc4_in_cnt;
+	u64 rx_packet_tc5_in_cnt;
+	u64 rx_packet_tc6_in_cnt;
+	u64 rx_packet_tc7_in_cnt;
+	u64 rx_packet_tc0_out_cnt;
+	u64 rx_packet_tc1_out_cnt;
+	u64 rx_packet_tc2_out_cnt;
+	u64 rx_packet_tc3_out_cnt;
+	u64 rx_packet_tc4_out_cnt;
+	u64 rx_packet_tc5_out_cnt;
+	u64 rx_packet_tc6_out_cnt;
+	u64 rx_packet_tc7_out_cnt;
+
+	/* Tx packet level statistics */
+	u64 tx_packet_tc0_in_cnt;
+	u64 tx_packet_tc1_in_cnt;
+	u64 tx_packet_tc2_in_cnt;
+	u64 tx_packet_tc3_in_cnt;
+	u64 tx_packet_tc4_in_cnt;
+	u64 tx_packet_tc5_in_cnt;
+	u64 tx_packet_tc6_in_cnt;
+	u64 tx_packet_tc7_in_cnt;
+	u64 tx_packet_tc0_out_cnt;
+	u64 tx_packet_tc1_out_cnt;
+	u64 tx_packet_tc2_out_cnt;
+	u64 tx_packet_tc3_out_cnt;
+	u64 tx_packet_tc4_out_cnt;
+	u64 tx_packet_tc5_out_cnt;
+	u64 tx_packet_tc6_out_cnt;
+	u64 tx_packet_tc7_out_cnt;
+
+	/* packet buffer statistics */
+	u64 pkt_curr_buf_tc0_cnt;
+	u64 pkt_curr_buf_tc1_cnt;
+	u64 pkt_curr_buf_tc2_cnt;
+	u64 pkt_curr_buf_tc3_cnt;
+	u64 pkt_curr_buf_tc4_cnt;
+	u64 pkt_curr_buf_tc5_cnt;
+	u64 pkt_curr_buf_tc6_cnt;
+	u64 pkt_curr_buf_tc7_cnt;
+
+	u64 mb_uncopy_num;
+	u64 lo_pri_unicast_rlt_drop_num;
+	u64 hi_pri_multicast_rlt_drop_num;
+	u64 lo_pri_multicast_rlt_drop_num;
+	u64 rx_oq_drop_pkt_cnt;
+	u64 tx_oq_drop_pkt_cnt;
+	u64 nic_l2_err_drop_pkt_cnt;
+	u64 roc_l2_err_drop_pkt_cnt;
+};
+
+/* mac stats ,opcode id: 0x0032 */
+struct hclge_mac_stats {
+	u64 mac_tx_mac_pause_num;
+	u64 mac_rx_mac_pause_num;
+	u64 mac_tx_pfc_pri0_pkt_num;
+	u64 mac_tx_pfc_pri1_pkt_num;
+	u64 mac_tx_pfc_pri2_pkt_num;
+	u64 mac_tx_pfc_pri3_pkt_num;
+	u64 mac_tx_pfc_pri4_pkt_num;
+	u64 mac_tx_pfc_pri5_pkt_num;
+	u64 mac_tx_pfc_pri6_pkt_num;
+	u64 mac_tx_pfc_pri7_pkt_num;
+	u64 mac_rx_pfc_pri0_pkt_num;
+	u64 mac_rx_pfc_pri1_pkt_num;
+	u64 mac_rx_pfc_pri2_pkt_num;
+	u64 mac_rx_pfc_pri3_pkt_num;
+	u64 mac_rx_pfc_pri4_pkt_num;
+	u64 mac_rx_pfc_pri5_pkt_num;
+	u64 mac_rx_pfc_pri6_pkt_num;
+	u64 mac_rx_pfc_pri7_pkt_num;
+	u64 mac_tx_total_pkt_num;
+	u64 mac_tx_total_oct_num;
+	u64 mac_tx_good_pkt_num;
+	u64 mac_tx_bad_pkt_num;
+	u64 mac_tx_good_oct_num;
+	u64 mac_tx_bad_oct_num;
+	u64 mac_tx_uni_pkt_num;
+	u64 mac_tx_multi_pkt_num;
+	u64 mac_tx_broad_pkt_num;
+	u64 mac_tx_undersize_pkt_num;
+	u64 mac_tx_overrsize_pkt_num;
+	u64 mac_tx_64_oct_pkt_num;
+	u64 mac_tx_65_127_oct_pkt_num;
+	u64 mac_tx_128_255_oct_pkt_num;
+	u64 mac_tx_256_511_oct_pkt_num;
+	u64 mac_tx_512_1023_oct_pkt_num;
+	u64 mac_tx_1024_1518_oct_pkt_num;
+	u64 mac_tx_1519_max_oct_pkt_num;
+	u64 mac_rx_total_pkt_num;
+	u64 mac_rx_total_oct_num;
+	u64 mac_rx_good_pkt_num;
+	u64 mac_rx_bad_pkt_num;
+	u64 mac_rx_good_oct_num;
+	u64 mac_rx_bad_oct_num;
+	u64 mac_rx_uni_pkt_num;
+	u64 mac_rx_multi_pkt_num;
+	u64 mac_rx_broad_pkt_num;
+	u64 mac_rx_undersize_pkt_num;
+	u64 mac_rx_overrsize_pkt_num;
+	u64 mac_rx_64_oct_pkt_num;
+	u64 mac_rx_65_127_oct_pkt_num;
+	u64 mac_rx_128_255_oct_pkt_num;
+	u64 mac_rx_256_511_oct_pkt_num;
+	u64 mac_rx_512_1023_oct_pkt_num;
+	u64 mac_rx_1024_1518_oct_pkt_num;
+	u64 mac_rx_1519_max_oct_pkt_num;
+
+	u64 mac_trans_fragment_pkt_num;
+	u64 mac_trans_undermin_pkt_num;
+	u64 mac_trans_jabber_pkt_num;
+	u64 mac_trans_err_all_pkt_num;
+	u64 mac_trans_from_app_good_pkt_num;
+	u64 mac_trans_from_app_bad_pkt_num;
+	u64 mac_rcv_fragment_pkt_num;
+	u64 mac_rcv_undermin_pkt_num;
+	u64 mac_rcv_jabber_pkt_num;
+	u64 mac_rcv_fcs_err_pkt_num;
+	u64 mac_rcv_send_app_good_pkt_num;
+	u64 mac_rcv_send_app_bad_pkt_num;
+};
+
+struct hclge_hw_stats {
+	struct hclge_mac_stats      mac_stats;
+	struct hclge_64_bit_stats   all_64_bit_stats;
+	struct hclge_32_bit_stats   all_32_bit_stats;
+};
+
+struct hclge_dev {
+	struct pci_dev *pdev;
+	struct hnae3_ae_dev *ae_dev;
+	struct hclge_hw hw;
+	struct hclge_hw_stats hw_stats;
+	unsigned long state;
+
+	u32 fw_version;
+	u16 num_vmdq_vport;		/* Num vmdq vport this PF has set up */
+	u16 num_tqps;			/* Num task queue pairs of this PF */
+	u16 num_req_vfs;		/* Num VFs requested for this PF */
+
+	u16 num_roce_msix;		/* Num of roce vectors for this PF */
+	int roce_base_vector;
+
+	/* Base task tqp physical id of this PF */
+	u16 base_tqp_pid;
+	u16 alloc_rss_size;		/* Allocated RSS task queue */
+	u16 rss_size_max;		/* HW defined max RSS task queue */
+
+	/* Num of guaranteed filters for this PF */
+	u16 fdir_pf_filter_count;
+	u16 num_alloc_vport;		/* Num vports this driver supports */
+	u32 numa_node_mask;
+	u16 rx_buf_len;
+	u16 num_desc;
+	u8 hw_tc_map;
+	u8 tc_num_last_time;
+	enum hclge_fc_mode fc_mode_last_time;
+
+#define HCLGE_FLAG_TC_BASE_SCH_MODE		1
+#define HCLGE_FLAG_VNET_BASE_SCH_MODE		2
+	u8 tx_sch_mode;
+
+	u8 default_up;
+	struct hclge_tm_info tm_info;
+
+	u16 num_msi;
+	u16 num_msi_left;
+	u16 num_msi_used;
+	u32 base_msi_vector;
+	struct msix_entry *msix_entries;
+	u16 *vector_status;
+
+	u16 pending_udp_bitmap;
+
+	u16 rx_itr_default;
+	u16 tx_itr_default;
+
+	u16 adminq_work_limit; /* Num of admin receive queue desc to process */
+	unsigned long service_timer_period;
+	unsigned long service_timer_previous;
+	struct timer_list service_timer;
+	struct work_struct service_task;
+
+	bool cur_promisc;
+	int num_alloc_vfs;	/* Actual number of VFs allocated */
+
+	struct hclge_tqp *htqp;
+	struct hclge_vport *vport;
+
+	struct dentry *hclge_dbgfs;
+
+	struct hnae3_client *nic_client;
+	struct hnae3_client *roce_client;
+
+#define HCLGE_FLAG_USE_MSI	0x00000001
+#define HCLGE_FLAG_USE_MSIX	0x00000002
+#define HCLGE_FLAG_MAIN		0x00000004
+#define HCLGE_FLAG_DCB_CAPABLE	0x00000008
+#define HCLGE_FLAG_DCB_ENABLE	0x00000010
+	u32 flag;
+
+	u32 pkt_buf_size; /* Total pf buf size for tx/rx */
+	u32 mps; /* Max packet size */
+	struct hclge_priv_buf *priv_buf;
+	struct hclge_shared_buf s_buf;
+
+	enum hclge_mta_dmac_sel_type mta_mac_sel_type;
+	bool enable_mta; /* Mutilcast filter enable */
+	bool accept_mta_mc; /* Whether accept mta filter multicast */
+};
+
+struct hclge_vport {
+	u16 alloc_tqps;	/* Allocated Tx/Rx queues */
+
+	u8  rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
+	/* User configured lookup table entries */
+	u8  rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+
+	u16 qs_offset;
+	u16 bw_limit;		/* VSI BW Limit (0 = disabled) */
+	u8  dwrr;
+
+	int vport_id;
+	struct hclge_dev *back;  /* Back reference to associated dev */
+	struct hnae3_handle nic;
+	struct hnae3_handle roce;
+};
+
+void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
+			      bool en_mc, bool en_bc, int vport_id);
+
+int hclge_add_uc_addr_common(struct hclge_vport *vport,
+			     const unsigned char *addr);
+int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+			    const unsigned char *addr);
+int hclge_add_mc_addr_common(struct hclge_vport *vport,
+			     const unsigned char *addr);
+int hclge_rm_mc_addr_common(struct hclge_vport *vport,
+			    const unsigned char *addr);
+
+int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
+			      u8 func_id,
+			      bool enable);
+struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle);
+int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector,
+				   struct hnae3_ring_chain_node *ring_chain);
+static inline int hclge_get_queue_id(struct hnae3_queue *queue)
+{
+	struct hclge_tqp *tqp = container_of(queue, struct hclge_tqp, q);
+
+	return tqp->index;
+}
+
+int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
+int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
+			     bool is_kill, u16 vlan, u8 qos, __be16 proto);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
new file mode 100644
index 000000000000..a2add8bb1945
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+
+#include "hclge_cmd.h"
+#include "hclge_main.h"
+#include "hclge_mdio.h"
+
+enum hclge_mdio_c22_op_seq {
+	HCLGE_MDIO_C22_WRITE = 1,
+	HCLGE_MDIO_C22_READ = 2
+};
+
+#define HCLGE_MDIO_CTRL_START_B		0
+#define HCLGE_MDIO_CTRL_ST_S		1
+#define HCLGE_MDIO_CTRL_ST_M		(0x3 << HCLGE_MDIO_CTRL_ST_S)
+#define HCLGE_MDIO_CTRL_OP_S		3
+#define HCLGE_MDIO_CTRL_OP_M		(0x3 << HCLGE_MDIO_CTRL_OP_S)
+
+#define HCLGE_MDIO_PHYID_S		0
+#define HCLGE_MDIO_PHYID_M		(0x1f << HCLGE_MDIO_PHYID_S)
+
+#define HCLGE_MDIO_PHYREG_S		0
+#define HCLGE_MDIO_PHYREG_M		(0x1f << HCLGE_MDIO_PHYREG_S)
+
+#define HCLGE_MDIO_STA_B		0
+
+struct hclge_mdio_cfg_cmd {
+	u8 ctrl_bit;
+	u8 phyid;
+	u8 phyad;
+	u8 rsvd;
+	__le16 reserve;
+	__le16 data_wr;
+	__le16 data_rd;
+	__le16 sta;
+};
+
+static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
+			    u16 data)
+{
+	struct hclge_mdio_cfg_cmd *mdio_cmd;
+	struct hclge_dev *hdev = bus->priv;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
+
+	mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
+
+	hnae_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
+		       HCLGE_MDIO_PHYID_S, phyid);
+	hnae_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
+		       HCLGE_MDIO_PHYREG_S, regnum);
+
+	hnae_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
+	hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
+		       HCLGE_MDIO_CTRL_ST_S, 1);
+	hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
+		       HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_WRITE);
+
+	mdio_cmd->data_wr = cpu_to_le16(data);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"mdio write fail when sending cmd, status is %d.\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
+{
+	struct hclge_mdio_cfg_cmd *mdio_cmd;
+	struct hclge_dev *hdev = bus->priv;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
+
+	mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
+
+	hnae_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
+		       HCLGE_MDIO_PHYID_S, phyid);
+	hnae_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
+		       HCLGE_MDIO_PHYREG_S, regnum);
+
+	hnae_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
+	hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
+		       HCLGE_MDIO_CTRL_ST_S, 1);
+	hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
+		       HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_READ);
+
+	/* Read out phy data */
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"mdio read fail when get data, status is %d.\n",
+			ret);
+		return ret;
+	}
+
+	if (hnae_get_bit(le16_to_cpu(mdio_cmd->sta), HCLGE_MDIO_STA_B)) {
+		dev_err(&hdev->pdev->dev, "mdio read data error\n");
+		return -EIO;
+	}
+
+	return le16_to_cpu(mdio_cmd->data_rd);
+}
+
+int hclge_mac_mdio_config(struct hclge_dev *hdev)
+{
+	struct hclge_mac *mac = &hdev->hw.mac;
+	struct phy_device *phydev;
+	struct mii_bus *mdio_bus;
+	int ret;
+
+	if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR)
+		return 0;
+
+	mdio_bus = devm_mdiobus_alloc(&hdev->pdev->dev);
+	if (!mdio_bus)
+		return -ENOMEM;
+
+	mdio_bus->name = "hisilicon MII bus";
+	mdio_bus->read = hclge_mdio_read;
+	mdio_bus->write = hclge_mdio_write;
+	snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%s-%s", "mii",
+		 dev_name(&hdev->pdev->dev));
+
+	mdio_bus->parent = &hdev->pdev->dev;
+	mdio_bus->priv = hdev;
+	mdio_bus->phy_mask = ~(1 << mac->phy_addr);
+	ret = mdiobus_register(mdio_bus);
+	if (ret) {
+		dev_err(mdio_bus->parent,
+			"Failed to register MDIO bus ret = %#x\n", ret);
+		return ret;
+	}
+
+	phydev = mdiobus_get_phy(mdio_bus, mac->phy_addr);
+	if (!phydev || IS_ERR(phydev)) {
+		dev_err(mdio_bus->parent, "Failed to get phy device\n");
+		mdiobus_unregister(mdio_bus);
+		return -EIO;
+	}
+
+	mac->phydev = phydev;
+	mac->mdio_bus = mdio_bus;
+
+	return 0;
+}
+
+static void hclge_mac_adjust_link(struct net_device *netdev)
+{
+	struct hnae3_handle *h = *((void **)netdev_priv(netdev));
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+	int duplex, speed;
+	int ret;
+
+	speed = netdev->phydev->speed;
+	duplex = netdev->phydev->duplex;
+
+	ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex);
+	if (ret)
+		netdev_err(netdev, "failed to adjust link.\n");
+}
+
+int hclge_mac_start_phy(struct hclge_dev *hdev)
+{
+	struct net_device *netdev = hdev->vport[0].nic.netdev;
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+	int ret;
+
+	if (!phydev)
+		return 0;
+
+	ret = phy_connect_direct(netdev, phydev,
+				 hclge_mac_adjust_link,
+				 PHY_INTERFACE_MODE_SGMII);
+	if (ret) {
+		netdev_err(netdev, "phy_connect_direct err.\n");
+		return ret;
+	}
+
+	phy_start(phydev);
+
+	return 0;
+}
+
+void hclge_mac_stop_phy(struct hclge_dev *hdev)
+{
+	struct net_device *netdev = hdev->vport[0].nic.netdev;
+	struct phy_device *phydev = netdev->phydev;
+
+	if (!phydev)
+		return;
+
+	phy_stop(phydev);
+	phy_disconnect(phydev);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
new file mode 100644
index 000000000000..c5e91cfb8f2c
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_MDIO_H
+#define __HCLGE_MDIO_H
+
+int hclge_mac_mdio_config(struct hclge_dev *hdev);
+int hclge_mac_start_phy(struct hclge_dev *hdev);
+void hclge_mac_stop_phy(struct hclge_dev *hdev);
+
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
new file mode 100644
index 000000000000..1c577d268f00
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+
+#include "hclge_cmd.h"
+#include "hclge_main.h"
+#include "hclge_tm.h"
+
+enum hclge_shaper_level {
+	HCLGE_SHAPER_LVL_PRI	= 0,
+	HCLGE_SHAPER_LVL_PG	= 1,
+	HCLGE_SHAPER_LVL_PORT	= 2,
+	HCLGE_SHAPER_LVL_QSET	= 3,
+	HCLGE_SHAPER_LVL_CNT	= 4,
+	HCLGE_SHAPER_LVL_VF	= 0,
+	HCLGE_SHAPER_LVL_PF	= 1,
+};
+
+#define HCLGE_SHAPER_BS_U_DEF	1
+#define HCLGE_SHAPER_BS_S_DEF	4
+
+#define HCLGE_ETHER_MAX_RATE	100000
+
+/* hclge_shaper_para_calc: calculate ir parameter for the shaper
+ * @ir: Rate to be config, its unit is Mbps
+ * @shaper_level: the shaper level. eg: port, pg, priority, queueset
+ * @ir_b: IR_B parameter of IR shaper
+ * @ir_u: IR_U parameter of IR shaper
+ * @ir_s: IR_S parameter of IR shaper
+ *
+ * the formula:
+ *
+ *		IR_b * (2 ^ IR_u) * 8
+ * IR(Mbps) = -------------------------  *  CLOCK(1000Mbps)
+ *		Tick * (2 ^ IR_s)
+ *
+ * @return: 0: calculate sucessful, negative: fail
+ */
+static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
+				  u8 *ir_b, u8 *ir_u, u8 *ir_s)
+{
+	const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
+		6 * 256,        /* Prioriy level */
+		6 * 32,         /* Prioriy group level */
+		6 * 8,          /* Port level */
+		6 * 256         /* Qset level */
+	};
+	u8 ir_u_calc = 0, ir_s_calc = 0;
+	u32 ir_calc;
+	u32 tick;
+
+	/* Calc tick */
+	if (shaper_level >= HCLGE_SHAPER_LVL_CNT)
+		return -EINVAL;
+
+	tick = tick_array[shaper_level];
+
+	/**
+	 * Calc the speed if ir_b = 126, ir_u = 0 and ir_s = 0
+	 * the formula is changed to:
+	 *		126 * 1 * 8
+	 * ir_calc = ---------------- * 1000
+	 *		tick * 1
+	 */
+	ir_calc = (1008000 + (tick >> 1) - 1) / tick;
+
+	if (ir_calc == ir) {
+		*ir_b = 126;
+		*ir_u = 0;
+		*ir_s = 0;
+
+		return 0;
+	} else if (ir_calc > ir) {
+		/* Increasing the denominator to select ir_s value */
+		while (ir_calc > ir) {
+			ir_s_calc++;
+			ir_calc = 1008000 / (tick * (1 << ir_s_calc));
+		}
+
+		if (ir_calc == ir)
+			*ir_b = 126;
+		else
+			*ir_b = (ir * tick * (1 << ir_s_calc) + 4000) / 8000;
+	} else {
+		/* Increasing the numerator to select ir_u value */
+		u32 numerator;
+
+		while (ir_calc < ir) {
+			ir_u_calc++;
+			numerator = 1008000 * (1 << ir_u_calc);
+			ir_calc = (numerator + (tick >> 1)) / tick;
+		}
+
+		if (ir_calc == ir) {
+			*ir_b = 126;
+		} else {
+			u32 denominator = (8000 * (1 << --ir_u_calc));
+			*ir_b = (ir * tick + (denominator >> 1)) / denominator;
+		}
+	}
+
+	*ir_u = ir_u_calc;
+	*ir_s = ir_s_calc;
+
+	return 0;
+}
+
+static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
+{
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_MAC_PAUSE_EN, false);
+
+	desc.data[0] = cpu_to_le32((tx ? HCLGE_TX_MAC_PAUSE_EN_MSK : 0) |
+		(rx ? HCLGE_RX_MAC_PAUSE_EN_MSK : 0));
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
+{
+	u8 tc;
+
+	for (tc = 0; tc < hdev->tm_info.num_tc; tc++)
+		if (hdev->tm_info.tc_info[tc].up == pri_id)
+			break;
+
+	if (tc >= hdev->tm_info.num_tc)
+		return -EINVAL;
+
+	/**
+	 * the register for priority has four bytes, the first bytes includes
+	 *  priority0 and priority1, the higher 4bit stands for priority1
+	 *  while the lower 4bit stands for priority0, as below:
+	 * first byte:	| pri_1 | pri_0 |
+	 * second byte:	| pri_3 | pri_2 |
+	 * third byte:	| pri_5 | pri_4 |
+	 * fourth byte:	| pri_7 | pri_6 |
+	 */
+	pri[pri_id >> 1] |= tc << ((pri_id & 1) * 4);
+
+	return 0;
+}
+
+static int hclge_up_to_tc_map(struct hclge_dev *hdev)
+{
+	struct hclge_desc desc;
+	u8 *pri = (u8 *)desc.data;
+	u8 pri_id;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, false);
+
+	for (pri_id = 0; pri_id < hdev->tm_info.num_tc; pri_id++) {
+		ret = hclge_fill_pri_array(hdev, pri, pri_id);
+		if (ret)
+			return ret;
+	}
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_to_pri_map_cfg(struct hclge_dev *hdev,
+				      u8 pg_id, u8 pri_bit_map)
+{
+	struct hclge_pg_to_pri_link_cmd *map;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_TO_PRI_LINK, false);
+
+	map = (struct hclge_pg_to_pri_link_cmd *)desc.data;
+
+	map->pg_id = pg_id;
+	map->pri_bit_map = pri_bit_map;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
+				      u16 qs_id, u8 pri)
+{
+	struct hclge_qs_to_pri_link_cmd *map;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_TO_PRI_LINK, false);
+
+	map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
+
+	map->qs_id = cpu_to_le16(qs_id);
+	map->priority = pri;
+	map->link_vld = HCLGE_TM_QS_PRI_LINK_VLD_MSK;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_q_to_qs_map_cfg(struct hclge_dev *hdev,
+				    u8 q_id, u16 qs_id)
+{
+	struct hclge_nq_to_qs_link_cmd *map;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NQ_TO_QS_LINK, false);
+
+	map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
+
+	map->nq_id = cpu_to_le16(q_id);
+	map->qset_id = cpu_to_le16(qs_id | HCLGE_TM_Q_QS_LINK_VLD_MSK);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_weight_cfg(struct hclge_dev *hdev, u8 pg_id,
+				  u8 dwrr)
+{
+	struct hclge_pg_weight_cmd *weight;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_WEIGHT, false);
+
+	weight = (struct hclge_pg_weight_cmd *)desc.data;
+
+	weight->pg_id = pg_id;
+	weight->dwrr = dwrr;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pri_weight_cfg(struct hclge_dev *hdev, u8 pri_id,
+				   u8 dwrr)
+{
+	struct hclge_priority_weight_cmd *weight;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PRI_WEIGHT, false);
+
+	weight = (struct hclge_priority_weight_cmd *)desc.data;
+
+	weight->pri_id = pri_id;
+	weight->dwrr = dwrr;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_weight_cfg(struct hclge_dev *hdev, u16 qs_id,
+				  u8 dwrr)
+{
+	struct hclge_qs_weight_cmd *weight;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_WEIGHT, false);
+
+	weight = (struct hclge_qs_weight_cmd *)desc.data;
+
+	weight->qs_id = cpu_to_le16(qs_id);
+	weight->dwrr = dwrr;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
+				    enum hclge_shap_bucket bucket, u8 pg_id,
+				    u8 ir_b, u8 ir_u, u8 ir_s, u8 bs_b, u8 bs_s)
+{
+	struct hclge_pg_shapping_cmd *shap_cfg_cmd;
+	enum hclge_opcode_type opcode;
+	struct hclge_desc desc;
+
+	opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING :
+		HCLGE_OPC_TM_PG_C_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, opcode, false);
+
+	shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
+
+	shap_cfg_cmd->pg_id = pg_id;
+
+	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
+	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
+	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
+	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
+	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
+				     enum hclge_shap_bucket bucket, u8 pri_id,
+				     u8 ir_b, u8 ir_u, u8 ir_s,
+				     u8 bs_b, u8 bs_s)
+{
+	struct hclge_pri_shapping_cmd *shap_cfg_cmd;
+	enum hclge_opcode_type opcode;
+	struct hclge_desc desc;
+
+	opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING :
+		HCLGE_OPC_TM_PRI_C_SHAPPING;
+
+	hclge_cmd_setup_basic_desc(&desc, opcode, false);
+
+	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
+
+	shap_cfg_cmd->pri_id = pri_id;
+
+	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
+	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
+	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
+	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
+	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_schd_mode_cfg(struct hclge_dev *hdev, u8 pg_id)
+{
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_SCH_MODE_CFG, false);
+
+	if (hdev->tm_info.pg_info[pg_id].pg_sch_mode == HCLGE_SCH_MODE_DWRR)
+		desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
+	else
+		desc.data[1] = 0;
+
+	desc.data[0] = cpu_to_le32(pg_id);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id)
+{
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PRI_SCH_MODE_CFG, false);
+
+	if (hdev->tm_info.tc_info[pri_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+		desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
+	else
+		desc.data[1] = 0;
+
+	desc.data[0] = cpu_to_le32(pri_id);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id)
+{
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false);
+
+	if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+		desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
+	else
+		desc.data[1] = 0;
+
+	desc.data[0] = cpu_to_le32(qs_id);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc)
+{
+	struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_BP_TO_QSET_MAPPING,
+				   false);
+
+	bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
+
+	bp_to_qs_map_cmd->tc_id = tc;
+
+	/* Qset and tc is one by one mapping */
+	bp_to_qs_map_cmd->qs_bit_map = cpu_to_le32(1 << tc);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	u8 i;
+
+	kinfo = &vport->nic.kinfo;
+	vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
+	kinfo->num_tc =
+		min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc);
+	kinfo->rss_size
+		= min_t(u16, hdev->rss_size_max,
+			kinfo->num_tqps / kinfo->num_tc);
+	vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
+	vport->dwrr = 100;  /* 100 percent as init */
+
+	for (i = 0; i < kinfo->num_tc; i++) {
+		if (hdev->hw_tc_map & BIT(i)) {
+			kinfo->tc_info[i].enable = true;
+			kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
+			kinfo->tc_info[i].tqp_count = kinfo->rss_size;
+			kinfo->tc_info[i].tc = i;
+			kinfo->tc_info[i].up = hdev->tm_info.tc_info[i].up;
+		} else {
+			/* Set to default queue if TC is disable */
+			kinfo->tc_info[i].enable = false;
+			kinfo->tc_info[i].tqp_offset = 0;
+			kinfo->tc_info[i].tqp_count = 1;
+			kinfo->tc_info[i].tc = 0;
+			kinfo->tc_info[i].up = 0;
+		}
+	}
+}
+
+static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	u32 i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		hclge_tm_vport_tc_info_update(vport);
+
+		vport++;
+	}
+}
+
+static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
+{
+	u8 i;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		hdev->tm_info.tc_info[i].tc_id = i;
+		hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+		hdev->tm_info.tc_info[i].up = i;
+		hdev->tm_info.tc_info[i].pgid = 0;
+		hdev->tm_info.tc_info[i].bw_limit =
+			hdev->tm_info.pg_info[0].bw_limit;
+	}
+
+	hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+}
+
+static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
+{
+	u8 i;
+
+	for (i = 0; i < hdev->tm_info.num_pg; i++) {
+		int k;
+
+		hdev->tm_info.pg_dwrr[i] = i ? 0 : 100;
+
+		hdev->tm_info.pg_info[i].pg_id = i;
+		hdev->tm_info.pg_info[i].pg_sch_mode = HCLGE_SCH_MODE_DWRR;
+
+		hdev->tm_info.pg_info[i].bw_limit = HCLGE_ETHER_MAX_RATE;
+
+		if (i != 0)
+			continue;
+
+		hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map;
+		for (k = 0; k < hdev->tm_info.num_tc; k++)
+			hdev->tm_info.pg_info[i].tc_dwrr[k] = 100;
+	}
+}
+
+static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
+{
+	if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
+	    (hdev->tm_info.num_pg != 1))
+		return -EINVAL;
+
+	hclge_tm_pg_info_init(hdev);
+
+	hclge_tm_tc_info_init(hdev);
+
+	hclge_tm_vport_info_update(hdev);
+
+	hdev->tm_info.fc_mode = HCLGE_FC_NONE;
+	hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+
+	return 0;
+}
+
+static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev)
+{
+	int ret;
+	u32 i;
+
+	if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE)
+		return 0;
+
+	for (i = 0; i < hdev->tm_info.num_pg; i++) {
+		/* Cfg mapping */
+		ret = hclge_tm_pg_to_pri_map_cfg(
+			hdev, i, hdev->tm_info.pg_info[i].tc_bit_map);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev)
+{
+	u8 ir_u, ir_b, ir_s;
+	int ret;
+	u32 i;
+
+	/* Cfg pg schd */
+	if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE)
+		return 0;
+
+	/* Pg to pri */
+	for (i = 0; i < hdev->tm_info.num_pg; i++) {
+		/* Calc shaper para */
+		ret = hclge_shaper_para_calc(
+					hdev->tm_info.pg_info[i].bw_limit,
+					HCLGE_SHAPER_LVL_PG,
+					&ir_b, &ir_u, &ir_s);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_pg_shapping_cfg(hdev,
+					       HCLGE_TM_SHAP_C_BUCKET, i,
+					       0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
+					       HCLGE_SHAPER_BS_S_DEF);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_pg_shapping_cfg(hdev,
+					       HCLGE_TM_SHAP_P_BUCKET, i,
+					       ir_b, ir_u, ir_s,
+					       HCLGE_SHAPER_BS_U_DEF,
+					       HCLGE_SHAPER_BS_S_DEF);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pg_dwrr_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+	u32 i;
+
+	/* cfg pg schd */
+	if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE)
+		return 0;
+
+	/* pg to prio */
+	for (i = 0; i < hdev->tm_info.num_pg; i++) {
+		/* Cfg dwrr */
+		ret = hclge_tm_pg_weight_cfg(hdev, i,
+					     hdev->tm_info.pg_dwrr[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_vport_q_to_qs_map(struct hclge_dev *hdev,
+				   struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hnae3_queue **tqp = kinfo->tqp;
+	struct hnae3_tc_info *v_tc_info;
+	u32 i, j;
+	int ret;
+
+	for (i = 0; i < kinfo->num_tc; i++) {
+		v_tc_info = &kinfo->tc_info[i];
+		for (j = 0; j < v_tc_info->tqp_count; j++) {
+			struct hnae3_queue *q = tqp[v_tc_info->tqp_offset + j];
+
+			ret = hclge_tm_q_to_qs_map_cfg(hdev,
+						       hclge_get_queue_id(q),
+						       vport->qs_offset + i);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int ret;
+	u32 i;
+
+	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+		/* Cfg qs -> pri mapping, one by one mapping */
+		for (i = 0; i < hdev->tm_info.num_tc; i++) {
+			ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i);
+			if (ret)
+				return ret;
+		}
+	} else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
+		int k;
+		/* Cfg qs -> pri mapping,  qs = tc, pri = vf, 8 qs -> 1 pri */
+		for (k = 0; k < hdev->num_alloc_vport; k++)
+			for (i = 0; i < HNAE3_MAX_TC; i++) {
+				ret = hclge_tm_qs_to_pri_map_cfg(
+					hdev, vport[k].qs_offset + i, k);
+				if (ret)
+					return ret;
+			}
+	} else {
+		return -EINVAL;
+	}
+
+	/* Cfg q -> qs mapping */
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		ret = hclge_vport_q_to_qs_map(hdev, vport);
+		if (ret)
+			return ret;
+
+		vport++;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev)
+{
+	u8 ir_u, ir_b, ir_s;
+	int ret;
+	u32 i;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		ret = hclge_shaper_para_calc(
+					hdev->tm_info.tc_info[i].bw_limit,
+					HCLGE_SHAPER_LVL_PRI,
+					&ir_b, &ir_u, &ir_s);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_pri_shapping_cfg(
+			hdev, HCLGE_TM_SHAP_C_BUCKET, i,
+			0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
+			HCLGE_SHAPER_BS_S_DEF);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_pri_shapping_cfg(
+			hdev, HCLGE_TM_SHAP_P_BUCKET, i,
+			ir_b, ir_u, ir_s, HCLGE_SHAPER_BS_U_DEF,
+			HCLGE_SHAPER_BS_S_DEF);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport)
+{
+	struct hclge_dev *hdev = vport->back;
+	u8 ir_u, ir_b, ir_s;
+	int ret;
+
+	ret = hclge_shaper_para_calc(vport->bw_limit, HCLGE_SHAPER_LVL_VF,
+				     &ir_b, &ir_u, &ir_s);
+	if (ret)
+		return ret;
+
+	ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET,
+					vport->vport_id,
+					0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
+					HCLGE_SHAPER_BS_S_DEF);
+	if (ret)
+		return ret;
+
+	ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET,
+					vport->vport_id,
+					ir_b, ir_u, ir_s,
+					HCLGE_SHAPER_BS_U_DEF,
+					HCLGE_SHAPER_BS_S_DEF);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int hclge_tm_pri_vnet_base_shaper_qs_cfg(struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	struct hnae3_tc_info *v_tc_info;
+	u8 ir_u, ir_b, ir_s;
+	u32 i;
+	int ret;
+
+	for (i = 0; i < kinfo->num_tc; i++) {
+		v_tc_info = &kinfo->tc_info[i];
+		ret = hclge_shaper_para_calc(
+					hdev->tm_info.tc_info[i].bw_limit,
+					HCLGE_SHAPER_LVL_QSET,
+					&ir_b, &ir_u, &ir_s);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_vnet_base_shaper_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int ret;
+	u32 i;
+
+	/* Need config vport shaper */
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		ret = hclge_tm_pri_vnet_base_shaper_pri_cfg(vport);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_pri_vnet_base_shaper_qs_cfg(vport);
+		if (ret)
+			return ret;
+
+		vport++;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+
+	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+		ret = hclge_tm_pri_tc_base_shaper_cfg(hdev);
+		if (ret)
+			return ret;
+	} else {
+		ret = hclge_tm_pri_vnet_base_shaper_cfg(hdev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_pg_info *pg_info;
+	u8 dwrr;
+	int ret;
+	u32 i;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		pg_info =
+			&hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
+		dwrr = pg_info->tc_dwrr[i];
+
+		ret = hclge_tm_pri_weight_cfg(hdev, i, dwrr);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_vnet_base_dwrr_pri_cfg(struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+	u8 i;
+
+	/* Vf dwrr */
+	ret = hclge_tm_pri_weight_cfg(hdev, vport->vport_id, vport->dwrr);
+	if (ret)
+		return ret;
+
+	/* Qset dwrr */
+	for (i = 0; i < kinfo->num_tc; i++) {
+		ret = hclge_tm_qs_weight_cfg(
+			hdev, vport->qs_offset + i,
+			hdev->tm_info.pg_info[0].tc_dwrr[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_vnet_base_dwrr_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int ret;
+	u32 i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		ret = hclge_tm_pri_vnet_base_dwrr_pri_cfg(vport);
+		if (ret)
+			return ret;
+
+		vport++;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+
+	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+		ret = hclge_tm_pri_tc_base_dwrr_cfg(hdev);
+		if (ret)
+			return ret;
+	} else {
+		ret = hclge_tm_pri_vnet_base_dwrr_cfg(hdev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_map_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_tm_pg_to_pri_map(hdev);
+	if (ret)
+		return ret;
+
+	return hclge_tm_pri_q_qs_cfg(hdev);
+}
+
+static int hclge_tm_shaper_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_tm_pg_shaper_cfg(hdev);
+	if (ret)
+		return ret;
+
+	return hclge_tm_pri_shaper_cfg(hdev);
+}
+
+int hclge_tm_dwrr_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_tm_pg_dwrr_cfg(hdev);
+	if (ret)
+		return ret;
+
+	return hclge_tm_pri_dwrr_cfg(hdev);
+}
+
+static int hclge_tm_lvl2_schd_mode_cfg(struct hclge_dev *hdev)
+{
+	int ret;
+	u8 i;
+
+	/* Only being config on TC-Based scheduler mode */
+	if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE)
+		return 0;
+
+	for (i = 0; i < hdev->tm_info.num_pg; i++) {
+		ret = hclge_tm_pg_schd_mode_cfg(hdev, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+	u8 i;
+
+	ret = hclge_tm_pri_schd_mode_cfg(hdev, vport->vport_id);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < kinfo->num_tc; i++) {
+		ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int ret;
+	u8 i;
+
+	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+		for (i = 0; i < hdev->tm_info.num_tc; i++) {
+			ret = hclge_tm_pri_schd_mode_cfg(hdev, i);
+			if (ret)
+				return ret;
+
+			ret = hclge_tm_qs_schd_mode_cfg(hdev, i);
+			if (ret)
+				return ret;
+		}
+	} else {
+		for (i = 0; i < hdev->num_alloc_vport; i++) {
+			ret = hclge_tm_schd_mode_vnet_base_cfg(vport);
+			if (ret)
+				return ret;
+
+			vport++;
+		}
+	}
+
+	return 0;
+}
+
+static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_tm_lvl2_schd_mode_cfg(hdev);
+	if (ret)
+		return ret;
+
+	return hclge_tm_lvl34_schd_mode_cfg(hdev);
+}
+
+static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
+{
+	int ret;
+
+	/* Cfg tm mapping  */
+	ret = hclge_tm_map_cfg(hdev);
+	if (ret)
+		return ret;
+
+	/* Cfg tm shaper */
+	ret = hclge_tm_shaper_cfg(hdev);
+	if (ret)
+		return ret;
+
+	/* Cfg dwrr */
+	ret = hclge_tm_dwrr_cfg(hdev);
+	if (ret)
+		return ret;
+
+	/* Cfg schd mode for each level schd */
+	return hclge_tm_schd_mode_hw(hdev);
+}
+
+int hclge_pause_setup_hw(struct hclge_dev *hdev)
+{
+	bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC;
+	int ret;
+	u8 i;
+
+	ret = hclge_mac_pause_en_cfg(hdev, en, en);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		ret = hclge_tm_qs_bp_cfg(hdev, i);
+		if (ret)
+			return ret;
+	}
+
+	return hclge_up_to_tc_map(hdev);
+}
+
+int hclge_tm_init_hw(struct hclge_dev *hdev)
+{
+	int ret;
+
+	if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
+	    (hdev->tx_sch_mode != HCLGE_FLAG_VNET_BASE_SCH_MODE))
+		return -ENOTSUPP;
+
+	ret = hclge_tm_schd_setup_hw(hdev);
+	if (ret)
+		return ret;
+
+	ret = hclge_pause_setup_hw(hdev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int hclge_tm_schd_init(struct hclge_dev *hdev)
+{
+	int ret = hclge_tm_schd_info_init(hdev);
+
+	if (ret)
+		return ret;
+
+	return hclge_tm_init_hw(hdev);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
new file mode 100644
index 000000000000..7e67337dfaf2
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_TM_H
+#define __HCLGE_TM_H
+
+#include <linux/types.h>
+
+/* MAC Pause */
+#define HCLGE_TX_MAC_PAUSE_EN_MSK	BIT(0)
+#define HCLGE_RX_MAC_PAUSE_EN_MSK	BIT(1)
+
+#define HCLGE_TM_PORT_BASE_MODE_MSK	BIT(0)
+
+/* SP or DWRR */
+#define HCLGE_TM_TX_SCHD_DWRR_MSK	BIT(0)
+#define HCLGE_TM_TX_SCHD_SP_MSK		(0xFE)
+
+struct hclge_pg_to_pri_link_cmd {
+	u8 pg_id;
+	u8 rsvd1[3];
+	u8 pri_bit_map;
+};
+
+struct hclge_qs_to_pri_link_cmd {
+	__le16 qs_id;
+	__le16 rsvd;
+	u8 priority;
+#define HCLGE_TM_QS_PRI_LINK_VLD_MSK	BIT(0)
+	u8 link_vld;
+};
+
+struct hclge_nq_to_qs_link_cmd {
+	__le16 nq_id;
+	__le16 rsvd;
+#define HCLGE_TM_Q_QS_LINK_VLD_MSK	BIT(10)
+	__le16 qset_id;
+};
+
+struct hclge_pg_weight_cmd {
+	u8 pg_id;
+	u8 dwrr;
+};
+
+struct hclge_priority_weight_cmd {
+	u8 pri_id;
+	u8 dwrr;
+};
+
+struct hclge_qs_weight_cmd {
+	__le16 qs_id;
+	u8 dwrr;
+};
+
+#define HCLGE_TM_SHAP_IR_B_MSK  GENMASK(7, 0)
+#define HCLGE_TM_SHAP_IR_B_LSH	0
+#define HCLGE_TM_SHAP_IR_U_MSK  GENMASK(11, 8)
+#define HCLGE_TM_SHAP_IR_U_LSH	8
+#define HCLGE_TM_SHAP_IR_S_MSK  GENMASK(15, 12)
+#define HCLGE_TM_SHAP_IR_S_LSH	12
+#define HCLGE_TM_SHAP_BS_B_MSK  GENMASK(20, 16)
+#define HCLGE_TM_SHAP_BS_B_LSH	16
+#define HCLGE_TM_SHAP_BS_S_MSK  GENMASK(25, 21)
+#define HCLGE_TM_SHAP_BS_S_LSH	21
+
+enum hclge_shap_bucket {
+	HCLGE_TM_SHAP_C_BUCKET = 0,
+	HCLGE_TM_SHAP_P_BUCKET,
+};
+
+struct hclge_pri_shapping_cmd {
+	u8 pri_id;
+	u8 rsvd[3];
+	__le32 pri_shapping_para;
+};
+
+struct hclge_pg_shapping_cmd {
+	u8 pg_id;
+	u8 rsvd[3];
+	__le32 pg_shapping_para;
+};
+
+struct hclge_bp_to_qs_map_cmd {
+	u8 tc_id;
+	u8 rsvd[2];
+	u8 qs_group_id;
+	__le32 qs_bit_map;
+	u32 rsvd1;
+};
+
+#define hclge_tm_set_feild(dest, string, val) \
+			hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
+				       (HCLGE_TM_SHAP_##string##_LSH), val)
+#define hclge_tm_get_feild(src, string) \
+			hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
+				       (HCLGE_TM_SHAP_##string##_LSH))
+
+int hclge_tm_schd_init(struct hclge_dev *hdev);
+int hclge_pause_setup_hw(struct hclge_dev *hdev);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
new file mode 100644
index 000000000000..069ae426aa24
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -0,0 +1,2849 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include <linux/sctp.h>
+#include <linux/vermagic.h>
+#include <net/gre.h>
+#include <net/vxlan.h>
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+const char hns3_driver_name[] = "hns3";
+const char hns3_driver_version[] = VERMAGIC_STRING;
+static const char hns3_driver_string[] =
+			"Hisilicon Ethernet Network Driver for Hip08 Family";
+static const char hns3_copyright[] = "Copyright (c) 2017 Huawei Corporation.";
+static struct hnae3_client client;
+
+/* hns3_pci_tbl - PCI Device ID Table
+ *
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id hns3_pci_tbl[] = {
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+	/* required last entry */
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, hns3_pci_tbl);
+
+static irqreturn_t hns3_irq_handle(int irq, void *dev)
+{
+	struct hns3_enet_tqp_vector *tqp_vector = dev;
+
+	napi_schedule(&tqp_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
+{
+	struct hns3_enet_tqp_vector *tqp_vectors;
+	unsigned int i;
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vectors = &priv->tqp_vector[i];
+
+		if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED)
+			continue;
+
+		/* release the irq resource */
+		free_irq(tqp_vectors->vector_irq, tqp_vectors);
+		tqp_vectors->irq_init_flag = HNS3_VECTOR_NOT_INITED;
+	}
+}
+
+static int hns3_nic_init_irq(struct hns3_nic_priv *priv)
+{
+	struct hns3_enet_tqp_vector *tqp_vectors;
+	int txrx_int_idx = 0;
+	int rx_int_idx = 0;
+	int tx_int_idx = 0;
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vectors = &priv->tqp_vector[i];
+
+		if (tqp_vectors->irq_init_flag == HNS3_VECTOR_INITED)
+			continue;
+
+		if (tqp_vectors->tx_group.ring && tqp_vectors->rx_group.ring) {
+			snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
+				 "%s-%s-%d", priv->netdev->name, "TxRx",
+				 txrx_int_idx++);
+			txrx_int_idx++;
+		} else if (tqp_vectors->rx_group.ring) {
+			snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
+				 "%s-%s-%d", priv->netdev->name, "Rx",
+				 rx_int_idx++);
+		} else if (tqp_vectors->tx_group.ring) {
+			snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
+				 "%s-%s-%d", priv->netdev->name, "Tx",
+				 tx_int_idx++);
+		} else {
+			/* Skip this unused q_vector */
+			continue;
+		}
+
+		tqp_vectors->name[HNAE3_INT_NAME_LEN - 1] = '\0';
+
+		ret = request_irq(tqp_vectors->vector_irq, hns3_irq_handle, 0,
+				  tqp_vectors->name,
+				       tqp_vectors);
+		if (ret) {
+			netdev_err(priv->netdev, "request irq(%d) fail\n",
+				   tqp_vectors->vector_irq);
+			return ret;
+		}
+
+		tqp_vectors->irq_init_flag = HNS3_VECTOR_INITED;
+	}
+
+	return 0;
+}
+
+static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector,
+				 u32 mask_en)
+{
+	writel(mask_en, tqp_vector->mask_addr);
+}
+
+static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector)
+{
+	napi_enable(&tqp_vector->napi);
+
+	/* enable vector */
+	hns3_mask_vector_irq(tqp_vector, 1);
+}
+
+static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector)
+{
+	/* disable vector */
+	hns3_mask_vector_irq(tqp_vector, 0);
+
+	disable_irq(tqp_vector->vector_irq);
+	napi_disable(&tqp_vector->napi);
+}
+
+static void hns3_set_vector_coalesc_gl(struct hns3_enet_tqp_vector *tqp_vector,
+				       u32 gl_value)
+{
+	/* this defines the configuration for GL (Interrupt Gap Limiter)
+	 * GL defines inter interrupt gap.
+	 * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing
+	 */
+	writel(gl_value, tqp_vector->mask_addr + HNS3_VECTOR_GL0_OFFSET);
+	writel(gl_value, tqp_vector->mask_addr + HNS3_VECTOR_GL1_OFFSET);
+	writel(gl_value, tqp_vector->mask_addr + HNS3_VECTOR_GL2_OFFSET);
+}
+
+static void hns3_set_vector_coalesc_rl(struct hns3_enet_tqp_vector *tqp_vector,
+				       u32 rl_value)
+{
+	/* this defines the configuration for RL (Interrupt Rate Limiter).
+	 * Rl defines rate of interrupts i.e. number of interrupts-per-second
+	 * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing
+	 */
+	writel(rl_value, tqp_vector->mask_addr + HNS3_VECTOR_RL_OFFSET);
+}
+
+static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector)
+{
+	/* initialize the configuration for interrupt coalescing.
+	 * 1. GL (Interrupt Gap Limiter)
+	 * 2. RL (Interrupt Rate Limiter)
+	 */
+
+	/* Default :enable interrupt coalesce */
+	tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
+	tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
+	hns3_set_vector_coalesc_gl(tqp_vector, HNS3_INT_GL_50K);
+	/* for now we are disabling Interrupt RL - we
+	 * will re-enable later
+	 */
+	hns3_set_vector_coalesc_rl(tqp_vector, 0);
+	tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
+	tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+}
+
+static int hns3_nic_net_up(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int i, j;
+	int ret;
+
+	/* get irq resource for all vectors */
+	ret = hns3_nic_init_irq(priv);
+	if (ret) {
+		netdev_err(netdev, "hns init irq failed! ret=%d\n", ret);
+		return ret;
+	}
+
+	/* enable the vectors */
+	for (i = 0; i < priv->vector_num; i++)
+		hns3_vector_enable(&priv->tqp_vector[i]);
+
+	/* start the ae_dev */
+	ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0;
+	if (ret)
+		goto out_start_err;
+
+	return 0;
+
+out_start_err:
+	for (j = i - 1; j >= 0; j--)
+		hns3_vector_disable(&priv->tqp_vector[j]);
+
+	hns3_nic_uninit_irq(priv);
+
+	return ret;
+}
+
+static int hns3_nic_net_open(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int ret;
+
+	netif_carrier_off(netdev);
+
+	ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps);
+	if (ret) {
+		netdev_err(netdev,
+			   "netif_set_real_num_tx_queues fail, ret=%d!\n",
+			   ret);
+		return ret;
+	}
+
+	ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps);
+	if (ret) {
+		netdev_err(netdev,
+			   "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+		return ret;
+	}
+
+	ret = hns3_nic_net_up(netdev);
+	if (ret) {
+		netdev_err(netdev,
+			   "hns net up fail, ret=%d!\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void hns3_nic_net_down(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	const struct hnae3_ae_ops *ops;
+	int i;
+
+	/* stop ae_dev */
+	ops = priv->ae_handle->ae_algo->ops;
+	if (ops->stop)
+		ops->stop(priv->ae_handle);
+
+	/* disable vectors */
+	for (i = 0; i < priv->vector_num; i++)
+		hns3_vector_disable(&priv->tqp_vector[i]);
+
+	/* free irq resources */
+	hns3_nic_uninit_irq(priv);
+}
+
+static int hns3_nic_net_stop(struct net_device *netdev)
+{
+	netif_tx_stop_all_queues(netdev);
+	netif_carrier_off(netdev);
+
+	hns3_nic_net_down(netdev);
+
+	return 0;
+}
+
+void hns3_set_multicast_list(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	struct netdev_hw_addr *ha = NULL;
+
+	if (h->ae_algo->ops->set_mc_addr) {
+		netdev_for_each_mc_addr(ha, netdev)
+			if (h->ae_algo->ops->set_mc_addr(h, ha->addr))
+				netdev_err(netdev, "set multicast fail\n");
+	}
+}
+
+static int hns3_nic_uc_sync(struct net_device *netdev,
+			    const unsigned char *addr)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (h->ae_algo->ops->add_uc_addr)
+		return h->ae_algo->ops->add_uc_addr(h, addr);
+
+	return 0;
+}
+
+static int hns3_nic_uc_unsync(struct net_device *netdev,
+			      const unsigned char *addr)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (h->ae_algo->ops->rm_uc_addr)
+		return h->ae_algo->ops->rm_uc_addr(h, addr);
+
+	return 0;
+}
+
+static int hns3_nic_mc_sync(struct net_device *netdev,
+			    const unsigned char *addr)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (h->ae_algo->ops->add_uc_addr)
+		return h->ae_algo->ops->add_mc_addr(h, addr);
+
+	return 0;
+}
+
+static int hns3_nic_mc_unsync(struct net_device *netdev,
+			      const unsigned char *addr)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (h->ae_algo->ops->rm_uc_addr)
+		return h->ae_algo->ops->rm_mc_addr(h, addr);
+
+	return 0;
+}
+
+void hns3_nic_set_rx_mode(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (h->ae_algo->ops->set_promisc_mode) {
+		if (netdev->flags & IFF_PROMISC)
+			h->ae_algo->ops->set_promisc_mode(h, 1);
+		else
+			h->ae_algo->ops->set_promisc_mode(h, 0);
+	}
+	if (__dev_uc_sync(netdev, hns3_nic_uc_sync, hns3_nic_uc_unsync))
+		netdev_err(netdev, "sync uc address fail\n");
+	if (netdev->flags & IFF_MULTICAST)
+		if (__dev_mc_sync(netdev, hns3_nic_mc_sync, hns3_nic_mc_unsync))
+			netdev_err(netdev, "sync mc address fail\n");
+}
+
+static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
+			u16 *mss, u32 *type_cs_vlan_tso)
+{
+	u32 l4_offset, hdr_len;
+	union l3_hdr_info l3;
+	union l4_hdr_info l4;
+	u32 l4_paylen;
+	int ret;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	ret = skb_cow_head(skb, 0);
+	if (ret)
+		return ret;
+
+	l3.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* Software should clear the IPv4's checksum field when tso is
+	 * needed.
+	 */
+	if (l3.v4->version == 4)
+		l3.v4->check = 0;
+
+	/* tunnel packet.*/
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+					 SKB_GSO_GRE_CSUM |
+					 SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)) {
+		if ((!(skb_shinfo(skb)->gso_type &
+		    SKB_GSO_PARTIAL)) &&
+		    (skb_shinfo(skb)->gso_type &
+		    SKB_GSO_UDP_TUNNEL_CSUM)) {
+			/* Software should clear the udp's checksum
+			 * field when tso is needed.
+			 */
+			l4.udp->check = 0;
+		}
+		/* reset l3&l4 pointers from outer to inner headers */
+		l3.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+
+		/* Software should clear the IPv4's checksum field when
+		 * tso is needed.
+		 */
+		if (l3.v4->version == 4)
+			l3.v4->check = 0;
+	}
+
+	/* normal or tunnel packet*/
+	l4_offset = l4.hdr - skb->data;
+	hdr_len = (l4.tcp->doff * 4) + l4_offset;
+
+	/* remove payload length from inner pseudo checksum when tso*/
+	l4_paylen = skb->len - l4_offset;
+	csum_replace_by_diff(&l4.tcp->check,
+			     (__force __wsum)htonl(l4_paylen));
+
+	/* find the txbd field values */
+	*paylen = skb->len - hdr_len;
+	hnae_set_bit(*type_cs_vlan_tso,
+		     HNS3_TXD_TSO_B, 1);
+
+	/* get MSS for TSO */
+	*mss = skb_shinfo(skb)->gso_size;
+
+	return 0;
+}
+
+static void hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto,
+				 u8 *il4_proto)
+{
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} l3;
+	unsigned char *l4_hdr;
+	unsigned char *exthdr;
+	u8 l4_proto_tmp;
+	__be16 frag_off;
+
+	/* find outer header point */
+	l3.hdr = skb_network_header(skb);
+	l4_hdr = skb_inner_transport_header(skb);
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		exthdr = l3.hdr + sizeof(*l3.v6);
+		l4_proto_tmp = l3.v6->nexthdr;
+		if (l4_hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data,
+					 &l4_proto_tmp, &frag_off);
+	} else if (skb->protocol == htons(ETH_P_IP)) {
+		l4_proto_tmp = l3.v4->protocol;
+	}
+
+	*ol4_proto = l4_proto_tmp;
+
+	/* tunnel packet */
+	if (!skb->encapsulation) {
+		*il4_proto = 0;
+		return;
+	}
+
+	/* find inner header point */
+	l3.hdr = skb_inner_network_header(skb);
+	l4_hdr = skb_inner_transport_header(skb);
+
+	if (l3.v6->version == 6) {
+		exthdr = l3.hdr + sizeof(*l3.v6);
+		l4_proto_tmp = l3.v6->nexthdr;
+		if (l4_hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data,
+					 &l4_proto_tmp, &frag_off);
+	} else if (l3.v4->version == 4) {
+		l4_proto_tmp = l3.v4->protocol;
+	}
+
+	*il4_proto = l4_proto_tmp;
+}
+
+static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto,
+				u8 il4_proto, u32 *type_cs_vlan_tso,
+				u32 *ol_type_vlan_len_msec)
+{
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} l3;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		struct gre_base_hdr *gre;
+		unsigned char *hdr;
+	} l4;
+	unsigned char *l2_hdr;
+	u8 l4_proto = ol4_proto;
+	u32 ol2_len;
+	u32 ol3_len;
+	u32 ol4_len;
+	u32 l2_len;
+	u32 l3_len;
+
+	l3.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* compute L2 header size for normal packet, defined in 2 Bytes */
+	l2_len = l3.hdr - skb->data;
+	hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
+		       HNS3_TXD_L2LEN_S, l2_len >> 1);
+
+	/* tunnel packet*/
+	if (skb->encapsulation) {
+		/* compute OL2 header size, defined in 2 Bytes */
+		ol2_len = l2_len;
+		hnae_set_field(*ol_type_vlan_len_msec,
+			       HNS3_TXD_L2LEN_M,
+			       HNS3_TXD_L2LEN_S, ol2_len >> 1);
+
+		/* compute OL3 header size, defined in 4 Bytes */
+		ol3_len = l4.hdr - l3.hdr;
+		hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_M,
+			       HNS3_TXD_L3LEN_S, ol3_len >> 2);
+
+		/* MAC in UDP, MAC in GRE (0x6558)*/
+		if ((ol4_proto == IPPROTO_UDP) || (ol4_proto == IPPROTO_GRE)) {
+			/* switch MAC header ptr from outer to inner header.*/
+			l2_hdr = skb_inner_mac_header(skb);
+
+			/* compute OL4 header size, defined in 4 Bytes. */
+			ol4_len = l2_hdr - l4.hdr;
+			hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_M,
+				       HNS3_TXD_L4LEN_S, ol4_len >> 2);
+
+			/* switch IP header ptr from outer to inner header */
+			l3.hdr = skb_inner_network_header(skb);
+
+			/* compute inner l2 header size, defined in 2 Bytes. */
+			l2_len = l3.hdr - l2_hdr;
+			hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
+				       HNS3_TXD_L2LEN_S, l2_len >> 1);
+		} else {
+			/* skb packet types not supported by hardware,
+			 * txbd len fild doesn't be filled.
+			 */
+			return;
+		}
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+
+		l4_proto = il4_proto;
+	}
+
+	/* compute inner(/normal) L3 header size, defined in 4 Bytes */
+	l3_len = l4.hdr - l3.hdr;
+	hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3LEN_M,
+		       HNS3_TXD_L3LEN_S, l3_len >> 2);
+
+	/* compute inner(/normal) L4 header size, defined in 4 Bytes */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+			       HNS3_TXD_L4LEN_S, l4.tcp->doff);
+		break;
+	case IPPROTO_SCTP:
+		hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+			       HNS3_TXD_L4LEN_S, (sizeof(struct sctphdr) >> 2));
+		break;
+	case IPPROTO_UDP:
+		hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+			       HNS3_TXD_L4LEN_S, (sizeof(struct udphdr) >> 2));
+		break;
+	default:
+		/* skb packet types not supported by hardware,
+		 * txbd len fild doesn't be filled.
+		 */
+		return;
+	}
+}
+
+static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
+				   u8 il4_proto, u32 *type_cs_vlan_tso,
+				   u32 *ol_type_vlan_len_msec)
+{
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} l3;
+	u32 l4_proto = ol4_proto;
+
+	l3.hdr = skb_network_header(skb);
+
+	/* define OL3 type and tunnel type(OL4).*/
+	if (skb->encapsulation) {
+		/* define outer network header type.*/
+		if (skb->protocol == htons(ETH_P_IP)) {
+			if (skb_is_gso(skb))
+				hnae_set_field(*ol_type_vlan_len_msec,
+					       HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
+					       HNS3_OL3T_IPV4_CSUM);
+			else
+				hnae_set_field(*ol_type_vlan_len_msec,
+					       HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
+					       HNS3_OL3T_IPV4_NO_CSUM);
+
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+			hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_M,
+				       HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6);
+		}
+
+		/* define tunnel type(OL4).*/
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			hnae_set_field(*ol_type_vlan_len_msec,
+				       HNS3_TXD_TUNTYPE_M,
+				       HNS3_TXD_TUNTYPE_S,
+				       HNS3_TUN_MAC_IN_UDP);
+			break;
+		case IPPROTO_GRE:
+			hnae_set_field(*ol_type_vlan_len_msec,
+				       HNS3_TXD_TUNTYPE_M,
+				       HNS3_TXD_TUNTYPE_S,
+				       HNS3_TUN_NVGRE);
+			break;
+		default:
+			/* drop the skb tunnel packet if hardware don't support,
+			 * because hardware can't calculate csum when TSO.
+			 */
+			if (skb_is_gso(skb))
+				return -EDOM;
+
+			/* the stack computes the IP header already,
+			 * driver calculate l4 checksum when not TSO.
+			 */
+			skb_checksum_help(skb);
+			return 0;
+		}
+
+		l3.hdr = skb_inner_network_header(skb);
+		l4_proto = il4_proto;
+	}
+
+	if (l3.v4->version == 4) {
+		hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
+			       HNS3_TXD_L3T_S, HNS3_L3T_IPV4);
+
+		/* the stack computes the IP header already, the only time we
+		 * need the hardware to recompute it is in the case of TSO.
+		 */
+		if (skb_is_gso(skb))
+			hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
+
+		hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+	} else if (l3.v6->version == 6) {
+		hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
+			       HNS3_TXD_L3T_S, HNS3_L3T_IPV6);
+		hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+	}
+
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		hnae_set_field(*type_cs_vlan_tso,
+			       HNS3_TXD_L4T_M,
+			       HNS3_TXD_L4T_S,
+			       HNS3_L4T_TCP);
+		break;
+	case IPPROTO_UDP:
+		hnae_set_field(*type_cs_vlan_tso,
+			       HNS3_TXD_L4T_M,
+			       HNS3_TXD_L4T_S,
+			       HNS3_L4T_UDP);
+		break;
+	case IPPROTO_SCTP:
+		hnae_set_field(*type_cs_vlan_tso,
+			       HNS3_TXD_L4T_M,
+			       HNS3_TXD_L4T_S,
+			       HNS3_L4T_SCTP);
+		break;
+	default:
+		/* drop the skb tunnel packet if hardware don't support,
+		 * because hardware can't calculate csum when TSO.
+		 */
+		if (skb_is_gso(skb))
+			return -EDOM;
+
+		/* the stack computes the IP header already,
+		 * driver calculate l4 checksum when not TSO.
+		 */
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	return 0;
+}
+
+static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
+{
+	/* Config bd buffer end */
+	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
+		       HNS3_TXD_BDTYPE_M, 0);
+	hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
+	hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
+	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 1);
+}
+
+static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
+			  int size, dma_addr_t dma, int frag_end,
+			  enum hns_desc_type type)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	struct hns3_desc *desc = &ring->desc[ring->next_to_use];
+	u32 ol_type_vlan_len_msec = 0;
+	u16 bdtp_fe_sc_vld_ra_ri = 0;
+	u32 type_cs_vlan_tso = 0;
+	struct sk_buff *skb;
+	u32 paylen = 0;
+	u16 mss = 0;
+	__be16 protocol;
+	u8 ol4_proto;
+	u8 il4_proto;
+	int ret;
+
+	/* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
+	desc_cb->priv = priv;
+	desc_cb->length = size;
+	desc_cb->dma = dma;
+	desc_cb->type = type;
+
+	/* now, fill the descriptor */
+	desc->addr = cpu_to_le64(dma);
+	desc->tx.send_size = cpu_to_le16((u16)size);
+	hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
+	desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+
+	if (type == DESC_TYPE_SKB) {
+		skb = (struct sk_buff *)priv;
+		paylen = cpu_to_le16(skb->len);
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			skb_reset_mac_len(skb);
+			protocol = skb->protocol;
+
+			/* vlan packet*/
+			if (protocol == htons(ETH_P_8021Q)) {
+				protocol = vlan_get_protocol(skb);
+				skb->protocol = protocol;
+			}
+			hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
+			hns3_set_l2l3l4_len(skb, ol4_proto, il4_proto,
+					    &type_cs_vlan_tso,
+					    &ol_type_vlan_len_msec);
+			ret = hns3_set_l3l4_type_csum(skb, ol4_proto, il4_proto,
+						      &type_cs_vlan_tso,
+						      &ol_type_vlan_len_msec);
+			if (ret)
+				return ret;
+
+			ret = hns3_set_tso(skb, &paylen, &mss,
+					   &type_cs_vlan_tso);
+			if (ret)
+				return ret;
+		}
+
+		/* Set txbd */
+		desc->tx.ol_type_vlan_len_msec =
+			cpu_to_le32(ol_type_vlan_len_msec);
+		desc->tx.type_cs_vlan_tso_len =
+			cpu_to_le32(type_cs_vlan_tso);
+		desc->tx.paylen = cpu_to_le16(paylen);
+		desc->tx.mss = cpu_to_le16(mss);
+	}
+
+	/* move ring pointer to next.*/
+	ring_ptr_move_fw(ring, next_to_use);
+
+	return 0;
+}
+
+static int hns3_fill_desc_tso(struct hns3_enet_ring *ring, void *priv,
+			      int size, dma_addr_t dma, int frag_end,
+			      enum hns_desc_type type)
+{
+	unsigned int frag_buf_num;
+	unsigned int k;
+	int sizeoflast;
+	int ret;
+
+	frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+	sizeoflast = size % HNS3_MAX_BD_SIZE;
+	sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE;
+
+	/* When the frag size is bigger than hardware, split this frag */
+	for (k = 0; k < frag_buf_num; k++) {
+		ret = hns3_fill_desc(ring, priv,
+				     (k == frag_buf_num - 1) ?
+				sizeoflast : HNS3_MAX_BD_SIZE,
+				dma + HNS3_MAX_BD_SIZE * k,
+				frag_end && (k == frag_buf_num - 1) ? 1 : 0,
+				(type == DESC_TYPE_SKB && !k) ?
+					DESC_TYPE_SKB : DESC_TYPE_PAGE);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum,
+				   struct hns3_enet_ring *ring)
+{
+	struct sk_buff *skb = *out_skb;
+	struct skb_frag_struct *frag;
+	int bdnum_for_frag;
+	int frag_num;
+	int buf_num;
+	int size;
+	int i;
+
+	size = skb_headlen(skb);
+	buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+
+	frag_num = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < frag_num; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+		size = skb_frag_size(frag);
+		bdnum_for_frag =
+			(size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+		if (bdnum_for_frag > HNS3_MAX_BD_PER_FRAG)
+			return -ENOMEM;
+
+		buf_num += bdnum_for_frag;
+	}
+
+	if (buf_num > ring_space(ring))
+		return -EBUSY;
+
+	*bnum = buf_num;
+	return 0;
+}
+
+static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum,
+				  struct hns3_enet_ring *ring)
+{
+	struct sk_buff *skb = *out_skb;
+	int buf_num;
+
+	/* No. of segments (plus a header) */
+	buf_num = skb_shinfo(skb)->nr_frags + 1;
+
+	if (buf_num > ring_space(ring))
+		return -EBUSY;
+
+	*bnum = buf_num;
+
+	return 0;
+}
+
+static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
+{
+	struct device *dev = ring_to_dev(ring);
+	unsigned int i;
+
+	for (i = 0; i < ring->desc_num; i++) {
+		/* check if this is where we started */
+		if (ring->next_to_use == next_to_use_orig)
+			break;
+
+		/* unmap the descriptor dma address */
+		if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB)
+			dma_unmap_single(dev,
+					 ring->desc_cb[ring->next_to_use].dma,
+					ring->desc_cb[ring->next_to_use].length,
+					DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dev,
+				       ring->desc_cb[ring->next_to_use].dma,
+				       ring->desc_cb[ring->next_to_use].length,
+				       DMA_TO_DEVICE);
+
+		/* rollback one */
+		ring_ptr_move_bw(ring, next_to_use);
+	}
+}
+
+static netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb,
+				     struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_nic_ring_data *ring_data =
+		&tx_ring_data(priv, skb->queue_mapping);
+	struct hns3_enet_ring *ring = ring_data->ring;
+	struct device *dev = priv->dev;
+	struct netdev_queue *dev_queue;
+	struct skb_frag_struct *frag;
+	int next_to_use_head;
+	int next_to_use_frag;
+	dma_addr_t dma;
+	int buf_num;
+	int seg_num;
+	int size;
+	int ret;
+	int i;
+
+	/* Prefetch the data used later */
+	prefetch(skb->data);
+
+	switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) {
+	case -EBUSY:
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_busy++;
+		u64_stats_update_end(&ring->syncp);
+
+		goto out_net_tx_busy;
+	case -ENOMEM:
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		netdev_err(netdev, "no memory to xmit!\n");
+
+		goto out_err_tx_ok;
+	default:
+		break;
+	}
+
+	/* No. of segments (plus a header) */
+	seg_num = skb_shinfo(skb)->nr_frags + 1;
+	/* Fill the first part */
+	size = skb_headlen(skb);
+
+	next_to_use_head = ring->next_to_use;
+
+	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma)) {
+		netdev_err(netdev, "TX head DMA map failed\n");
+		ring->stats.sw_err_cnt++;
+		goto out_err_tx_ok;
+	}
+
+	ret = priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0,
+			   DESC_TYPE_SKB);
+	if (ret)
+		goto head_dma_map_err;
+
+	next_to_use_frag = ring->next_to_use;
+	/* Fill the fragments */
+	for (i = 1; i < seg_num; i++) {
+		frag = &skb_shinfo(skb)->frags[i - 1];
+		size = skb_frag_size(frag);
+		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma)) {
+			netdev_err(netdev, "TX frag(%d) DMA map failed\n", i);
+			ring->stats.sw_err_cnt++;
+			goto frag_dma_map_err;
+		}
+		ret = priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma,
+				    seg_num - 1 == i ? 1 : 0,
+				    DESC_TYPE_PAGE);
+
+		if (ret)
+			goto frag_dma_map_err;
+	}
+
+	/* Complete translate all packets */
+	dev_queue = netdev_get_tx_queue(netdev, ring_data->queue_index);
+	netdev_tx_sent_queue(dev_queue, skb->len);
+
+	wmb(); /* Commit all data before submit */
+
+	hnae_queue_xmit(ring->tqp, buf_num);
+
+	return NETDEV_TX_OK;
+
+frag_dma_map_err:
+	hns_nic_dma_unmap(ring, next_to_use_frag);
+
+head_dma_map_err:
+	hns_nic_dma_unmap(ring, next_to_use_head);
+
+out_err_tx_ok:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+
+out_net_tx_busy:
+	netif_stop_subqueue(netdev, ring_data->queue_index);
+	smp_mb(); /* Commit all data before submit */
+
+	return NETDEV_TX_BUSY;
+}
+
+static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	struct sockaddr *mac_addr = p;
+	int ret;
+
+	if (!mac_addr || !is_valid_ether_addr((const u8 *)mac_addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data);
+	if (ret) {
+		netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
+		return ret;
+	}
+
+	ether_addr_copy(netdev->dev_addr, mac_addr->sa_data);
+
+	return 0;
+}
+
+static int hns3_nic_set_features(struct net_device *netdev,
+				 netdev_features_t features)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+	if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
+		priv->ops.fill_desc = hns3_fill_desc_tso;
+		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
+	} else {
+		priv->ops.fill_desc = hns3_fill_desc;
+		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
+	}
+
+	netdev->features = features;
+	return 0;
+}
+
+static void
+hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int queue_num = priv->ae_handle->kinfo.num_tqps;
+	struct hns3_enet_ring *ring;
+	unsigned int start;
+	unsigned int idx;
+	u64 tx_bytes = 0;
+	u64 rx_bytes = 0;
+	u64 tx_pkts = 0;
+	u64 rx_pkts = 0;
+
+	for (idx = 0; idx < queue_num; idx++) {
+		/* fetch the tx stats */
+		ring = priv->ring_data[idx].ring;
+		do {
+			tx_bytes += ring->stats.tx_bytes;
+			tx_pkts += ring->stats.tx_pkts;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+		/* fetch the rx stats */
+		ring = priv->ring_data[idx + queue_num].ring;
+		do {
+			rx_bytes += ring->stats.rx_bytes;
+			rx_pkts += ring->stats.rx_pkts;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+	}
+
+	stats->tx_bytes = tx_bytes;
+	stats->tx_packets = tx_pkts;
+	stats->rx_bytes = rx_bytes;
+	stats->rx_packets = rx_pkts;
+
+	stats->rx_errors = netdev->stats.rx_errors;
+	stats->multicast = netdev->stats.multicast;
+	stats->rx_length_errors = netdev->stats.rx_length_errors;
+	stats->rx_crc_errors = netdev->stats.rx_crc_errors;
+	stats->rx_missed_errors = netdev->stats.rx_missed_errors;
+
+	stats->tx_errors = netdev->stats.tx_errors;
+	stats->rx_dropped = netdev->stats.rx_dropped;
+	stats->tx_dropped = netdev->stats.tx_dropped;
+	stats->collisions = netdev->stats.collisions;
+	stats->rx_over_errors = netdev->stats.rx_over_errors;
+	stats->rx_frame_errors = netdev->stats.rx_frame_errors;
+	stats->rx_fifo_errors = netdev->stats.rx_fifo_errors;
+	stats->tx_aborted_errors = netdev->stats.tx_aborted_errors;
+	stats->tx_carrier_errors = netdev->stats.tx_carrier_errors;
+	stats->tx_fifo_errors = netdev->stats.tx_fifo_errors;
+	stats->tx_heartbeat_errors = netdev->stats.tx_heartbeat_errors;
+	stats->tx_window_errors = netdev->stats.tx_window_errors;
+	stats->rx_compressed = netdev->stats.rx_compressed;
+	stats->tx_compressed = netdev->stats.tx_compressed;
+}
+
+static void hns3_add_tunnel_port(struct net_device *netdev, u16 port,
+				 enum hns3_udp_tnl_type type)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_udp_tunnel *udp_tnl = &priv->udp_tnl[type];
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (udp_tnl->used && udp_tnl->dst_port == port) {
+		udp_tnl->used++;
+		return;
+	}
+
+	if (udp_tnl->used) {
+		netdev_warn(netdev,
+			    "UDP tunnel [%d], port [%d] offload\n", type, port);
+		return;
+	}
+
+	udp_tnl->dst_port = port;
+	udp_tnl->used = 1;
+	/* TBD send command to hardware to add port */
+	if (h->ae_algo->ops->add_tunnel_udp)
+		h->ae_algo->ops->add_tunnel_udp(h, port);
+}
+
+static void hns3_del_tunnel_port(struct net_device *netdev, u16 port,
+				 enum hns3_udp_tnl_type type)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_udp_tunnel *udp_tnl = &priv->udp_tnl[type];
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!udp_tnl->used || udp_tnl->dst_port != port) {
+		netdev_warn(netdev,
+			    "Invalid UDP tunnel port %d\n", port);
+		return;
+	}
+
+	udp_tnl->used--;
+	if (udp_tnl->used)
+		return;
+
+	udp_tnl->dst_port = 0;
+	/* TBD send command to hardware to del port  */
+	if (h->ae_algo->ops->del_tunnel_udp)
+		h->ae_algo->ops->add_tunnel_udp(h, port);
+}
+
+/* hns3_nic_udp_tunnel_add - Get notifiacetion about UDP tunnel ports
+ * @netdev: This physical ports's netdev
+ * @ti: Tunnel information
+ */
+static void hns3_nic_udp_tunnel_add(struct net_device *netdev,
+				    struct udp_tunnel_info *ti)
+{
+	u16 port_n = ntohs(ti->port);
+
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		hns3_add_tunnel_port(netdev, port_n, HNS3_UDP_TNL_VXLAN);
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		hns3_add_tunnel_port(netdev, port_n, HNS3_UDP_TNL_GENEVE);
+		break;
+	default:
+		netdev_err(netdev, "unsupported tunnel type %d\n", ti->type);
+		break;
+	}
+}
+
+static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
+				    struct udp_tunnel_info *ti)
+{
+	u16 port_n = ntohs(ti->port);
+
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		hns3_del_tunnel_port(netdev, port_n, HNS3_UDP_TNL_VXLAN);
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		hns3_del_tunnel_port(netdev, port_n, HNS3_UDP_TNL_GENEVE);
+		break;
+	default:
+		break;
+	}
+}
+
+static int hns3_setup_tc(struct net_device *netdev, u8 tc)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_knic_private_info *kinfo = &h->kinfo;
+	unsigned int i;
+	int ret;
+
+	if (tc > HNAE3_MAX_TC)
+		return -EINVAL;
+
+	if (kinfo->num_tc == tc)
+		return 0;
+
+	if (!netdev)
+		return -EINVAL;
+
+	if (!tc) {
+		netdev_reset_tc(netdev);
+		return 0;
+	}
+
+	/* Set num_tc for netdev */
+	ret = netdev_set_num_tc(netdev, tc);
+	if (ret)
+		return ret;
+
+	/* Set per TC queues for the VSI */
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		if (kinfo->tc_info[i].enable)
+			netdev_set_tc_queue(netdev,
+					    kinfo->tc_info[i].tc,
+					    kinfo->tc_info[i].tqp_count,
+					    kinfo->tc_info[i].tqp_offset);
+	}
+
+	return 0;
+}
+
+static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			     void *type_data)
+{
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
+
+	return hns3_setup_tc(dev, mqprio->num_tc);
+}
+
+static int hns3_vlan_rx_add_vid(struct net_device *netdev,
+				__be16 proto, u16 vid)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int ret = -EIO;
+
+	if (h->ae_algo->ops->set_vlan_filter)
+		ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
+
+	return ret;
+}
+
+static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
+				 __be16 proto, u16 vid)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int ret = -EIO;
+
+	if (h->ae_algo->ops->set_vlan_filter)
+		ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
+
+	return ret;
+}
+
+static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+				u8 qos, __be16 vlan_proto)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int ret = -EIO;
+
+	if (h->ae_algo->ops->set_vf_vlan_filter)
+		ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan,
+						   qos, vlan_proto);
+
+	return ret;
+}
+
+static const struct net_device_ops hns3_nic_netdev_ops = {
+	.ndo_open		= hns3_nic_net_open,
+	.ndo_stop		= hns3_nic_net_stop,
+	.ndo_start_xmit		= hns3_nic_net_xmit,
+	.ndo_set_mac_address	= hns3_nic_net_set_mac_address,
+	.ndo_set_features	= hns3_nic_set_features,
+	.ndo_get_stats64	= hns3_nic_get_stats64,
+	.ndo_setup_tc		= hns3_nic_setup_tc,
+	.ndo_set_rx_mode	= hns3_nic_set_rx_mode,
+	.ndo_udp_tunnel_add	= hns3_nic_udp_tunnel_add,
+	.ndo_udp_tunnel_del	= hns3_nic_udp_tunnel_del,
+	.ndo_vlan_rx_add_vid	= hns3_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= hns3_vlan_rx_kill_vid,
+	.ndo_set_vf_vlan	= hns3_ndo_set_vf_vlan,
+};
+
+/* hns3_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in hns3_pci_tbl
+ *
+ * hns3_probe initializes a PF identified by a pci_dev structure.
+ * The OS initialization, configuring of the PF private structure,
+ * and a hardware reset occur.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct hnae3_ae_dev *ae_dev;
+	int ret;
+
+	ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev),
+			      GFP_KERNEL);
+	if (!ae_dev) {
+		ret = -ENOMEM;
+		return ret;
+	}
+
+	ae_dev->pdev = pdev;
+	ae_dev->dev_type = HNAE3_DEV_KNIC;
+	pci_set_drvdata(pdev, ae_dev);
+
+	return hnae3_register_ae_dev(ae_dev);
+}
+
+/* hns3_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void hns3_remove(struct pci_dev *pdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+
+	hnae3_unregister_ae_dev(ae_dev);
+
+	devm_kfree(&pdev->dev, ae_dev);
+
+	pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_driver hns3_driver = {
+	.name     = hns3_driver_name,
+	.id_table = hns3_pci_tbl,
+	.probe    = hns3_probe,
+	.remove   = hns3_remove,
+};
+
+/* set default feature to hns3 */
+static void hns3_set_default_feature(struct net_device *netdev)
+{
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+	netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+
+	netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+
+	netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_HW_VLAN_CTAG_FILTER |
+		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+	netdev->vlan_features |=
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
+		NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
+		NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+	netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_HW_VLAN_CTAG_FILTER |
+		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+}
+
+static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
+			     struct hns3_desc_cb *cb)
+{
+	unsigned int order = hnae_page_order(ring);
+	struct page *p;
+
+	p = dev_alloc_pages(order);
+	if (!p)
+		return -ENOMEM;
+
+	cb->priv = p;
+	cb->page_offset = 0;
+	cb->reuse_flag = 0;
+	cb->buf  = page_address(p);
+	cb->length = hnae_page_size(ring);
+	cb->type = DESC_TYPE_PAGE;
+
+	memset(cb->buf, 0, cb->length);
+
+	return 0;
+}
+
+static void hns3_free_buffer(struct hns3_enet_ring *ring,
+			     struct hns3_desc_cb *cb)
+{
+	if (cb->type == DESC_TYPE_SKB)
+		dev_kfree_skb_any((struct sk_buff *)cb->priv);
+	else if (!HNAE3_IS_TX_RING(ring))
+		put_page((struct page *)cb->priv);
+	memset(cb, 0, sizeof(*cb));
+}
+
+static int hns3_map_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb)
+{
+	cb->dma = dma_map_page(ring_to_dev(ring), cb->priv, 0,
+			       cb->length, ring_to_dma_dir(ring));
+
+	if (dma_mapping_error(ring_to_dev(ring), cb->dma))
+		return -EIO;
+
+	return 0;
+}
+
+static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
+			      struct hns3_desc_cb *cb)
+{
+	if (cb->type == DESC_TYPE_SKB)
+		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
+				 ring_to_dma_dir(ring));
+	else
+		dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
+			       ring_to_dma_dir(ring));
+}
+
+static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i)
+{
+	hns3_unmap_buffer(ring, &ring->desc_cb[i]);
+	ring->desc[i].addr = 0;
+}
+
+static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i)
+{
+	struct hns3_desc_cb *cb = &ring->desc_cb[i];
+
+	if (!ring->desc_cb[i].dma)
+		return;
+
+	hns3_buffer_detach(ring, i);
+	hns3_free_buffer(ring, cb);
+}
+
+static void hns3_free_buffers(struct hns3_enet_ring *ring)
+{
+	int i;
+
+	for (i = 0; i < ring->desc_num; i++)
+		hns3_free_buffer_detach(ring, i);
+}
+
+/* free desc along with its attached buffer */
+static void hns3_free_desc(struct hns3_enet_ring *ring)
+{
+	hns3_free_buffers(ring);
+
+	dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr,
+			 ring->desc_num * sizeof(ring->desc[0]),
+			 DMA_BIDIRECTIONAL);
+	ring->desc_dma_addr = 0;
+	kfree(ring->desc);
+	ring->desc = NULL;
+}
+
+static int hns3_alloc_desc(struct hns3_enet_ring *ring)
+{
+	int size = ring->desc_num * sizeof(ring->desc[0]);
+
+	ring->desc = kzalloc(size, GFP_KERNEL);
+	if (!ring->desc)
+		return -ENOMEM;
+
+	ring->desc_dma_addr = dma_map_single(ring_to_dev(ring), ring->desc,
+					     size, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(ring_to_dev(ring), ring->desc_dma_addr)) {
+		ring->desc_dma_addr = 0;
+		kfree(ring->desc);
+		ring->desc = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int hns3_reserve_buffer_map(struct hns3_enet_ring *ring,
+				   struct hns3_desc_cb *cb)
+{
+	int ret;
+
+	ret = hns3_alloc_buffer(ring, cb);
+	if (ret)
+		goto out;
+
+	ret = hns3_map_buffer(ring, cb);
+	if (ret)
+		goto out_with_buf;
+
+	return 0;
+
+out_with_buf:
+	hns3_free_buffers(ring);
+out:
+	return ret;
+}
+
+static int hns3_alloc_buffer_attach(struct hns3_enet_ring *ring, int i)
+{
+	int ret = hns3_reserve_buffer_map(ring, &ring->desc_cb[i]);
+
+	if (ret)
+		return ret;
+
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+
+	return 0;
+}
+
+/* Allocate memory for raw pkg, and map with dma */
+static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring)
+{
+	int i, j, ret;
+
+	for (i = 0; i < ring->desc_num; i++) {
+		ret = hns3_alloc_buffer_attach(ring, i);
+		if (ret)
+			goto out_buffer_fail;
+	}
+
+	return 0;
+
+out_buffer_fail:
+	for (j = i - 1; j >= 0; j--)
+		hns3_free_buffer_detach(ring, j);
+	return ret;
+}
+
+/* detach a in-used buffer and replace with a reserved one  */
+static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
+				struct hns3_desc_cb *res_cb)
+{
+	hns3_map_buffer(ring, &ring->desc_cb[i]);
+	ring->desc_cb[i] = *res_cb;
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+}
+
+static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
+{
+	ring->desc_cb[i].reuse_flag = 0;
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma
+		+ ring->desc_cb[i].page_offset);
+}
+
+static void hns3_nic_reclaim_one_desc(struct hns3_enet_ring *ring, int *bytes,
+				      int *pkts)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
+
+	(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
+	(*bytes) += desc_cb->length;
+	/* desc_cb will be cleaned, after hnae_free_buffer_detach*/
+	hns3_free_buffer_detach(ring, ring->next_to_clean);
+
+	ring_ptr_move_fw(ring, next_to_clean);
+}
+
+static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
+{
+	int u = ring->next_to_use;
+	int c = ring->next_to_clean;
+
+	if (unlikely(h > ring->desc_num))
+		return 0;
+
+	return u > c ? (h > c && h <= u) : (h > c || h <= u);
+}
+
+int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
+{
+	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	struct netdev_queue *dev_queue;
+	int bytes, pkts;
+	int head;
+
+	head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG);
+	rmb(); /* Make sure head is ready before touch any data */
+
+	if (is_ring_empty(ring) || head == ring->next_to_clean)
+		return 0; /* no data to poll */
+
+	if (!is_valid_clean_head(ring, head)) {
+		netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
+			   ring->next_to_use, ring->next_to_clean);
+
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.io_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		return -EIO;
+	}
+
+	bytes = 0;
+	pkts = 0;
+	while (head != ring->next_to_clean && budget) {
+		hns3_nic_reclaim_one_desc(ring, &bytes, &pkts);
+		/* Issue prefetch for next Tx descriptor */
+		prefetch(&ring->desc_cb[ring->next_to_clean]);
+		budget--;
+	}
+
+	ring->tqp_vector->tx_group.total_bytes += bytes;
+	ring->tqp_vector->tx_group.total_packets += pkts;
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.tx_bytes += bytes;
+	ring->stats.tx_pkts += pkts;
+	u64_stats_update_end(&ring->syncp);
+
+	dev_queue = netdev_get_tx_queue(netdev, ring->tqp->tqp_index);
+	netdev_tx_completed_queue(dev_queue, pkts, bytes);
+
+	if (unlikely(pkts && netif_carrier_ok(netdev) &&
+		     (ring_space(ring) > HNS3_MAX_BD_PER_PKT))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (netif_tx_queue_stopped(dev_queue)) {
+			netif_tx_wake_queue(dev_queue);
+			ring->stats.restart_queue++;
+		}
+	}
+
+	return !!budget;
+}
+
+static int hns3_desc_unused(struct hns3_enet_ring *ring)
+{
+	int ntc = ring->next_to_clean;
+	int ntu = ring->next_to_use;
+
+	return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
+}
+
+static void
+hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count)
+{
+	struct hns3_desc_cb *desc_cb;
+	struct hns3_desc_cb res_cbs;
+	int i, ret;
+
+	for (i = 0; i < cleand_count; i++) {
+		desc_cb = &ring->desc_cb[ring->next_to_use];
+		if (desc_cb->reuse_flag) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.reuse_pg_cnt++;
+			u64_stats_update_end(&ring->syncp);
+
+			hns3_reuse_buffer(ring, ring->next_to_use);
+		} else {
+			ret = hns3_reserve_buffer_map(ring, &res_cbs);
+			if (ret) {
+				u64_stats_update_begin(&ring->syncp);
+				ring->stats.sw_err_cnt++;
+				u64_stats_update_end(&ring->syncp);
+
+				netdev_err(ring->tqp->handle->kinfo.netdev,
+					   "hnae reserve buffer map failed.\n");
+				break;
+			}
+			hns3_replace_buffer(ring, ring->next_to_use, &res_cbs);
+		}
+
+		ring_ptr_move_fw(ring, next_to_use);
+	}
+
+	wmb(); /* Make all data has been write before submit */
+	writel_relaxed(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG);
+}
+
+/* hns3_nic_get_headlen - determine size of header for LRO/GRO
+ * @data: pointer to the start of the headers
+ * @max: total length of section to find headers in
+ *
+ * This function is meant to determine the length of headers that will
+ * be recognized by hardware for LRO, GRO, and RSC offloads.  The main
+ * motivation of doing this is to only perform one pull for IPv4 TCP
+ * packets so that we can do basic things like calculating the gso_size
+ * based on the average data per packet.
+ */
+static unsigned int hns3_nic_get_headlen(unsigned char *data, u32 flag,
+					 unsigned int max_size)
+{
+	unsigned char *network;
+	u8 hlen;
+
+	/* This should never happen, but better safe than sorry */
+	if (max_size < ETH_HLEN)
+		return max_size;
+
+	/* Initialize network frame pointer */
+	network = data;
+
+	/* Set first protocol and move network header forward */
+	network += ETH_HLEN;
+
+	/* Handle any vlan tag if present */
+	if (hnae_get_field(flag, HNS3_RXD_VLAN_M, HNS3_RXD_VLAN_S)
+		== HNS3_RX_FLAG_VLAN_PRESENT) {
+		if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN))
+			return max_size;
+
+		network += VLAN_HLEN;
+	}
+
+	/* Handle L3 protocols */
+	if (hnae_get_field(flag, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S)
+		== HNS3_RX_FLAG_L3ID_IPV4) {
+		if ((typeof(max_size))(network - data) >
+		    (max_size - sizeof(struct iphdr)))
+			return max_size;
+
+		/* Access ihl as a u8 to avoid unaligned access on ia64 */
+		hlen = (network[0] & 0x0F) << 2;
+
+		/* Verify hlen meets minimum size requirements */
+		if (hlen < sizeof(struct iphdr))
+			return network - data;
+
+		/* Record next protocol if header is present */
+	} else if (hnae_get_field(flag, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S)
+		== HNS3_RX_FLAG_L3ID_IPV6) {
+		if ((typeof(max_size))(network - data) >
+		    (max_size - sizeof(struct ipv6hdr)))
+			return max_size;
+
+		/* Record next protocol */
+		hlen = sizeof(struct ipv6hdr);
+	} else {
+		return network - data;
+	}
+
+	/* Relocate pointer to start of L4 header */
+	network += hlen;
+
+	/* Finally sort out TCP/UDP */
+	if (hnae_get_field(flag, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S)
+		== HNS3_RX_FLAG_L4ID_TCP) {
+		if ((typeof(max_size))(network - data) >
+		    (max_size - sizeof(struct tcphdr)))
+			return max_size;
+
+		/* Access doff as a u8 to avoid unaligned access on ia64 */
+		hlen = (network[12] & 0xF0) >> 2;
+
+		/* Verify hlen meets minimum size requirements */
+		if (hlen < sizeof(struct tcphdr))
+			return network - data;
+
+		network += hlen;
+	} else if (hnae_get_field(flag, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S)
+		== HNS3_RX_FLAG_L4ID_UDP) {
+		if ((typeof(max_size))(network - data) >
+		    (max_size - sizeof(struct udphdr)))
+			return max_size;
+
+		network += sizeof(struct udphdr);
+	}
+
+	/* If everything has gone correctly network should be the
+	 * data section of the packet and will be the end of the header.
+	 * If not then it probably represents the end of the last recognized
+	 * header.
+	 */
+	if ((typeof(max_size))(network - data) < max_size)
+		return network - data;
+	else
+		return max_size;
+}
+
+static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
+				struct hns3_enet_ring *ring, int pull_len,
+				struct hns3_desc_cb *desc_cb)
+{
+	struct hns3_desc *desc;
+	int truesize, size;
+	int last_offset;
+	bool twobufs;
+
+	twobufs = ((PAGE_SIZE < 8192) &&
+		hnae_buf_size(ring) == HNS3_BUFFER_SIZE_2048);
+
+	desc = &ring->desc[ring->next_to_clean];
+	size = le16_to_cpu(desc->rx.size);
+
+	if (twobufs) {
+		truesize = hnae_buf_size(ring);
+	} else {
+		truesize = ALIGN(size, L1_CACHE_BYTES);
+		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+	}
+
+	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
+			size - pull_len, truesize - pull_len);
+
+	 /* Avoid re-using remote pages,flag default unreuse */
+	if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
+		return;
+
+	if (twobufs) {
+		/* If we are only owner of page we can reuse it */
+		if (likely(page_count(desc_cb->priv) == 1)) {
+			/* Flip page offset to other buffer */
+			desc_cb->page_offset ^= truesize;
+
+			desc_cb->reuse_flag = 1;
+			/* bump ref count on page before it is given*/
+			get_page(desc_cb->priv);
+		}
+		return;
+	}
+
+	/* Move offset up to the next cache line */
+	desc_cb->page_offset += truesize;
+
+	if (desc_cb->page_offset <= last_offset) {
+		desc_cb->reuse_flag = 1;
+		/* Bump ref count on page before it is given*/
+		get_page(desc_cb->priv);
+	}
+}
+
+static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+			     struct hns3_desc *desc)
+{
+	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	int l3_type, l4_type;
+	u32 bd_base_info;
+	int ol4_type;
+	u32 l234info;
+
+	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+	l234info = le32_to_cpu(desc->rx.l234_info);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_checksum_none_assert(skb);
+
+	if (!(netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check if hardware has done checksum */
+	if (!hnae_get_bit(bd_base_info, HNS3_RXD_L3L4P_B))
+		return;
+
+	if (unlikely(hnae_get_bit(l234info, HNS3_RXD_L3E_B) ||
+		     hnae_get_bit(l234info, HNS3_RXD_L4E_B) ||
+		     hnae_get_bit(l234info, HNS3_RXD_OL3E_B) ||
+		     hnae_get_bit(l234info, HNS3_RXD_OL4E_B))) {
+		netdev_err(netdev, "L3/L4 error pkt\n");
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.l3l4_csum_err++;
+		u64_stats_update_end(&ring->syncp);
+
+		return;
+	}
+
+	l3_type = hnae_get_field(l234info, HNS3_RXD_L3ID_M,
+				 HNS3_RXD_L3ID_S);
+	l4_type = hnae_get_field(l234info, HNS3_RXD_L4ID_M,
+				 HNS3_RXD_L4ID_S);
+
+	ol4_type = hnae_get_field(l234info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S);
+	switch (ol4_type) {
+	case HNS3_OL4_TYPE_MAC_IN_UDP:
+	case HNS3_OL4_TYPE_NVGRE:
+		skb->csum_level = 1;
+	case HNS3_OL4_TYPE_NO_TUN:
+		/* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */
+		if (l3_type == HNS3_L3_TYPE_IPV4 ||
+		    (l3_type == HNS3_L3_TYPE_IPV6 &&
+		     (l4_type == HNS3_L4_TYPE_UDP ||
+		      l4_type == HNS3_L4_TYPE_TCP ||
+		      l4_type == HNS3_L4_TYPE_SCTP)))
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
+	}
+}
+
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
+			     struct sk_buff **out_skb, int *out_bnum)
+{
+	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	struct hns3_desc_cb *desc_cb;
+	struct hns3_desc *desc;
+	struct sk_buff *skb;
+	unsigned char *va;
+	u32 bd_base_info;
+	int pull_len;
+	u32 l234info;
+	int length;
+	int bnum;
+
+	desc = &ring->desc[ring->next_to_clean];
+	desc_cb = &ring->desc_cb[ring->next_to_clean];
+
+	prefetch(desc);
+
+	length = le16_to_cpu(desc->rx.pkt_len);
+	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+	l234info = le32_to_cpu(desc->rx.l234_info);
+
+	/* Check valid BD */
+	if (!hnae_get_bit(bd_base_info, HNS3_RXD_VLD_B))
+		return -EFAULT;
+
+	va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
+
+	/* Prefetch first cache line of first page
+	 * Idea is to cache few bytes of the header of the packet. Our L1 Cache
+	 * line size is 64B so need to prefetch twice to make it 128B. But in
+	 * actual we can have greater size of caches with 128B Level 1 cache
+	 * lines. In such a case, single fetch would suffice to cache in the
+	 * relevant part of the header.
+	 */
+	prefetch(va);
+#if L1_CACHE_BYTES < 128
+	prefetch(va + L1_CACHE_BYTES);
+#endif
+
+	skb = *out_skb = napi_alloc_skb(&ring->tqp_vector->napi,
+					HNS3_RX_HEAD_SIZE);
+	if (unlikely(!skb)) {
+		netdev_err(netdev, "alloc rx skb fail\n");
+
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+
+		return -ENOMEM;
+	}
+
+	prefetchw(skb->data);
+
+	bnum = 1;
+	if (length <= HNS3_RX_HEAD_SIZE) {
+		memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
+
+		/* We can reuse buffer as-is, just make sure it is local */
+		if (likely(page_to_nid(desc_cb->priv) == numa_node_id()))
+			desc_cb->reuse_flag = 1;
+		else /* This page cannot be reused so discard it */
+			put_page(desc_cb->priv);
+
+		ring_ptr_move_fw(ring, next_to_clean);
+	} else {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.seg_pkt_cnt++;
+		u64_stats_update_end(&ring->syncp);
+
+		pull_len = hns3_nic_get_headlen(va, l234info,
+						HNS3_RX_HEAD_SIZE);
+		memcpy(__skb_put(skb, pull_len), va,
+		       ALIGN(pull_len, sizeof(long)));
+
+		hns3_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
+		ring_ptr_move_fw(ring, next_to_clean);
+
+		while (!hnae_get_bit(bd_base_info, HNS3_RXD_FE_B)) {
+			desc = &ring->desc[ring->next_to_clean];
+			desc_cb = &ring->desc_cb[ring->next_to_clean];
+			bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+			hns3_nic_reuse_page(skb, bnum, ring, 0, desc_cb);
+			ring_ptr_move_fw(ring, next_to_clean);
+			bnum++;
+		}
+	}
+
+	*out_bnum = bnum;
+
+	if (unlikely(!hnae_get_bit(bd_base_info, HNS3_RXD_VLD_B))) {
+		netdev_err(netdev, "no valid bd,%016llx,%016llx\n",
+			   ((u64 *)desc)[0], ((u64 *)desc)[1]);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.non_vld_descs++;
+		u64_stats_update_end(&ring->syncp);
+
+		dev_kfree_skb_any(skb);
+		return -EINVAL;
+	}
+
+	if (unlikely((!desc->rx.pkt_len) ||
+		     hnae_get_bit(l234info, HNS3_RXD_TRUNCAT_B))) {
+		netdev_err(netdev, "truncated pkt\n");
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.err_pkt_len++;
+		u64_stats_update_end(&ring->syncp);
+
+		dev_kfree_skb_any(skb);
+		return -EFAULT;
+	}
+
+	if (unlikely(hnae_get_bit(l234info, HNS3_RXD_L2E_B))) {
+		netdev_err(netdev, "L2 error pkt\n");
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.l2_err++;
+		u64_stats_update_end(&ring->syncp);
+
+		dev_kfree_skb_any(skb);
+		return -EFAULT;
+	}
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.rx_pkts++;
+	ring->stats.rx_bytes += skb->len;
+	u64_stats_update_end(&ring->syncp);
+
+	ring->tqp_vector->rx_group.total_bytes += skb->len;
+
+	hns3_rx_checksum(ring, skb, desc);
+	return 0;
+}
+
+static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget)
+{
+#define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
+	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	int recv_pkts, recv_bds, clean_count, err;
+	int unused_count = hns3_desc_unused(ring);
+	struct sk_buff *skb = NULL;
+	int num, bnum = 0;
+
+	num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG);
+	rmb(); /* Make sure num taken effect before the other data is touched */
+
+	recv_pkts = 0, recv_bds = 0, clean_count = 0;
+	num -= unused_count;
+
+	while (recv_pkts < budget && recv_bds < num) {
+		/* Reuse or realloc buffers */
+		if (clean_count + unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
+			hns3_nic_alloc_rx_buffers(ring,
+						  clean_count + unused_count);
+			clean_count = 0;
+			unused_count = hns3_desc_unused(ring);
+		}
+
+		/* Poll one pkt */
+		err = hns3_handle_rx_bd(ring, &skb, &bnum);
+		if (unlikely(!skb)) /* This fault cannot be repaired */
+			goto out;
+
+		recv_bds += bnum;
+		clean_count += bnum;
+		if (unlikely(err)) {  /* Do jump the err */
+			recv_pkts++;
+			continue;
+		}
+
+		/* Do update ip stack process */
+		skb->protocol = eth_type_trans(skb, netdev);
+		(void)napi_gro_receive(&ring->tqp_vector->napi, skb);
+
+		recv_pkts++;
+	}
+
+out:
+	/* Make all data has been write before submit */
+	if (clean_count + unused_count > 0)
+		hns3_nic_alloc_rx_buffers(ring,
+					  clean_count + unused_count);
+
+	return recv_pkts;
+}
+
+static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
+{
+#define HNS3_RX_ULTRA_PACKET_RATE 40000
+	enum hns3_flow_level_range new_flow_level;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	int packets_per_secs;
+	int bytes_per_usecs;
+	u16 new_int_gl;
+	int usecs;
+
+	if (!ring_group->int_gl)
+		return false;
+
+	if (ring_group->total_packets == 0) {
+		ring_group->int_gl = HNS3_INT_GL_50K;
+		ring_group->flow_level = HNS3_FLOW_LOW;
+		return true;
+	}
+
+	/* Simple throttlerate management
+	 * 0-10MB/s   lower     (50000 ints/s)
+	 * 10-20MB/s   middle    (20000 ints/s)
+	 * 20-1249MB/s high      (18000 ints/s)
+	 * > 40000pps  ultra     (8000 ints/s)
+	 */
+	new_flow_level = ring_group->flow_level;
+	new_int_gl = ring_group->int_gl;
+	tqp_vector = ring_group->ring->tqp_vector;
+	usecs = (ring_group->int_gl << 1);
+	bytes_per_usecs = ring_group->total_bytes / usecs;
+	/* 1000000 microseconds */
+	packets_per_secs = ring_group->total_packets * 1000000 / usecs;
+
+	switch (new_flow_level) {
+	case HNS3_FLOW_LOW:
+		if (bytes_per_usecs > 10)
+			new_flow_level = HNS3_FLOW_MID;
+		break;
+	case HNS3_FLOW_MID:
+		if (bytes_per_usecs > 20)
+			new_flow_level = HNS3_FLOW_HIGH;
+		else if (bytes_per_usecs <= 10)
+			new_flow_level = HNS3_FLOW_LOW;
+		break;
+	case HNS3_FLOW_HIGH:
+	case HNS3_FLOW_ULTRA:
+	default:
+		if (bytes_per_usecs <= 20)
+			new_flow_level = HNS3_FLOW_MID;
+		break;
+	}
+
+	if ((packets_per_secs > HNS3_RX_ULTRA_PACKET_RATE) &&
+	    (&tqp_vector->rx_group == ring_group))
+		new_flow_level = HNS3_FLOW_ULTRA;
+
+	switch (new_flow_level) {
+	case HNS3_FLOW_LOW:
+		new_int_gl = HNS3_INT_GL_50K;
+		break;
+	case HNS3_FLOW_MID:
+		new_int_gl = HNS3_INT_GL_20K;
+		break;
+	case HNS3_FLOW_HIGH:
+		new_int_gl = HNS3_INT_GL_18K;
+		break;
+	case HNS3_FLOW_ULTRA:
+		new_int_gl = HNS3_INT_GL_8K;
+		break;
+	default:
+		break;
+	}
+
+	ring_group->total_bytes = 0;
+	ring_group->total_packets = 0;
+	ring_group->flow_level = new_flow_level;
+	if (new_int_gl != ring_group->int_gl) {
+		ring_group->int_gl = new_int_gl;
+		return true;
+	}
+	return false;
+}
+
+static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
+{
+	u16 rx_int_gl, tx_int_gl;
+	bool rx, tx;
+
+	rx = hns3_get_new_int_gl(&tqp_vector->rx_group);
+	tx = hns3_get_new_int_gl(&tqp_vector->tx_group);
+	rx_int_gl = tqp_vector->rx_group.int_gl;
+	tx_int_gl = tqp_vector->tx_group.int_gl;
+	if (rx && tx) {
+		if (rx_int_gl > tx_int_gl) {
+			tqp_vector->tx_group.int_gl = rx_int_gl;
+			tqp_vector->tx_group.flow_level =
+				tqp_vector->rx_group.flow_level;
+			hns3_set_vector_coalesc_gl(tqp_vector, rx_int_gl);
+		} else {
+			tqp_vector->rx_group.int_gl = tx_int_gl;
+			tqp_vector->rx_group.flow_level =
+				tqp_vector->tx_group.flow_level;
+			hns3_set_vector_coalesc_gl(tqp_vector, tx_int_gl);
+		}
+	}
+}
+
+static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
+{
+	struct hns3_enet_ring *ring;
+	int rx_pkt_total = 0;
+
+	struct hns3_enet_tqp_vector *tqp_vector =
+		container_of(napi, struct hns3_enet_tqp_vector, napi);
+	bool clean_complete = true;
+	int rx_budget;
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	hns3_for_each_ring(ring, tqp_vector->tx_group) {
+		if (!hns3_clean_tx_ring(ring, budget))
+			clean_complete = false;
+	}
+
+	/* make sure rx ring budget not smaller than 1 */
+	rx_budget = max(budget / tqp_vector->num_tqps, 1);
+
+	hns3_for_each_ring(ring, tqp_vector->rx_group) {
+		int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget);
+
+		if (rx_cleaned >= rx_budget)
+			clean_complete = false;
+
+		rx_pkt_total += rx_cleaned;
+	}
+
+	tqp_vector->rx_group.total_packets += rx_pkt_total;
+
+	if (!clean_complete)
+		return budget;
+
+	napi_complete(napi);
+	hns3_update_new_int_gl(tqp_vector);
+	hns3_mask_vector_irq(tqp_vector, 1);
+
+	return rx_pkt_total;
+}
+
+static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
+				      struct hnae3_ring_chain_node *head)
+{
+	struct pci_dev *pdev = tqp_vector->handle->pdev;
+	struct hnae3_ring_chain_node *cur_chain = head;
+	struct hnae3_ring_chain_node *chain;
+	struct hns3_enet_ring *tx_ring;
+	struct hns3_enet_ring *rx_ring;
+
+	tx_ring = tqp_vector->tx_group.ring;
+	if (tx_ring) {
+		cur_chain->tqp_index = tx_ring->tqp->tqp_index;
+		hnae_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
+			     HNAE3_RING_TYPE_TX);
+
+		cur_chain->next = NULL;
+
+		while (tx_ring->next) {
+			tx_ring = tx_ring->next;
+
+			chain = devm_kzalloc(&pdev->dev, sizeof(*chain),
+					     GFP_KERNEL);
+			if (!chain)
+				return -ENOMEM;
+
+			cur_chain->next = chain;
+			chain->tqp_index = tx_ring->tqp->tqp_index;
+			hnae_set_bit(chain->flag, HNAE3_RING_TYPE_B,
+				     HNAE3_RING_TYPE_TX);
+
+			cur_chain = chain;
+		}
+	}
+
+	rx_ring = tqp_vector->rx_group.ring;
+	if (!tx_ring && rx_ring) {
+		cur_chain->next = NULL;
+		cur_chain->tqp_index = rx_ring->tqp->tqp_index;
+		hnae_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
+			     HNAE3_RING_TYPE_RX);
+
+		rx_ring = rx_ring->next;
+	}
+
+	while (rx_ring) {
+		chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
+		if (!chain)
+			return -ENOMEM;
+
+		cur_chain->next = chain;
+		chain->tqp_index = rx_ring->tqp->tqp_index;
+		hnae_set_bit(chain->flag, HNAE3_RING_TYPE_B,
+			     HNAE3_RING_TYPE_RX);
+		cur_chain = chain;
+
+		rx_ring = rx_ring->next;
+	}
+
+	return 0;
+}
+
+static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
+					struct hnae3_ring_chain_node *head)
+{
+	struct pci_dev *pdev = tqp_vector->handle->pdev;
+	struct hnae3_ring_chain_node *chain_tmp, *chain;
+
+	chain = head->next;
+
+	while (chain) {
+		chain_tmp = chain->next;
+		devm_kfree(&pdev->dev, chain);
+		chain = chain_tmp;
+	}
+}
+
+static void hns3_add_ring_to_group(struct hns3_enet_ring_group *group,
+				   struct hns3_enet_ring *ring)
+{
+	ring->next = group->ring;
+	group->ring = ring;
+
+	group->count++;
+}
+
+static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
+{
+	struct hnae3_ring_chain_node vector_ring_chain;
+	struct hnae3_handle *h = priv->ae_handle;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	struct hnae3_vector_info *vector;
+	struct pci_dev *pdev = h->pdev;
+	u16 tqp_num = h->kinfo.num_tqps;
+	u16 vector_num;
+	int ret = 0;
+	u16 i;
+
+	/* RSS size, cpu online and vector_num should be the same */
+	/* Should consider 2p/4p later */
+	vector_num = min_t(u16, num_online_cpus(), tqp_num);
+	vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
+			      GFP_KERNEL);
+	if (!vector)
+		return -ENOMEM;
+
+	vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
+
+	priv->vector_num = vector_num;
+	priv->tqp_vector = (struct hns3_enet_tqp_vector *)
+		devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
+			     GFP_KERNEL);
+	if (!priv->tqp_vector)
+		return -ENOMEM;
+
+	for (i = 0; i < tqp_num; i++) {
+		u16 vector_i = i % vector_num;
+
+		tqp_vector = &priv->tqp_vector[vector_i];
+
+		hns3_add_ring_to_group(&tqp_vector->tx_group,
+				       priv->ring_data[i].ring);
+
+		hns3_add_ring_to_group(&tqp_vector->rx_group,
+				       priv->ring_data[i + tqp_num].ring);
+
+		tqp_vector->idx = vector_i;
+		tqp_vector->mask_addr = vector[vector_i].io_addr;
+		tqp_vector->vector_irq = vector[vector_i].vector;
+		tqp_vector->num_tqps++;
+
+		priv->ring_data[i].ring->tqp_vector = tqp_vector;
+		priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+	}
+
+	for (i = 0; i < vector_num; i++) {
+		tqp_vector = &priv->tqp_vector[i];
+
+		tqp_vector->rx_group.total_bytes = 0;
+		tqp_vector->rx_group.total_packets = 0;
+		tqp_vector->tx_group.total_bytes = 0;
+		tqp_vector->tx_group.total_packets = 0;
+		hns3_vector_gl_rl_init(tqp_vector);
+		tqp_vector->handle = h;
+
+		ret = hns3_get_vector_ring_chain(tqp_vector,
+						 &vector_ring_chain);
+		if (ret)
+			goto out;
+
+		ret = h->ae_algo->ops->map_ring_to_vector(h,
+			tqp_vector->vector_irq, &vector_ring_chain);
+		if (ret)
+			goto out;
+
+		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+
+		netif_napi_add(priv->netdev, &tqp_vector->napi,
+			       hns3_nic_common_poll, NAPI_POLL_WEIGHT);
+	}
+
+out:
+	devm_kfree(&pdev->dev, vector);
+	return ret;
+}
+
+static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
+{
+	struct hnae3_ring_chain_node vector_ring_chain;
+	struct hnae3_handle *h = priv->ae_handle;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	struct pci_dev *pdev = h->pdev;
+	int i, ret;
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vector = &priv->tqp_vector[i];
+
+		ret = hns3_get_vector_ring_chain(tqp_vector,
+						 &vector_ring_chain);
+		if (ret)
+			return ret;
+
+		ret = h->ae_algo->ops->unmap_ring_from_vector(h,
+			tqp_vector->vector_irq, &vector_ring_chain);
+		if (ret)
+			return ret;
+
+		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+
+		if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
+			(void)irq_set_affinity_hint(
+				priv->tqp_vector[i].vector_irq,
+						    NULL);
+			devm_free_irq(&pdev->dev,
+				      priv->tqp_vector[i].vector_irq,
+				      &priv->tqp_vector[i]);
+		}
+
+		priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
+
+		netif_napi_del(&priv->tqp_vector[i].napi);
+	}
+
+	devm_kfree(&pdev->dev, priv->tqp_vector);
+
+	return 0;
+}
+
+static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
+			     int ring_type)
+{
+	struct hns3_nic_ring_data *ring_data = priv->ring_data;
+	int queue_num = priv->ae_handle->kinfo.num_tqps;
+	struct pci_dev *pdev = priv->ae_handle->pdev;
+	struct hns3_enet_ring *ring;
+
+	ring = devm_kzalloc(&pdev->dev, sizeof(*ring), GFP_KERNEL);
+	if (!ring)
+		return -ENOMEM;
+
+	if (ring_type == HNAE3_RING_TYPE_TX) {
+		ring_data[q->tqp_index].ring = ring;
+		ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
+	} else {
+		ring_data[q->tqp_index + queue_num].ring = ring;
+		ring->io_base = q->io_base;
+	}
+
+	hnae_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
+
+	ring_data[q->tqp_index].queue_index = q->tqp_index;
+
+	ring->tqp = q;
+	ring->desc = NULL;
+	ring->desc_cb = NULL;
+	ring->dev = priv->dev;
+	ring->desc_dma_addr = 0;
+	ring->buf_size = q->buf_size;
+	ring->desc_num = q->desc_num;
+	ring->next_to_use = 0;
+	ring->next_to_clean = 0;
+
+	return 0;
+}
+
+static int hns3_queue_to_ring(struct hnae3_queue *tqp,
+			      struct hns3_nic_priv *priv)
+{
+	int ret;
+
+	ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_TX);
+	if (ret)
+		return ret;
+
+	ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int hns3_get_ring_config(struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	struct pci_dev *pdev = h->pdev;
+	int i, ret;
+
+	priv->ring_data =  devm_kzalloc(&pdev->dev, h->kinfo.num_tqps *
+					sizeof(*priv->ring_data) * 2,
+					GFP_KERNEL);
+	if (!priv->ring_data)
+		return -ENOMEM;
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		ret = hns3_queue_to_ring(h->kinfo.tqp[i], priv);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+err:
+	devm_kfree(&pdev->dev, priv->ring_data);
+	return ret;
+}
+
+static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
+{
+	int ret;
+
+	if (ring->desc_num <= 0 || ring->buf_size <= 0)
+		return -EINVAL;
+
+	ring->desc_cb = kcalloc(ring->desc_num, sizeof(ring->desc_cb[0]),
+				GFP_KERNEL);
+	if (!ring->desc_cb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = hns3_alloc_desc(ring);
+	if (ret)
+		goto out_with_desc_cb;
+
+	if (!HNAE3_IS_TX_RING(ring)) {
+		ret = hns3_alloc_ring_buffers(ring);
+		if (ret)
+			goto out_with_desc;
+	}
+
+	return 0;
+
+out_with_desc:
+	hns3_free_desc(ring);
+out_with_desc_cb:
+	kfree(ring->desc_cb);
+	ring->desc_cb = NULL;
+out:
+	return ret;
+}
+
+static void hns3_fini_ring(struct hns3_enet_ring *ring)
+{
+	hns3_free_desc(ring);
+	kfree(ring->desc_cb);
+	ring->desc_cb = NULL;
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+}
+
+int hns3_buf_size2type(u32 buf_size)
+{
+	int bd_size_type;
+
+	switch (buf_size) {
+	case 512:
+		bd_size_type = HNS3_BD_SIZE_512_TYPE;
+		break;
+	case 1024:
+		bd_size_type = HNS3_BD_SIZE_1024_TYPE;
+		break;
+	case 2048:
+		bd_size_type = HNS3_BD_SIZE_2048_TYPE;
+		break;
+	case 4096:
+		bd_size_type = HNS3_BD_SIZE_4096_TYPE;
+		break;
+	default:
+		bd_size_type = HNS3_BD_SIZE_2048_TYPE;
+	}
+
+	return bd_size_type;
+}
+
+static void hns3_init_ring_hw(struct hns3_enet_ring *ring)
+{
+	dma_addr_t dma = ring->desc_dma_addr;
+	struct hnae3_queue *q = ring->tqp;
+
+	if (!HNAE3_IS_TX_RING(ring)) {
+		hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG,
+			       (u32)dma);
+		hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_H_REG,
+			       (u32)((dma >> 31) >> 1));
+
+		hns3_write_dev(q, HNS3_RING_RX_RING_BD_LEN_REG,
+			       hns3_buf_size2type(ring->buf_size));
+		hns3_write_dev(q, HNS3_RING_RX_RING_BD_NUM_REG,
+			       ring->desc_num / 8 - 1);
+
+	} else {
+		hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_L_REG,
+			       (u32)dma);
+		hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_H_REG,
+			       (u32)((dma >> 31) >> 1));
+
+		hns3_write_dev(q, HNS3_RING_TX_RING_BD_LEN_REG,
+			       hns3_buf_size2type(ring->buf_size));
+		hns3_write_dev(q, HNS3_RING_TX_RING_BD_NUM_REG,
+			       ring->desc_num / 8 - 1);
+	}
+}
+
+static int hns3_init_all_ring(struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	int ring_num = h->kinfo.num_tqps * 2;
+	int i, j;
+	int ret;
+
+	for (i = 0; i < ring_num; i++) {
+		ret = hns3_alloc_ring_memory(priv->ring_data[i].ring);
+		if (ret) {
+			dev_err(priv->dev,
+				"Alloc ring memory fail! ret=%d\n", ret);
+			goto out_when_alloc_ring_memory;
+		}
+
+		hns3_init_ring_hw(priv->ring_data[i].ring);
+
+		u64_stats_init(&priv->ring_data[i].ring->syncp);
+	}
+
+	return 0;
+
+out_when_alloc_ring_memory:
+	for (j = i - 1; j >= 0; j--)
+		hns3_fini_ring(priv->ring_data[i].ring);
+
+	return -ENOMEM;
+}
+
+static int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	int i;
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		if (h->ae_algo->ops->reset_queue)
+			h->ae_algo->ops->reset_queue(h, i);
+
+		hns3_fini_ring(priv->ring_data[i].ring);
+		hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
+	}
+
+	return 0;
+}
+
+/* Set mac addr if it is configured. or leave it to the AE driver */
+static void hns3_init_mac_addr(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	u8 mac_addr_temp[ETH_ALEN];
+
+	if (h->ae_algo->ops->get_mac_addr) {
+		h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
+		ether_addr_copy(netdev->dev_addr, mac_addr_temp);
+	}
+
+	/* Check if the MAC address is valid, if not get a random one */
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		eth_hw_addr_random(netdev);
+		dev_warn(priv->dev, "using random MAC address %pM\n",
+			 netdev->dev_addr);
+		/* Also copy this new MAC address into hdev */
+		if (h->ae_algo->ops->set_mac_addr)
+			h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+	}
+}
+
+static void hns3_nic_set_priv_ops(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+	if ((netdev->features & NETIF_F_TSO) ||
+	    (netdev->features & NETIF_F_TSO6)) {
+		priv->ops.fill_desc = hns3_fill_desc_tso;
+		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
+	} else {
+		priv->ops.fill_desc = hns3_fill_desc;
+		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
+	}
+}
+
+static int hns3_client_init(struct hnae3_handle *handle)
+{
+	struct pci_dev *pdev = handle->pdev;
+	struct hns3_nic_priv *priv;
+	struct net_device *netdev;
+	int ret;
+
+	netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
+				   handle->kinfo.num_tqps);
+	if (!netdev)
+		return -ENOMEM;
+
+	priv = netdev_priv(netdev);
+	priv->dev = &pdev->dev;
+	priv->netdev = netdev;
+	priv->ae_handle = handle;
+
+	handle->kinfo.netdev = netdev;
+	handle->priv = (void *)priv;
+
+	hns3_init_mac_addr(netdev);
+
+	hns3_set_default_feature(netdev);
+
+	netdev->watchdog_timeo = HNS3_TX_TIMEOUT;
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->netdev_ops = &hns3_nic_netdev_ops;
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+	hns3_ethtool_set_ops(netdev);
+	hns3_nic_set_priv_ops(netdev);
+
+	/* Carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	ret = hns3_get_ring_config(priv);
+	if (ret) {
+		ret = -ENOMEM;
+		goto out_get_ring_cfg;
+	}
+
+	ret = hns3_nic_init_vector_data(priv);
+	if (ret) {
+		ret = -ENOMEM;
+		goto out_init_vector_data;
+	}
+
+	ret = hns3_init_all_ring(priv);
+	if (ret) {
+		ret = -ENOMEM;
+		goto out_init_ring_data;
+	}
+
+	ret = register_netdev(netdev);
+	if (ret) {
+		dev_err(priv->dev, "probe register netdev fail!\n");
+		goto out_reg_netdev_fail;
+	}
+
+	return ret;
+
+out_reg_netdev_fail:
+out_init_ring_data:
+	(void)hns3_nic_uninit_vector_data(priv);
+	priv->ring_data = NULL;
+out_init_vector_data:
+out_get_ring_cfg:
+	priv->ae_handle = NULL;
+	free_netdev(netdev);
+	return ret;
+}
+
+static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
+{
+	struct net_device *netdev = handle->kinfo.netdev;
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	if (netdev->reg_state != NETREG_UNINITIALIZED)
+		unregister_netdev(netdev);
+
+	ret = hns3_nic_uninit_vector_data(priv);
+	if (ret)
+		netdev_err(netdev, "uninit vector error\n");
+
+	ret = hns3_uninit_all_ring(priv);
+	if (ret)
+		netdev_err(netdev, "uninit ring error\n");
+
+	priv->ring_data = NULL;
+
+	free_netdev(netdev);
+}
+
+static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
+{
+	struct net_device *netdev = handle->kinfo.netdev;
+
+	if (!netdev)
+		return;
+
+	if (linkup) {
+		netif_carrier_on(netdev);
+		netif_tx_wake_all_queues(netdev);
+		netdev_info(netdev, "link up\n");
+	} else {
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+		netdev_info(netdev, "link down\n");
+	}
+}
+
+const struct hnae3_client_ops client_ops = {
+	.init_instance = hns3_client_init,
+	.uninit_instance = hns3_client_uninit,
+	.link_status_change = hns3_link_status_change,
+};
+
+/* hns3_init_module - Driver registration routine
+ * hns3_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init hns3_init_module(void)
+{
+	int ret;
+
+	pr_info("%s: %s - version\n", hns3_driver_name, hns3_driver_string);
+	pr_info("%s: %s\n", hns3_driver_name, hns3_copyright);
+
+	client.type = HNAE3_CLIENT_KNIC;
+	snprintf(client.name, HNAE3_CLIENT_NAME_LENGTH - 1, "%s",
+		 hns3_driver_name);
+
+	client.ops = &client_ops;
+
+	ret = hnae3_register_client(&client);
+	if (ret)
+		return ret;
+
+	ret = pci_register_driver(&hns3_driver);
+	if (ret)
+		hnae3_unregister_client(&client);
+
+	return ret;
+}
+module_init(hns3_init_module);
+
+/* hns3_exit_module - Driver exit cleanup routine
+ * hns3_exit_module is called just before the driver is removed
+ * from memory.
+ */
+static void __exit hns3_exit_module(void)
+{
+	pci_unregister_driver(&hns3_driver);
+	hnae3_unregister_client(&client);
+}
+module_exit(hns3_exit_module);
+
+MODULE_DESCRIPTION("HNS3: Hisilicon Ethernet Driver");
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("pci:hns-nic");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
new file mode 100644
index 000000000000..a6e8f15a4669
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -0,0 +1,592 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HNS3_ENET_H
+#define __HNS3_ENET_H
+
+#include "hnae3.h"
+
+extern const char hns3_driver_version[];
+
+enum hns3_nic_state {
+	HNS3_NIC_STATE_TESTING,
+	HNS3_NIC_STATE_RESETTING,
+	HNS3_NIC_STATE_REINITING,
+	HNS3_NIC_STATE_DOWN,
+	HNS3_NIC_STATE_DISABLED,
+	HNS3_NIC_STATE_REMOVING,
+	HNS3_NIC_STATE_SERVICE_INITED,
+	HNS3_NIC_STATE_SERVICE_SCHED,
+	HNS3_NIC_STATE2_RESET_REQUESTED,
+	HNS3_NIC_STATE_MAX
+};
+
+#define HNS3_RING_RX_RING_BASEADDR_L_REG	0x00000
+#define HNS3_RING_RX_RING_BASEADDR_H_REG	0x00004
+#define HNS3_RING_RX_RING_BD_NUM_REG		0x00008
+#define HNS3_RING_RX_RING_BD_LEN_REG		0x0000C
+#define HNS3_RING_RX_RING_TAIL_REG		0x00018
+#define HNS3_RING_RX_RING_HEAD_REG		0x0001C
+#define HNS3_RING_RX_RING_FBDNUM_REG		0x00020
+#define HNS3_RING_RX_RING_PKTNUM_RECORD_REG	0x0002C
+
+#define HNS3_RING_TX_RING_BASEADDR_L_REG	0x00040
+#define HNS3_RING_TX_RING_BASEADDR_H_REG	0x00044
+#define HNS3_RING_TX_RING_BD_NUM_REG		0x00048
+#define HNS3_RING_TX_RING_BD_LEN_REG		0x0004C
+#define HNS3_RING_TX_RING_TAIL_REG		0x00058
+#define HNS3_RING_TX_RING_HEAD_REG		0x0005C
+#define HNS3_RING_TX_RING_FBDNUM_REG		0x00060
+#define HNS3_RING_TX_RING_OFFSET_REG		0x00064
+#define HNS3_RING_TX_RING_PKTNUM_RECORD_REG	0x0006C
+
+#define HNS3_RING_PREFETCH_EN_REG		0x0007C
+#define HNS3_RING_CFG_VF_NUM_REG		0x00080
+#define HNS3_RING_ASID_REG			0x0008C
+#define HNS3_RING_RX_VM_REG			0x00090
+#define HNS3_RING_T0_BE_RST			0x00094
+#define HNS3_RING_COULD_BE_RST			0x00098
+#define HNS3_RING_WRR_WEIGHT_REG		0x0009c
+
+#define HNS3_RING_INTMSK_RXWL_REG		0x000A0
+#define HNS3_RING_INTSTS_RX_RING_REG		0x000A4
+#define HNS3_RX_RING_INT_STS_REG		0x000A8
+#define HNS3_RING_INTMSK_TXWL_REG		0x000AC
+#define HNS3_RING_INTSTS_TX_RING_REG		0x000B0
+#define HNS3_TX_RING_INT_STS_REG		0x000B4
+#define HNS3_RING_INTMSK_RX_OVERTIME_REG	0x000B8
+#define HNS3_RING_INTSTS_RX_OVERTIME_REG	0x000BC
+#define HNS3_RING_INTMSK_TX_OVERTIME_REG	0x000C4
+#define HNS3_RING_INTSTS_TX_OVERTIME_REG	0x000C8
+
+#define HNS3_RING_MB_CTRL_REG			0x00100
+#define HNS3_RING_MB_DATA_BASE_REG		0x00200
+
+#define HNS3_TX_REG_OFFSET			0x40
+
+#define HNS3_RX_HEAD_SIZE			256
+
+#define HNS3_TX_TIMEOUT (5 * HZ)
+#define HNS3_RING_NAME_LEN			16
+#define HNS3_BUFFER_SIZE_2048			2048
+#define HNS3_RING_MAX_PENDING			32768
+
+#define HNS3_BD_SIZE_512_TYPE			0
+#define HNS3_BD_SIZE_1024_TYPE			1
+#define HNS3_BD_SIZE_2048_TYPE			2
+#define HNS3_BD_SIZE_4096_TYPE			3
+
+#define HNS3_RX_FLAG_VLAN_PRESENT		0x1
+#define HNS3_RX_FLAG_L3ID_IPV4			0x0
+#define HNS3_RX_FLAG_L3ID_IPV6			0x1
+#define HNS3_RX_FLAG_L4ID_UDP			0x0
+#define HNS3_RX_FLAG_L4ID_TCP			0x1
+
+#define HNS3_RXD_DMAC_S				0
+#define HNS3_RXD_DMAC_M				(0x3 << HNS3_RXD_DMAC_S)
+#define HNS3_RXD_VLAN_S				2
+#define HNS3_RXD_VLAN_M				(0x3 << HNS3_RXD_VLAN_S)
+#define HNS3_RXD_L3ID_S				4
+#define HNS3_RXD_L3ID_M				(0xf << HNS3_RXD_L3ID_S)
+#define HNS3_RXD_L4ID_S				8
+#define HNS3_RXD_L4ID_M				(0xf << HNS3_RXD_L4ID_S)
+#define HNS3_RXD_FRAG_B				12
+#define HNS3_RXD_L2E_B				16
+#define HNS3_RXD_L3E_B				17
+#define HNS3_RXD_L4E_B				18
+#define HNS3_RXD_TRUNCAT_B			19
+#define HNS3_RXD_HOI_B				20
+#define HNS3_RXD_DOI_B				21
+#define HNS3_RXD_OL3E_B				22
+#define HNS3_RXD_OL4E_B				23
+
+#define HNS3_RXD_ODMAC_S			0
+#define HNS3_RXD_ODMAC_M			(0x3 << HNS3_RXD_ODMAC_S)
+#define HNS3_RXD_OVLAN_S			2
+#define HNS3_RXD_OVLAN_M			(0x3 << HNS3_RXD_OVLAN_S)
+#define HNS3_RXD_OL3ID_S			4
+#define HNS3_RXD_OL3ID_M			(0xf << HNS3_RXD_OL3ID_S)
+#define HNS3_RXD_OL4ID_S			8
+#define HNS3_RXD_OL4ID_M			(0xf << HNS3_RXD_OL4ID_S)
+#define HNS3_RXD_FBHI_S				12
+#define HNS3_RXD_FBHI_M				(0x3 << HNS3_RXD_FBHI_S)
+#define HNS3_RXD_FBLI_S				14
+#define HNS3_RXD_FBLI_M				(0x3 << HNS3_RXD_FBLI_S)
+
+#define HNS3_RXD_BDTYPE_S			0
+#define HNS3_RXD_BDTYPE_M			(0xf << HNS3_RXD_BDTYPE_S)
+#define HNS3_RXD_VLD_B				4
+#define HNS3_RXD_UDP0_B				5
+#define HNS3_RXD_EXTEND_B			7
+#define HNS3_RXD_FE_B				8
+#define HNS3_RXD_LUM_B				9
+#define HNS3_RXD_CRCP_B				10
+#define HNS3_RXD_L3L4P_B			11
+#define HNS3_RXD_TSIND_S			12
+#define HNS3_RXD_TSIND_M			(0x7 << HNS3_RXD_TSIND_S)
+#define HNS3_RXD_LKBK_B				15
+#define HNS3_RXD_HDL_S				16
+#define HNS3_RXD_HDL_M				(0x7ff << HNS3_RXD_HDL_S)
+#define HNS3_RXD_HSIND_B			31
+
+#define HNS3_TXD_L3T_S				0
+#define HNS3_TXD_L3T_M				(0x3 << HNS3_TXD_L3T_S)
+#define HNS3_TXD_L4T_S				2
+#define HNS3_TXD_L4T_M				(0x3 << HNS3_TXD_L4T_S)
+#define HNS3_TXD_L3CS_B				4
+#define HNS3_TXD_L4CS_B				5
+#define HNS3_TXD_VLAN_B				6
+#define HNS3_TXD_TSO_B				7
+
+#define HNS3_TXD_L2LEN_S			8
+#define HNS3_TXD_L2LEN_M			(0xff << HNS3_TXD_L2LEN_S)
+#define HNS3_TXD_L3LEN_S			16
+#define HNS3_TXD_L3LEN_M			(0xff << HNS3_TXD_L3LEN_S)
+#define HNS3_TXD_L4LEN_S			24
+#define HNS3_TXD_L4LEN_M			(0xff << HNS3_TXD_L4LEN_S)
+
+#define HNS3_TXD_OL3T_S				0
+#define HNS3_TXD_OL3T_M				(0x3 << HNS3_TXD_OL3T_S)
+#define HNS3_TXD_OVLAN_B			2
+#define HNS3_TXD_MACSEC_B			3
+#define HNS3_TXD_TUNTYPE_S			4
+#define HNS3_TXD_TUNTYPE_M			(0xf << HNS3_TXD_TUNTYPE_S)
+
+#define HNS3_TXD_BDTYPE_S			0
+#define HNS3_TXD_BDTYPE_M			(0xf << HNS3_TXD_BDTYPE_S)
+#define HNS3_TXD_FE_B				4
+#define HNS3_TXD_SC_S				5
+#define HNS3_TXD_SC_M				(0x3 << HNS3_TXD_SC_S)
+#define HNS3_TXD_EXTEND_B			7
+#define HNS3_TXD_VLD_B				8
+#define HNS3_TXD_RI_B				9
+#define HNS3_TXD_RA_B				10
+#define HNS3_TXD_TSYN_B				11
+#define HNS3_TXD_DECTTL_S			12
+#define HNS3_TXD_DECTTL_M			(0xf << HNS3_TXD_DECTTL_S)
+
+#define HNS3_TXD_MSS_S				0
+#define HNS3_TXD_MSS_M				(0x3fff << HNS3_TXD_MSS_S)
+
+#define HNS3_VECTOR_TX_IRQ			BIT_ULL(0)
+#define HNS3_VECTOR_RX_IRQ			BIT_ULL(1)
+
+#define HNS3_VECTOR_NOT_INITED			0
+#define HNS3_VECTOR_INITED			1
+
+#define HNS3_MAX_BD_SIZE			65535
+#define HNS3_MAX_BD_PER_FRAG			8
+#define HNS3_MAX_BD_PER_PKT			MAX_SKB_FRAGS
+
+#define HNS3_VECTOR_GL0_OFFSET			0x100
+#define HNS3_VECTOR_GL1_OFFSET			0x200
+#define HNS3_VECTOR_GL2_OFFSET			0x300
+#define HNS3_VECTOR_RL_OFFSET			0x900
+#define HNS3_VECTOR_RL_EN_B			6
+
+enum hns3_pkt_l3t_type {
+	HNS3_L3T_NONE,
+	HNS3_L3T_IPV6,
+	HNS3_L3T_IPV4,
+	HNS3_L3T_RESERVED
+};
+
+enum hns3_pkt_l4t_type {
+	HNS3_L4T_UNKNOWN,
+	HNS3_L4T_TCP,
+	HNS3_L4T_UDP,
+	HNS3_L4T_SCTP
+};
+
+enum hns3_pkt_ol3t_type {
+	HNS3_OL3T_NONE,
+	HNS3_OL3T_IPV6,
+	HNS3_OL3T_IPV4_NO_CSUM,
+	HNS3_OL3T_IPV4_CSUM
+};
+
+enum hns3_pkt_tun_type {
+	HNS3_TUN_NONE,
+	HNS3_TUN_MAC_IN_UDP,
+	HNS3_TUN_NVGRE,
+	HNS3_TUN_OTHER
+};
+
+/* hardware spec ring buffer format */
+struct __packed hns3_desc {
+	__le64 addr;
+	union {
+		struct {
+			__le16 vlan_tag;
+			__le16 send_size;
+			union {
+				__le32 type_cs_vlan_tso_len;
+				struct {
+					__u8 type_cs_vlan_tso;
+					__u8 l2_len;
+					__u8 l3_len;
+					__u8 l4_len;
+				};
+			};
+			__le16 outer_vlan_tag;
+			__le16 tv;
+
+		union {
+			__le32 ol_type_vlan_len_msec;
+			struct {
+				__u8 ol_type_vlan_msec;
+				__u8 ol2_len;
+				__u8 ol3_len;
+				__u8 ol4_len;
+			};
+		};
+
+			__le32 paylen;
+			__le16 bdtp_fe_sc_vld_ra_ri;
+			__le16 mss;
+		} tx;
+
+		struct {
+			__le32 l234_info;
+			__le16 pkt_len;
+			__le16 size;
+
+			__le32 rss_hash;
+			__le16 fd_id;
+			__le16 vlan_tag;
+
+			union {
+				__le32 ol_info;
+				struct {
+					__le16 o_dm_vlan_id_fb;
+					__le16 ot_vlan_tag;
+				};
+			};
+
+			__le32 bd_base_info;
+		} rx;
+	};
+};
+
+struct hns3_desc_cb {
+	dma_addr_t dma; /* dma address of this desc */
+	void *buf;      /* cpu addr for a desc */
+
+	/* priv data for the desc, e.g. skb when use with ip stack*/
+	void *priv;
+	u16 page_offset;
+	u16 reuse_flag;
+
+	u16 length;     /* length of the buffer */
+
+       /* desc type, used by the ring user to mark the type of the priv data */
+	u16 type;
+};
+
+enum hns3_pkt_l3type {
+	HNS3_L3_TYPE_IPV4,
+	HNS3_L3_TYPE_IPV6,
+	HNS3_L3_TYPE_ARP,
+	HNS3_L3_TYPE_RARP,
+	HNS3_L3_TYPE_IPV4_OPT,
+	HNS3_L3_TYPE_IPV6_EXT,
+	HNS3_L3_TYPE_LLDP,
+	HNS3_L3_TYPE_BPDU,
+	HNS3_L3_TYPE_MAC_PAUSE,
+	HNS3_L3_TYPE_PFC_PAUSE,/* 0x9*/
+
+	/* reserved for 0xA~0xB*/
+
+	HNS3_L3_TYPE_CNM = 0xc,
+
+	/* reserved for 0xD~0xE*/
+
+	HNS3_L3_TYPE_PARSE_FAIL	= 0xf /* must be last */
+};
+
+enum hns3_pkt_l4type {
+	HNS3_L4_TYPE_UDP,
+	HNS3_L4_TYPE_TCP,
+	HNS3_L4_TYPE_GRE,
+	HNS3_L4_TYPE_SCTP,
+	HNS3_L4_TYPE_IGMP,
+	HNS3_L4_TYPE_ICMP,
+
+	/* reserved for 0x6~0xE */
+
+	HNS3_L4_TYPE_PARSE_FAIL	= 0xf /* must be last */
+};
+
+enum hns3_pkt_ol3type {
+	HNS3_OL3_TYPE_IPV4 = 0,
+	HNS3_OL3_TYPE_IPV6,
+	/* reserved for 0x2~0x3 */
+	HNS3_OL3_TYPE_IPV4_OPT = 4,
+	HNS3_OL3_TYPE_IPV6_EXT,
+
+	/* reserved for 0x6~0xE*/
+
+	HNS3_OL3_TYPE_PARSE_FAIL = 0xf	/* must be last */
+};
+
+enum hns3_pkt_ol4type {
+	HNS3_OL4_TYPE_NO_TUN,
+	HNS3_OL4_TYPE_MAC_IN_UDP,
+	HNS3_OL4_TYPE_NVGRE,
+	HNS3_OL4_TYPE_UNKNOWN
+};
+
+struct ring_stats {
+	u64 io_err_cnt;
+	u64 sw_err_cnt;
+	u64 seg_pkt_cnt;
+	union {
+		struct {
+			u64 tx_pkts;
+			u64 tx_bytes;
+			u64 tx_err_cnt;
+			u64 restart_queue;
+			u64 tx_busy;
+		};
+		struct {
+			u64 rx_pkts;
+			u64 rx_bytes;
+			u64 rx_err_cnt;
+			u64 reuse_pg_cnt;
+			u64 err_pkt_len;
+			u64 non_vld_descs;
+			u64 err_bd_num;
+			u64 l2_err;
+			u64 l3l4_csum_err;
+		};
+	};
+};
+
+struct hns3_enet_ring {
+	u8 __iomem *io_base; /* base io address for the ring */
+	struct hns3_desc *desc; /* dma map address space */
+	struct hns3_desc_cb *desc_cb;
+	struct hns3_enet_ring *next;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	struct hnae3_queue *tqp;
+	char ring_name[HNS3_RING_NAME_LEN];
+	struct device *dev; /* will be used for DMA mapping of descriptors */
+
+	/* statistic */
+	struct ring_stats stats;
+	struct u64_stats_sync syncp;
+
+	dma_addr_t desc_dma_addr;
+	u32 buf_size;       /* size for hnae_desc->addr, preset by AE */
+	u16 desc_num;       /* total number of desc */
+	u16 max_desc_num_per_pkt;
+	u16 max_raw_data_sz_per_desc;
+	u16 max_pkt_size;
+	int next_to_use;    /* idx of next spare desc */
+
+	/* idx of lastest sent desc, the ring is empty when equal to
+	 * next_to_use
+	 */
+	int next_to_clean;
+
+	u32 flag;          /* ring attribute */
+	int irq_init_flag;
+
+	int numa_node;
+	cpumask_t affinity_mask;
+};
+
+struct hns_queue;
+
+struct hns3_nic_ring_data {
+	struct hns3_enet_ring *ring;
+	struct napi_struct napi;
+	int queue_index;
+	int (*poll_one)(struct hns3_nic_ring_data *, int, void *);
+	void (*ex_process)(struct hns3_nic_ring_data *, struct sk_buff *);
+	void (*fini_process)(struct hns3_nic_ring_data *);
+};
+
+struct hns3_nic_ops {
+	int (*fill_desc)(struct hns3_enet_ring *ring, void *priv,
+			 int size, dma_addr_t dma, int frag_end,
+			 enum hns_desc_type type);
+	int (*maybe_stop_tx)(struct sk_buff **out_skb,
+			     int *bnum, struct hns3_enet_ring *ring);
+	void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum);
+};
+
+enum hns3_flow_level_range {
+	HNS3_FLOW_LOW = 0,
+	HNS3_FLOW_MID = 1,
+	HNS3_FLOW_HIGH = 2,
+	HNS3_FLOW_ULTRA = 3,
+};
+
+enum hns3_link_mode_bits {
+	HNS3_LM_FIBRE_BIT = BIT(0),
+	HNS3_LM_AUTONEG_BIT = BIT(1),
+	HNS3_LM_TP_BIT = BIT(2),
+	HNS3_LM_PAUSE_BIT = BIT(3),
+	HNS3_LM_BACKPLANE_BIT = BIT(4),
+	HNS3_LM_10BASET_HALF_BIT = BIT(5),
+	HNS3_LM_10BASET_FULL_BIT = BIT(6),
+	HNS3_LM_100BASET_HALF_BIT = BIT(7),
+	HNS3_LM_100BASET_FULL_BIT = BIT(8),
+	HNS3_LM_1000BASET_FULL_BIT = BIT(9),
+	HNS3_LM_10000BASEKR_FULL_BIT = BIT(10),
+	HNS3_LM_25000BASEKR_FULL_BIT = BIT(11),
+	HNS3_LM_40000BASELR4_FULL_BIT = BIT(12),
+	HNS3_LM_50000BASEKR2_FULL_BIT = BIT(13),
+	HNS3_LM_100000BASEKR4_FULL_BIT = BIT(14),
+	HNS3_LM_COUNT = 15
+};
+
+#define HNS3_INT_GL_50K		0x000A
+#define HNS3_INT_GL_20K		0x0019
+#define HNS3_INT_GL_18K		0x001B
+#define HNS3_INT_GL_8K		0x003E
+
+struct hns3_enet_ring_group {
+	/* array of pointers to rings */
+	struct hns3_enet_ring *ring;
+	u64 total_bytes;	/* total bytes processed this group */
+	u64 total_packets;	/* total packets processed this group */
+	u16 count;
+	enum hns3_flow_level_range flow_level;
+	u16 int_gl;
+};
+
+struct hns3_enet_tqp_vector {
+	struct hnae3_handle *handle;
+	u8 __iomem *mask_addr;
+	int vector_irq;
+	int irq_init_flag;
+
+	u16 idx;		/* index in the TQP vector array per handle. */
+
+	struct napi_struct napi;
+
+	struct hns3_enet_ring_group rx_group;
+	struct hns3_enet_ring_group tx_group;
+
+	u16 num_tqps;	/* total number of tqps in TQP vector */
+
+	cpumask_t affinity_mask;
+	char name[HNAE3_INT_NAME_LEN];
+
+	/* when 0 should adjust interrupt coalesce parameter */
+	u8 int_adapt_down;
+} ____cacheline_internodealigned_in_smp;
+
+enum hns3_udp_tnl_type {
+	HNS3_UDP_TNL_VXLAN,
+	HNS3_UDP_TNL_GENEVE,
+	HNS3_UDP_TNL_MAX,
+};
+
+struct hns3_udp_tunnel {
+	u16 dst_port;
+	int used;
+};
+
+struct hns3_nic_priv {
+	struct hnae3_handle *ae_handle;
+	u32 enet_ver;
+	u32 port_id;
+	struct net_device *netdev;
+	struct device *dev;
+	struct hns3_nic_ops ops;
+
+	/**
+	 * the cb for nic to manage the ring buffer, the first half of the
+	 * array is for tx_ring and vice versa for the second half
+	 */
+	struct hns3_nic_ring_data *ring_data;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	u16 vector_num;
+
+	/* The most recently read link state */
+	int link;
+	u64 tx_timeout_count;
+
+	unsigned long state;
+
+	struct timer_list service_timer;
+
+	struct work_struct service_task;
+
+	struct notifier_block notifier_block;
+	/* Vxlan/Geneve information */
+	struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
+};
+
+union l3_hdr_info {
+	struct iphdr *v4;
+	struct ipv6hdr *v6;
+	unsigned char *hdr;
+};
+
+union l4_hdr_info {
+	struct tcphdr *tcp;
+	struct udphdr *udp;
+	unsigned char *hdr;
+};
+
+/* the distance between [begin, end) in a ring buffer
+ * note: there is a unuse slot between the begin and the end
+ */
+static inline int ring_dist(struct hns3_enet_ring *ring, int begin, int end)
+{
+	return (end - begin + ring->desc_num) % ring->desc_num;
+}
+
+static inline int ring_space(struct hns3_enet_ring *ring)
+{
+	return ring->desc_num -
+		ring_dist(ring, ring->next_to_clean, ring->next_to_use) - 1;
+}
+
+static inline int is_ring_empty(struct hns3_enet_ring *ring)
+{
+	return ring->next_to_use == ring->next_to_clean;
+}
+
+static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
+{
+	u8 __iomem *reg_addr = READ_ONCE(base);
+
+	writel(value, reg_addr + reg);
+}
+
+#define hns3_write_dev(a, reg, value) \
+	hns3_write_reg((a)->io_base, (reg), (value))
+
+#define hnae_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \
+		(tqp)->io_base + HNS3_RING_TX_RING_TAIL_REG)
+
+#define ring_to_dev(ring) (&(ring)->tqp->handle->pdev->dev)
+
+#define ring_to_dma_dir(ring) (HNAE3_IS_TX_RING(ring) ? \
+	DMA_TO_DEVICE : DMA_FROM_DEVICE)
+
+#define tx_ring_data(priv, idx) ((priv)->ring_data[idx])
+
+#define hnae_buf_size(_ring) ((_ring)->buf_size)
+#define hnae_page_order(_ring) (get_order(hnae_buf_size(_ring)))
+#define hnae_page_size(_ring) (PAGE_SIZE << hnae_page_order(_ring))
+
+/* iterator for handling rings in ring group */
+#define hns3_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos; pos = pos->next)
+
+void hns3_ethtool_set_ops(struct net_device *netdev);
+
+int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
new file mode 100644
index 000000000000..0ad65e47c77e
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+
+#include "hns3_enet.h"
+
+struct hns3_stats {
+	char stats_string[ETH_GSTRING_LEN];
+	int stats_size;
+	int stats_offset;
+};
+
+/* tqp related stats */
+#define HNS3_TQP_STAT(_string, _member)	{			\
+	.stats_string = _string,				\
+	.stats_size = FIELD_SIZEOF(struct ring_stats, _member),	\
+	.stats_offset = offsetof(struct hns3_enet_ring, stats),	\
+}								\
+
+static const struct hns3_stats hns3_txq_stats[] = {
+	/* Tx per-queue statistics */
+	HNS3_TQP_STAT("tx_io_err_cnt", io_err_cnt),
+	HNS3_TQP_STAT("tx_sw_err_cnt", sw_err_cnt),
+	HNS3_TQP_STAT("tx_seg_pkt_cnt", seg_pkt_cnt),
+	HNS3_TQP_STAT("tx_pkts", tx_pkts),
+	HNS3_TQP_STAT("tx_bytes", tx_bytes),
+	HNS3_TQP_STAT("tx_err_cnt", tx_err_cnt),
+	HNS3_TQP_STAT("tx_restart_queue", restart_queue),
+	HNS3_TQP_STAT("tx_busy", tx_busy),
+};
+
+#define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
+
+static const struct hns3_stats hns3_rxq_stats[] = {
+	/* Rx per-queue statistics */
+	HNS3_TQP_STAT("rx_io_err_cnt", io_err_cnt),
+	HNS3_TQP_STAT("rx_sw_err_cnt", sw_err_cnt),
+	HNS3_TQP_STAT("rx_seg_pkt_cnt", seg_pkt_cnt),
+	HNS3_TQP_STAT("rx_pkts", rx_pkts),
+	HNS3_TQP_STAT("rx_bytes", rx_bytes),
+	HNS3_TQP_STAT("rx_err_cnt", rx_err_cnt),
+	HNS3_TQP_STAT("rx_reuse_pg_cnt", reuse_pg_cnt),
+	HNS3_TQP_STAT("rx_err_pkt_len", err_pkt_len),
+	HNS3_TQP_STAT("rx_non_vld_descs", non_vld_descs),
+	HNS3_TQP_STAT("rx_err_bd_num", err_bd_num),
+	HNS3_TQP_STAT("rx_l2_err", l2_err),
+	HNS3_TQP_STAT("rx_l3l4_csum_err", l3l4_csum_err),
+};
+
+#define HNS3_RXQ_STATS_COUNT ARRAY_SIZE(hns3_rxq_stats)
+
+#define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT)
+
+struct hns3_link_mode_mapping {
+	u32 hns3_link_mode;
+	u32 ethtool_link_mode;
+};
+
+static const struct hns3_link_mode_mapping hns3_lm_map[] = {
+	{HNS3_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
+	{HNS3_LM_AUTONEG_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
+	{HNS3_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
+	{HNS3_LM_PAUSE_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
+	{HNS3_LM_BACKPLANE_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
+	{HNS3_LM_10BASET_HALF_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT},
+	{HNS3_LM_10BASET_FULL_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT},
+	{HNS3_LM_100BASET_HALF_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT},
+	{HNS3_LM_100BASET_FULL_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT},
+	{HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+};
+
+static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
+				  bool is_advertised)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hns3_lm_map); i++) {
+		if (!(caps & hns3_lm_map[i].hns3_link_mode))
+			continue;
+
+		if (is_advertised) {
+			ethtool_link_ksettings_zero_link_mode(cmd,
+							      advertising);
+			__set_bit(hns3_lm_map[i].ethtool_link_mode,
+				  cmd->link_modes.advertising);
+		} else {
+			ethtool_link_ksettings_zero_link_mode(cmd,
+							      supported);
+			__set_bit(hns3_lm_map[i].ethtool_link_mode,
+				  cmd->link_modes.supported);
+		}
+	}
+}
+
+static int hns3_get_sset_count(struct net_device *netdev, int stringset)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+
+	if (!ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		return ((HNS3_TQP_STATS_COUNT * h->kinfo.num_tqps) +
+			ops->get_sset_count(h, stringset));
+
+	case ETH_SS_TEST:
+		return ops->get_sset_count(h, stringset);
+	}
+
+	return 0;
+}
+
+static void *hns3_update_strings(u8 *data, const struct hns3_stats *stats,
+				 u32 stat_count, u32 num_tqps)
+{
+#define MAX_PREFIX_SIZE (8 + 4)
+	u32 size_left;
+	u32 i, j;
+	u32 n1;
+
+	for (i = 0; i < num_tqps; i++) {
+		for (j = 0; j < stat_count; j++) {
+			data[ETH_GSTRING_LEN - 1] = '\0';
+
+			/* first, prepend the prefix string */
+			n1 = snprintf(data, MAX_PREFIX_SIZE, "rcb_q%d_", i);
+			n1 = min_t(uint, n1, MAX_PREFIX_SIZE - 1);
+			size_left = (ETH_GSTRING_LEN - 1) - n1;
+
+			/* now, concatenate the stats string to it */
+			strncat(data, stats[j].stats_string, size_left);
+			data += ETH_GSTRING_LEN;
+		}
+	}
+
+	return data;
+}
+
+static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+
+	/* get strings for Tx */
+	data = hns3_update_strings(data, hns3_txq_stats, HNS3_TXQ_STATS_COUNT,
+				   kinfo->num_tqps);
+
+	/* get strings for Rx */
+	data = hns3_update_strings(data, hns3_rxq_stats, HNS3_RXQ_STATS_COUNT,
+				   kinfo->num_tqps);
+
+	return data;
+}
+
+static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+	char *buff = (char *)data;
+
+	if (!ops->get_strings)
+		return;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		buff = hns3_get_strings_tqps(h, buff);
+		h->ae_algo->ops->get_strings(h, stringset, (u8 *)buff);
+		break;
+	case ETH_SS_TEST:
+		ops->get_strings(h, stringset, data);
+		break;
+	}
+}
+
+static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
+{
+	struct hns3_nic_priv *nic_priv = (struct hns3_nic_priv *)handle->priv;
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hns3_enet_ring *ring;
+	u8 *stat;
+	u32 i;
+
+	/* get stats for Tx */
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		ring = nic_priv->ring_data[i].ring;
+		for (i = 0; i < HNS3_TXQ_STATS_COUNT; i++) {
+			stat = (u8 *)ring + hns3_txq_stats[i].stats_offset;
+			*data++ = *(u64 *)stat;
+		}
+	}
+
+	/* get stats for Rx */
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		ring = nic_priv->ring_data[i + kinfo->num_tqps].ring;
+		for (i = 0; i < HNS3_RXQ_STATS_COUNT; i++) {
+			stat = (u8 *)ring + hns3_rxq_stats[i].stats_offset;
+			*data++ = *(u64 *)stat;
+		}
+	}
+
+	return data;
+}
+
+/* hns3_get_stats - get detail statistics.
+ * @netdev: net device
+ * @stats: statistics info.
+ * @data: statistics data.
+ */
+void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
+		    u64 *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	u64 *p = data;
+
+	if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) {
+		netdev_err(netdev, "could not get any statistics\n");
+		return;
+	}
+
+	h->ae_algo->ops->update_stats(h, &netdev->stats);
+
+	/* get per-queue stats */
+	p = hns3_get_stats_tqps(h, p);
+
+	/* get MAC & other misc hardware stats */
+	h->ae_algo->ops->get_stats(h, p);
+}
+
+static void hns3_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	strncpy(drvinfo->version, hns3_driver_version,
+		sizeof(drvinfo->version));
+	drvinfo->version[sizeof(drvinfo->version) - 1] = '\0';
+
+	strncpy(drvinfo->driver, h->pdev->driver->name,
+		sizeof(drvinfo->driver));
+	drvinfo->driver[sizeof(drvinfo->driver) - 1] = '\0';
+
+	strncpy(drvinfo->bus_info, pci_name(h->pdev),
+		sizeof(drvinfo->bus_info));
+	drvinfo->bus_info[ETHTOOL_BUSINFO_LEN - 1] = '\0';
+
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "0x%08x",
+		 priv->ae_handle->ae_algo->ops->get_fw_version(h));
+}
+
+static u32 hns3_get_link(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h;
+
+	h = priv->ae_handle;
+
+	if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status)
+		return h->ae_algo->ops->get_status(h);
+	else
+		return 0;
+}
+
+static void hns3_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *param)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int queue_num = priv->ae_handle->kinfo.num_tqps;
+
+	param->tx_max_pending = HNS3_RING_MAX_PENDING;
+	param->rx_max_pending = HNS3_RING_MAX_PENDING;
+
+	param->tx_pending = priv->ring_data[0].ring->desc_num;
+	param->rx_pending = priv->ring_data[queue_num].ring->desc_num;
+}
+
+static void hns3_get_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *param)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam)
+		h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
+			&param->rx_pause, &param->tx_pause);
+}
+
+static int hns3_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	u32 supported_caps;
+	u32 advertised_caps;
+	u8 media_type;
+	u8 link_stat;
+	u8 auto_neg;
+	u8 duplex;
+	u32 speed;
+
+	if (!h->ae_algo || !h->ae_algo->ops)
+		return -EOPNOTSUPP;
+
+	/* 1.auto_neg&speed&duplex from cmd */
+	if (h->ae_algo->ops->get_ksettings_an_result) {
+		h->ae_algo->ops->get_ksettings_an_result(h, &auto_neg,
+							 &speed, &duplex);
+		cmd->base.autoneg = auto_neg;
+		cmd->base.speed = speed;
+		cmd->base.duplex = duplex;
+
+		link_stat = hns3_get_link(netdev);
+		if (!link_stat) {
+			cmd->base.speed = (u32)SPEED_UNKNOWN;
+			cmd->base.duplex = DUPLEX_UNKNOWN;
+		}
+	}
+
+	/* 2.media_type get from bios parameter block */
+	if (h->ae_algo->ops->get_media_type)
+		h->ae_algo->ops->get_media_type(h, &media_type);
+
+	switch (media_type) {
+	case HNAE3_MEDIA_TYPE_FIBER:
+		cmd->base.port = PORT_FIBRE;
+		supported_caps = HNS3_LM_FIBRE_BIT | HNS3_LM_AUTONEG_BIT |
+			HNS3_LM_PAUSE_BIT | HNS3_LM_1000BASET_FULL_BIT;
+
+		advertised_caps = supported_caps;
+		break;
+	case HNAE3_MEDIA_TYPE_COPPER:
+		cmd->base.port = PORT_TP;
+		supported_caps = HNS3_LM_TP_BIT | HNS3_LM_AUTONEG_BIT |
+			HNS3_LM_PAUSE_BIT | HNS3_LM_1000BASET_FULL_BIT |
+			HNS3_LM_100BASET_FULL_BIT | HNS3_LM_100BASET_HALF_BIT |
+			HNS3_LM_10BASET_FULL_BIT | HNS3_LM_10BASET_HALF_BIT;
+		advertised_caps = supported_caps;
+		break;
+	case HNAE3_MEDIA_TYPE_BACKPLANE:
+		cmd->base.port = PORT_NONE;
+		supported_caps = HNS3_LM_BACKPLANE_BIT | HNS3_LM_PAUSE_BIT |
+			HNS3_LM_AUTONEG_BIT | HNS3_LM_1000BASET_FULL_BIT |
+			HNS3_LM_100BASET_FULL_BIT | HNS3_LM_100BASET_HALF_BIT |
+			HNS3_LM_10BASET_FULL_BIT | HNS3_LM_10BASET_HALF_BIT;
+
+		advertised_caps = supported_caps;
+		break;
+	case HNAE3_MEDIA_TYPE_UNKNOWN:
+	default:
+		cmd->base.port = PORT_OTHER;
+		supported_caps = 0;
+		advertised_caps = 0;
+		break;
+	}
+
+	/* now, map driver link modes to ethtool link modes */
+	hns3_driv_to_eth_caps(supported_caps, cmd, false);
+	hns3_driv_to_eth_caps(advertised_caps, cmd, true);
+
+	/* 3.mdix_ctrl&mdix get from phy reg */
+	if (h->ae_algo->ops->get_mdix_mode)
+		h->ae_algo->ops->get_mdix_mode(h, &cmd->base.eth_tp_mdix_ctrl,
+			&cmd->base.eth_tp_mdix);
+	/* 4.mdio_support */
+	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
+
+	return 0;
+}
+
+static u32 hns3_get_rss_key_size(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!h->ae_algo || !h->ae_algo->ops ||
+	    !h->ae_algo->ops->get_rss_key_size)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->get_rss_key_size(h);
+}
+
+static u32 hns3_get_rss_indir_size(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!h->ae_algo || !h->ae_algo->ops ||
+	    !h->ae_algo->ops->get_rss_indir_size)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->get_rss_indir_size(h);
+}
+
+static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->get_rss(h, indir, key, hfunc);
+}
+
+static int hns3_set_rss(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss)
+		return -EOPNOTSUPP;
+
+	/* currently we only support Toeplitz hash */
+	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_TOP)) {
+		netdev_err(netdev,
+			   "hash func not supported (only Toeplitz hash)\n");
+		return -EOPNOTSUPP;
+	}
+	if (!indir) {
+		netdev_err(netdev,
+			   "set rss failed for indir is empty\n");
+		return -EOPNOTSUPP;
+	}
+
+	return h->ae_algo->ops->set_rss(h, indir, key, hfunc);
+}
+
+static int hns3_get_rxnfc(struct net_device *netdev,
+			  struct ethtool_rxnfc *cmd,
+			  u32 *rule_locs)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size)
+		return -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = h->ae_algo->ops->get_tc_size(h);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct ethtool_ops hns3_ethtool_ops = {
+	.get_drvinfo = hns3_get_drvinfo,
+	.get_link = hns3_get_link,
+	.get_ringparam = hns3_get_ringparam,
+	.get_pauseparam = hns3_get_pauseparam,
+	.get_strings = hns3_get_strings,
+	.get_ethtool_stats = hns3_get_stats,
+	.get_sset_count = hns3_get_sset_count,
+	.get_rxnfc = hns3_get_rxnfc,
+	.get_rxfh_key_size = hns3_get_rss_key_size,
+	.get_rxfh_indir_size = hns3_get_rss_indir_size,
+	.get_rxfh = hns3_get_rss,
+	.set_rxfh = hns3_set_rss,
+	.get_link_ksettings = hns3_get_link_ksettings,
+};
+
+void hns3_ethtool_set_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &hns3_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index b9d310f20bcc..4878b7169e0f 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -3102,8 +3102,7 @@ static int ehea_setup_ports(struct ehea_adapter *adapter)
 		dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no",
 						 NULL);
 		if (!dn_log_port_id) {
-			pr_err("bad device node: eth_dn name=%s\n",
-			       eth_dn->full_name);
+			pr_err("bad device node: eth_dn name=%pOF\n", eth_dn);
 			continue;
 		}
 
@@ -3425,7 +3424,7 @@ static int ehea_probe_adapter(struct platform_device *dev)
 
 	if (!adapter->handle) {
 		dev_err(&dev->dev, "failed getting handle for adapter"
-			" '%s'\n", dev->dev.of_node->full_name);
+			" '%pOF'\n", dev->dev.of_node);
 		ret = -ENODEV;
 		goto out_free_ad;
 	}
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 259e69a52ec5..95135d20458f 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -133,8 +133,7 @@ static inline void emac_report_timeout_error(struct emac_instance *dev,
 				  EMAC_FTR_440EP_PHY_CLK_FIX))
 		DBG(dev, "%s" NL, error);
 	else if (net_ratelimit())
-		printk(KERN_ERR "%s: %s\n", dev->ofdev->dev.of_node->full_name,
-			error);
+		printk(KERN_ERR "%pOF: %s\n", dev->ofdev->dev.of_node, error);
 }
 
 /* EMAC PHY clock workaround:
@@ -2258,8 +2257,8 @@ static void emac_ethtool_get_drvinfo(struct net_device *ndev,
 
 	strlcpy(info->driver, "ibm_emac", sizeof(info->driver));
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
-	snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %s",
-		 dev->cell_index, dev->ofdev->dev.of_node->full_name);
+	snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF",
+		 dev->cell_index, dev->ofdev->dev.of_node);
 }
 
 static const struct ethtool_ops emac_ethtool_ops = {
@@ -2431,8 +2430,8 @@ static int emac_read_uint_prop(struct device_node *np, const char *name,
 	const u32 *prop = of_get_property(np, name, &len);
 	if (prop == NULL || len < sizeof(u32)) {
 		if (fatal)
-			printk(KERN_ERR "%s: missing %s property\n",
-			       np->full_name, name);
+			printk(KERN_ERR "%pOF: missing %s property\n",
+			       np, name);
 		return -ENODEV;
 	}
 	*val = *prop;
@@ -2768,7 +2767,7 @@ static int emac_init_phy(struct emac_instance *dev)
 #endif
 	mutex_unlock(&emac_phy_map_lock);
 	if (i == 0x20) {
-		printk(KERN_WARNING "%s: can't find PHY!\n", np->full_name);
+		printk(KERN_WARNING "%pOF: can't find PHY!\n", np);
 		return -ENXIO;
 	}
 
@@ -2894,8 +2893,8 @@ static int emac_init_config(struct emac_instance *dev)
 #ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL
 			dev->features |= EMAC_FTR_NO_FLOW_CONTROL_40x;
 #else
-			printk(KERN_ERR "%s: Flow control not disabled!\n",
-					np->full_name);
+			printk(KERN_ERR "%pOF: Flow control not disabled!\n",
+					np);
 			return -ENXIO;
 #endif
 		}
@@ -2918,8 +2917,7 @@ static int emac_init_config(struct emac_instance *dev)
 #ifdef CONFIG_IBM_EMAC_TAH
 		dev->features |= EMAC_FTR_HAS_TAH;
 #else
-		printk(KERN_ERR "%s: TAH support not enabled !\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: TAH support not enabled !\n", np);
 		return -ENXIO;
 #endif
 	}
@@ -2928,8 +2926,7 @@ static int emac_init_config(struct emac_instance *dev)
 #ifdef CONFIG_IBM_EMAC_ZMII
 		dev->features |= EMAC_FTR_HAS_ZMII;
 #else
-		printk(KERN_ERR "%s: ZMII support not enabled !\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: ZMII support not enabled !\n", np);
 		return -ENXIO;
 #endif
 	}
@@ -2938,8 +2935,7 @@ static int emac_init_config(struct emac_instance *dev)
 #ifdef CONFIG_IBM_EMAC_RGMII
 		dev->features |= EMAC_FTR_HAS_RGMII;
 #else
-		printk(KERN_ERR "%s: RGMII support not enabled !\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: RGMII support not enabled !\n", np);
 		return -ENXIO;
 #endif
 	}
@@ -2947,8 +2943,8 @@ static int emac_init_config(struct emac_instance *dev)
 	/* Read MAC-address */
 	p = of_get_property(np, "local-mac-address", NULL);
 	if (p == NULL) {
-		printk(KERN_ERR "%s: Can't find local-mac-address property\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't find local-mac-address property\n",
+		       np);
 		return -ENXIO;
 	}
 	memcpy(dev->ndev->dev_addr, p, ETH_ALEN);
@@ -3043,23 +3039,21 @@ static int emac_probe(struct platform_device *ofdev)
 	dev->emac_irq = irq_of_parse_and_map(np, 0);
 	dev->wol_irq = irq_of_parse_and_map(np, 1);
 	if (!dev->emac_irq) {
-		printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name);
+		printk(KERN_ERR "%pOF: Can't map main interrupt\n", np);
 		goto err_free;
 	}
 	ndev->irq = dev->emac_irq;
 
 	/* Map EMAC regs */
 	if (of_address_to_resource(np, 0, &dev->rsrc_regs)) {
-		printk(KERN_ERR "%s: Can't get registers address\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get registers address\n", np);
 		goto err_irq_unmap;
 	}
 	// TODO : request_mem_region
 	dev->emacp = ioremap(dev->rsrc_regs.start,
 			     resource_size(&dev->rsrc_regs));
 	if (dev->emacp == NULL) {
-		printk(KERN_ERR "%s: Can't map device registers!\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
 		err = -ENOMEM;
 		goto err_irq_unmap;
 	}
@@ -3068,8 +3062,7 @@ static int emac_probe(struct platform_device *ofdev)
 	err = emac_wait_deps(dev);
 	if (err) {
 		printk(KERN_ERR
-		       "%s: Timeout waiting for dependent devices\n",
-		       np->full_name);
+		       "%pOF: Timeout waiting for dependent devices\n", np);
 		/*  display more info about what's missing ? */
 		goto err_reg_unmap;
 	}
@@ -3084,8 +3077,8 @@ static int emac_probe(struct platform_device *ofdev)
 	dev->commac.rx_chan_mask = MAL_CHAN_MASK(dev->mal_rx_chan);
 	err = mal_register_commac(dev->mal, &dev->commac);
 	if (err) {
-		printk(KERN_ERR "%s: failed to register with mal %s!\n",
-		       np->full_name, dev->mal_dev->dev.of_node->full_name);
+		printk(KERN_ERR "%pOF: failed to register with mal %pOF!\n",
+		       np, dev->mal_dev->dev.of_node);
 		goto err_rel_deps;
 	}
 	dev->rx_skb_size = emac_rx_skb_size(ndev->mtu);
@@ -3161,8 +3154,8 @@ static int emac_probe(struct platform_device *ofdev)
 
 	err = register_netdev(ndev);
 	if (err) {
-		printk(KERN_ERR "%s: failed to register net device (%d)!\n",
-		       np->full_name, err);
+		printk(KERN_ERR "%pOF: failed to register net device (%d)!\n",
+		       np, err);
 		goto err_detach_tah;
 	}
 
@@ -3176,8 +3169,8 @@ static int emac_probe(struct platform_device *ofdev)
 	wake_up_all(&emac_probe_wait);
 
 
-	printk(KERN_INFO "%s: EMAC-%d %s, MAC %pM\n",
-	       ndev->name, dev->cell_index, np->full_name, ndev->dev_addr);
+	printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n",
+	       ndev->name, dev->cell_index, np, ndev->dev_addr);
 
 	if (dev->phy_mode == PHY_MODE_SGMII)
 		printk(KERN_NOTICE "%s: in SGMII mode\n", ndev->name);
diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h
index 5bdfc174a07e..9d06d3be3161 100644
--- a/drivers/net/ethernet/ibm/emac/debug.h
+++ b/drivers/net/ethernet/ibm/emac/debug.h
@@ -31,7 +31,7 @@
 #endif
 
 #define EMAC_DBG(d, name, fmt, arg...) \
-	printk(KERN_DEBUG #name "%s: " fmt, d->ofdev->dev.of_node->full_name, ## arg)
+	printk(KERN_DEBUG #name "%pOF: " fmt, d->ofdev->dev.of_node, ## arg)
 
 #if DBG_LEVEL > 0
 #  define DBG(d,f,x...)		EMAC_DBG(d, emac, f, ##x)
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 91b1a558f37d..2c74baa2398a 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -579,8 +579,8 @@ static int mal_probe(struct platform_device *ofdev)
 		mal->features |= (MAL_FTR_CLEAR_ICINTSTAT |
 				MAL_FTR_COMMON_ERR_INT);
 #else
-		printk(KERN_ERR "%s: Support for 405EZ not enabled!\n",
-				ofdev->dev.of_node->full_name);
+		printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n",
+				ofdev->dev.of_node);
 		err = -ENODEV;
 		goto fail;
 #endif
@@ -687,8 +687,8 @@ static int mal_probe(struct platform_device *ofdev)
 	mal_enable_eob_irq(mal);
 
 	printk(KERN_INFO
-	       "MAL v%d %s, %d TX channels, %d RX channels\n",
-	       mal->version, ofdev->dev.of_node->full_name,
+	       "MAL v%d %pOF, %d TX channels, %d RX channels\n",
+	       mal->version, ofdev->dev.of_node,
 	       mal->num_tx_chans, mal->num_rx_chans);
 
 	/* Advertise this instance to the rest of the world */
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index 206ccbbae7bb..c4a1ac38bba8 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -104,8 +104,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode)
 
 	/* Check if we need to attach to a RGMII */
 	if (input < 0 || !rgmii_valid_mode(mode)) {
-		printk(KERN_ERR "%s: unsupported settings !\n",
-		       ofdev->dev.of_node->full_name);
+		printk(KERN_ERR "%pOF: unsupported settings !\n",
+		       ofdev->dev.of_node);
 		return -ENODEV;
 	}
 
@@ -114,8 +114,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode)
 	/* Enable this input */
 	out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input));
 
-	printk(KERN_NOTICE "%s: input %d in %s mode\n",
-	       ofdev->dev.of_node->full_name, input, rgmii_mode_name(mode));
+	printk(KERN_NOTICE "%pOF: input %d in %s mode\n",
+	       ofdev->dev.of_node, input, rgmii_mode_name(mode));
 
 	++dev->users;
 
@@ -249,8 +249,7 @@ static int rgmii_probe(struct platform_device *ofdev)
 
 	rc = -ENXIO;
 	if (of_address_to_resource(np, 0, &regs)) {
-		printk(KERN_ERR "%s: Can't get registers address\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get registers address\n", np);
 		goto err_free;
 	}
 
@@ -258,8 +257,7 @@ static int rgmii_probe(struct platform_device *ofdev)
 	dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start,
 						 sizeof(struct rgmii_regs));
 	if (dev->base == NULL) {
-		printk(KERN_ERR "%s: Can't map device registers!\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
 		goto err_free;
 	}
 
@@ -278,8 +276,8 @@ static int rgmii_probe(struct platform_device *ofdev)
 	out_be32(&dev->base->fer, 0);
 
 	printk(KERN_INFO
-	       "RGMII %s initialized with%s MDIO support\n",
-	       ofdev->dev.of_node->full_name,
+	       "RGMII %pOF initialized with%s MDIO support\n",
+	       ofdev->dev.of_node,
 	       (dev->flags & EMAC_RGMII_FLAG_HAS_MDIO) ? "" : "out");
 
 	wmb();
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index 32cb6c9007c5..9912456dca48 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -58,8 +58,7 @@ void tah_reset(struct platform_device *ofdev)
 		--n;
 
 	if (unlikely(!n))
-		printk(KERN_ERR "%s: reset timeout\n",
-			ofdev->dev.of_node->full_name);
+		printk(KERN_ERR "%pOF: reset timeout\n", ofdev->dev.of_node);
 
 	/* 10KB TAH TX FIFO accommodates the max MTU of 9000 */
 	out_be32(&p->mr,
@@ -105,8 +104,7 @@ static int tah_probe(struct platform_device *ofdev)
 
 	rc = -ENXIO;
 	if (of_address_to_resource(np, 0, &regs)) {
-		printk(KERN_ERR "%s: Can't get registers address\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get registers address\n", np);
 		goto err_free;
 	}
 
@@ -114,8 +112,7 @@ static int tah_probe(struct platform_device *ofdev)
 	dev->base = (struct tah_regs __iomem *)ioremap(regs.start,
 					       sizeof(struct tah_regs));
 	if (dev->base == NULL) {
-		printk(KERN_ERR "%s: Can't map device registers!\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
 		goto err_free;
 	}
 
@@ -124,8 +121,7 @@ static int tah_probe(struct platform_device *ofdev)
 	/* Initialize TAH and enable IPv4 checksum verification, no TSO yet */
 	tah_reset(ofdev);
 
-	printk(KERN_INFO
-	       "TAH %s initialized\n", ofdev->dev.of_node->full_name);
+	printk(KERN_INFO "TAH %pOF initialized\n", ofdev->dev.of_node);
 	wmb();
 
 	return 0;
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index 8727b865ea02..89c42d362292 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -121,15 +121,15 @@ int zmii_attach(struct platform_device *ofdev, int input, int *mode)
 		} else
 			dev->mode = *mode;
 
-		printk(KERN_NOTICE "%s: bridge in %s mode\n",
-		       ofdev->dev.of_node->full_name,
+		printk(KERN_NOTICE "%pOF: bridge in %s mode\n",
+		       ofdev->dev.of_node,
 		       zmii_mode_name(dev->mode));
 	} else {
 		/* All inputs must use the same mode */
 		if (*mode != PHY_MODE_NA && *mode != dev->mode) {
 			printk(KERN_ERR
-			       "%s: invalid mode %d specified for input %d\n",
-			       ofdev->dev.of_node->full_name, *mode, input);
+			       "%pOF: invalid mode %d specified for input %d\n",
+			       ofdev->dev.of_node, *mode, input);
 			mutex_unlock(&dev->lock);
 			return -EINVAL;
 		}
@@ -250,8 +250,7 @@ static int zmii_probe(struct platform_device *ofdev)
 
 	rc = -ENXIO;
 	if (of_address_to_resource(np, 0, &regs)) {
-		printk(KERN_ERR "%s: Can't get registers address\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get registers address\n", np);
 		goto err_free;
 	}
 
@@ -259,8 +258,7 @@ static int zmii_probe(struct platform_device *ofdev)
 	dev->base = (struct zmii_regs __iomem *)ioremap(regs.start,
 						sizeof(struct zmii_regs));
 	if (dev->base == NULL) {
-		printk(KERN_ERR "%s: Can't map device registers!\n",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
 		goto err_free;
 	}
 
@@ -270,8 +268,7 @@ static int zmii_probe(struct platform_device *ofdev)
 	/* Disable all inputs by default */
 	out_be32(&dev->base->fer, 0);
 
-	printk(KERN_INFO
-	       "ZMII %s initialized\n", ofdev->dev.of_node->full_name);
+	printk(KERN_INFO "ZMII %pOF initialized\n", ofdev->dev.of_node);
 	wmb();
 	platform_set_drvdata(ofdev, dev);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index a3e694679635..5932160eb815 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -346,6 +346,31 @@ static void replenish_pools(struct ibmvnic_adapter *adapter)
 	}
 }
 
+static void release_stats_buffers(struct ibmvnic_adapter *adapter)
+{
+	kfree(adapter->tx_stats_buffers);
+	kfree(adapter->rx_stats_buffers);
+}
+
+static int init_stats_buffers(struct ibmvnic_adapter *adapter)
+{
+	adapter->tx_stats_buffers =
+				kcalloc(adapter->req_tx_queues,
+					sizeof(struct ibmvnic_tx_queue_stats),
+					GFP_KERNEL);
+	if (!adapter->tx_stats_buffers)
+		return -ENOMEM;
+
+	adapter->rx_stats_buffers =
+				kcalloc(adapter->req_rx_queues,
+					sizeof(struct ibmvnic_rx_queue_stats),
+					GFP_KERNEL);
+	if (!adapter->rx_stats_buffers)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static void release_stats_token(struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
@@ -686,6 +711,7 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 	release_rx_pools(adapter);
 
 	release_stats_token(adapter);
+	release_stats_buffers(adapter);
 	release_error_buffers(adapter);
 
 	if (adapter->napi) {
@@ -763,6 +789,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 	if (rc)
 		return rc;
 
+	rc = init_stats_buffers(adapter);
+	if (rc)
+		return rc;
+
 	rc = init_stats_token(adapter);
 	if (rc)
 		return rc;
@@ -1245,6 +1275,9 @@ out:
 	netdev->stats.tx_packets += tx_packets;
 	adapter->tx_send_failed += tx_send_failed;
 	adapter->tx_map_failed += tx_map_failed;
+	adapter->tx_stats_buffers[queue_num].packets += tx_packets;
+	adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
+	adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
 
 	return ret;
 }
@@ -1585,6 +1618,8 @@ restart_poll:
 		napi_gro_receive(napi, skb); /* send it up */
 		netdev->stats.rx_packets++;
 		netdev->stats.rx_bytes += length;
+		adapter->rx_stats_buffers[scrq_num].packets++;
+		adapter->rx_stats_buffers[scrq_num].bytes += length;
 		frames_processed++;
 	}
 
@@ -1694,18 +1729,36 @@ static u32 ibmvnic_get_link(struct net_device *netdev)
 static void ibmvnic_get_ringparam(struct net_device *netdev,
 				  struct ethtool_ringparam *ring)
 {
-	ring->rx_max_pending = 0;
-	ring->tx_max_pending = 0;
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
 	ring->rx_mini_max_pending = 0;
 	ring->rx_jumbo_max_pending = 0;
-	ring->rx_pending = 0;
-	ring->tx_pending = 0;
+	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
+	ring->tx_pending = adapter->req_tx_entries_per_subcrq;
 	ring->rx_mini_pending = 0;
 	ring->rx_jumbo_pending = 0;
 }
 
+static void ibmvnic_get_channels(struct net_device *netdev,
+				 struct ethtool_channels *channels)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	channels->max_rx = adapter->max_rx_queues;
+	channels->max_tx = adapter->max_tx_queues;
+	channels->max_other = 0;
+	channels->max_combined = 0;
+	channels->rx_count = adapter->req_rx_queues;
+	channels->tx_count = adapter->req_tx_queues;
+	channels->other_count = 0;
+	channels->combined_count = 0;
+}
+
 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
+	struct ibmvnic_adapter *adapter = netdev_priv(dev);
 	int i;
 
 	if (stringset != ETH_SS_STATS)
@@ -1713,13 +1766,39 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
 	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
 		memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+
+	for (i = 0; i < adapter->req_tx_queues; i++) {
+		snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+		data += ETH_GSTRING_LEN;
+
+		snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+		data += ETH_GSTRING_LEN;
+
+		snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
+		data += ETH_GSTRING_LEN;
+	}
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+		data += ETH_GSTRING_LEN;
+
+		snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+		data += ETH_GSTRING_LEN;
+
+		snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+		data += ETH_GSTRING_LEN;
+	}
 }
 
 static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
 {
+	struct ibmvnic_adapter *adapter = netdev_priv(dev);
+
 	switch (sset) {
 	case ETH_SS_STATS:
-		return ARRAY_SIZE(ibmvnic_stats);
+		return ARRAY_SIZE(ibmvnic_stats) +
+		       adapter->req_tx_queues * NUM_TX_STATS +
+		       adapter->req_rx_queues * NUM_RX_STATS;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1730,7 +1809,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
 	union ibmvnic_crq crq;
-	int i;
+	int i, j;
 
 	memset(&crq, 0, sizeof(crq));
 	crq.request_statistics.first = IBMVNIC_CRQ_CMD;
@@ -1745,7 +1824,26 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
 	wait_for_completion(&adapter->stats_done);
 
 	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
-		data[i] = IBMVNIC_GET_STAT(adapter, ibmvnic_stats[i].offset);
+		data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter,
+						ibmvnic_stats[i].offset));
+
+	for (j = 0; j < adapter->req_tx_queues; j++) {
+		data[i] = adapter->tx_stats_buffers[j].packets;
+		i++;
+		data[i] = adapter->tx_stats_buffers[j].bytes;
+		i++;
+		data[i] = adapter->tx_stats_buffers[j].dropped_packets;
+		i++;
+	}
+
+	for (j = 0; j < adapter->req_rx_queues; j++) {
+		data[i] = adapter->rx_stats_buffers[j].packets;
+		i++;
+		data[i] = adapter->rx_stats_buffers[j].bytes;
+		i++;
+		data[i] = adapter->rx_stats_buffers[j].interrupts;
+		i++;
+	}
 }
 
 static const struct ethtool_ops ibmvnic_ethtool_ops = {
@@ -1754,6 +1852,7 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = {
 	.set_msglevel		= ibmvnic_set_msglevel,
 	.get_link		= ibmvnic_get_link,
 	.get_ringparam		= ibmvnic_get_ringparam,
+	.get_channels		= ibmvnic_get_channels,
 	.get_strings            = ibmvnic_get_strings,
 	.get_sset_count         = ibmvnic_get_sset_count,
 	.get_ethtool_stats	= ibmvnic_get_ethtool_stats,
@@ -2050,6 +2149,8 @@ static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance)
 	struct ibmvnic_sub_crq_queue *scrq = instance;
 	struct ibmvnic_adapter *adapter = scrq->adapter;
 
+	adapter->rx_stats_buffers[scrq->scrq_num].interrupts++;
+
 	if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) {
 		disable_scrq_irq(adapter, scrq);
 		__napi_schedule(&adapter->napi[scrq->scrq_num]);
@@ -3851,10 +3952,7 @@ static int ibmvnic_resume(struct device *dev)
 	if (adapter->state != VNIC_OPEN)
 		return 0;
 
-	/* kick the interrupt handlers just in case we lost an interrupt */
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq,
-				     adapter->rx_scrq[i]);
+	tasklet_schedule(&adapter->tasklet);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 8eff6e15f4bb..d02257ccc377 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -166,6 +166,20 @@ struct ibmvnic_statistics {
 	u8 reserved[72];
 } __packed __aligned(8);
 
+#define NUM_TX_STATS 3
+struct ibmvnic_tx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 dropped_packets;
+};
+
+#define NUM_RX_STATS 3
+struct ibmvnic_rx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 interrupts;
+};
+
 struct ibmvnic_acl_buffer {
 	__be32 len;
 	__be32 version;
@@ -956,6 +970,9 @@ struct ibmvnic_adapter {
 	int tx_send_failed;
 	int tx_map_failed;
 
+	struct ibmvnic_tx_queue_stats *tx_stats_buffers;
+	struct ibmvnic_rx_queue_stats *rx_stats_buffers;
+
 	int phys_link_state;
 	int logical_link_state;
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 5e37387c7082..e69d49d91d67 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1265,15 +1265,17 @@ err_queueing_scheme:
 	return err;
 }
 
-static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-			    __be16 proto, struct tc_to_netdev *tc)
+static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			    void *type_data)
 {
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return fm10k_setup_tc(dev, tc->mqprio->num_tc);
+	return fm10k_setup_tc(dev, mqprio->num_tc);
 }
 
 static void fm10k_assign_l2_accel(struct fm10k_intfc *interface,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 9692a5294fa3..1d29152256fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1091,7 +1091,7 @@ static void i40e_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_hw *hw = &pf->hw;
 	u32 *reg_buf = p;
-	int i, j, ri;
+	unsigned int i, j, ri;
 	u32 reg;
 
 	/* Tell ethtool which driver-version-specific regs output we have.
@@ -1550,9 +1550,9 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
+	unsigned int j;
 	int i = 0;
 	char *p;
-	int j;
 	struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi);
 	unsigned int start;
 
@@ -1637,7 +1637,7 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 	char *p = (char *)data;
-	int i;
+	unsigned int i;
 
 	switch (stringset) {
 	case ETH_SS_TEST:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2db93d3f6d23..a7e5a76703e7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4773,7 +4773,7 @@ static void i40e_detect_recover_hung(struct i40e_pf *pf)
 {
 	struct net_device *netdev;
 	struct i40e_vsi *vsi;
-	int i;
+	unsigned int i;
 
 	/* Only for LAN VSI */
 	vsi = pf->vsi[pf->lan_vsi];
@@ -5656,16 +5656,17 @@ exit:
 	return ret;
 }
 
-static int __i40e_setup_tc(struct net_device *netdev, u32 handle,
-			   u32 chain_index, __be16 proto,
-			   struct tc_to_netdev *tc)
+static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			   void *type_data)
 {
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return i40e_setup_tc(netdev, tc->mqprio->num_tc);
+	return i40e_setup_tc(netdev, mqprio->num_tc);
 }
 
 /**
@@ -7520,6 +7521,18 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
 	i40e_flush(hw);
 }
 
+static const char *i40e_tunnel_name(struct i40e_udp_port_config *port)
+{
+	switch (port->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		return "vxlan";
+	case UDP_TUNNEL_TYPE_GENEVE:
+		return "geneve";
+	default:
+		return "unknown";
+	}
+}
+
 /**
  * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters
  * @pf: board private structure
@@ -7565,14 +7578,14 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
 				ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
 
 			if (ret) {
-				dev_dbg(&pf->pdev->dev,
-					"%s %s port %d, index %d failed, err %s aq_err %s\n",
-					pf->udp_ports[i].type ? "vxlan" : "geneve",
-					port ? "add" : "delete",
-					port, i,
-					i40e_stat_str(&pf->hw, ret),
-					i40e_aq_str(&pf->hw,
-						    pf->hw.aq.asq_last_status));
+				dev_info(&pf->pdev->dev,
+					 "%s %s port %d, index %d failed, err %s aq_err %s\n",
+					 i40e_tunnel_name(&pf->udp_ports[i]),
+					 port ? "add" : "delete",
+					 port, i,
+					 i40e_stat_str(&pf->hw, ret),
+					 i40e_aq_str(&pf->hw,
+						     pf->hw.aq.asq_last_status));
 				pf->udp_ports[i].port = 0;
 			}
 		}
@@ -9589,6 +9602,7 @@ static int i40e_xdp(struct net_device *dev,
 		return i40e_xdp_setup(vsi, xdp->prog);
 	case XDP_QUERY_PROG:
 		xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
+		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
 	default:
 		return -EINVAL;
@@ -12089,7 +12103,10 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
 	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
 
 	i40e_stop_misc_vector(pf);
-
+	if (pf->msix_entries) {
+		synchronize_irq(pf->msix_entries[0].vector);
+		free_irq(pf->msix_entries[0].vector, pf);
+	}
 	retval = pci_save_state(pdev);
 	if (retval)
 		return retval;
@@ -12129,6 +12146,15 @@ static int i40e_resume(struct pci_dev *pdev)
 	/* handling the reset will rebuild the device state */
 	if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
 		clear_bit(__I40E_DOWN, pf->state);
+		if (pf->msix_entries) {
+			err = request_irq(pf->msix_entries[0].vector,
+					  i40e_intr, 0, pf->int_name, pf);
+			if (err) {
+				dev_err(&pf->pdev->dev,
+					"request_irq for %s failed: %d\n",
+					pf->int_name, err);
+			}
+		}
 		i40e_reset_and_rebuild(pf, false, false);
 	}
 
@@ -12168,12 +12194,14 @@ static int __init i40e_init_module(void)
 		i40e_driver_string, i40e_driver_version_str);
 	pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
 
-	/* we will see if single thread per module is enough for now,
-	 * it can't be any worse than using the system workqueue which
-	 * was already single threaded
+	/* There is no need to throttle the number of active tasks because
+	 * each device limits its own task using a state bit for scheduling
+	 * the service task, and the device tasks do not interfere with each
+	 * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM
+	 * since we need to be able to guarantee forward progress even under
+	 * memory pressure.
 	 */
-	i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
-				  i40e_driver_name);
+	i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
 	if (!i40e_wq) {
 		pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 800bd55d0159..6fdecd70dcbc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -134,8 +134,25 @@ i40e_i40e_acquire_nvm_exit:
  **/
 void i40e_release_nvm(struct i40e_hw *hw)
 {
-	if (!hw->nvm.blank_nvm_mode)
-		i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+	i40e_status ret_code = I40E_SUCCESS;
+	u32 total_delay = 0;
+
+	if (hw->nvm.blank_nvm_mode)
+		return;
+
+	ret_code = i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+
+	/* there are some rare cases when trying to release the resource
+	 * results in an admin Q timeout, so handle them correctly
+	 */
+	while ((ret_code == I40E_ERR_ADMIN_QUEUE_TIMEOUT) &&
+	       (total_delay < hw->aq.asq_cmd_timeout)) {
+		usleep_range(1000, 2000);
+		ret_code = i40e_aq_release_resource(hw,
+						    I40E_NVM_RESOURCE_ID,
+						    0, NULL);
+		total_delay++;
+	}
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 1a0be835fa06..0129ed3b78ec 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -158,13 +158,12 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-	struct timespec64 now, then;
+	struct timespec64 now;
 
-	then = ns_to_timespec64(delta);
 	mutex_lock(&pf->tmreg_lock);
 
 	i40e_ptp_read(pf, &now);
-	now = timespec64_add(now, then);
+	timespec64_add_ns(&now, delta);
 	i40e_ptp_write(pf, (const struct timespec64 *)&now);
 
 	mutex_unlock(&pf->tmreg_lock);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b936febc315a..d464fceb300f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -860,7 +860,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 	netdev_tx_completed_queue(txring_txq(tx_ring),
 				  total_packets, total_bytes);
 
-#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
 		/* Make sure that anybody stopping the queue after this
@@ -2063,7 +2063,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
 	bool failure = false, xdp_xmit = false;
 
-	while (likely(total_rx_packets < budget)) {
+	while (likely(total_rx_packets < (unsigned int)budget)) {
 		struct i40e_rx_buffer *rx_buffer;
 		union i40e_rx_desc *rx_desc;
 		struct xdp_buff xdp;
@@ -2196,7 +2196,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 
 	/* guarantee a trip back through this routine if there was a failure */
-	return failure ? budget : total_rx_packets;
+	return failure ? budget : (int)total_rx_packets;
 }
 
 static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@ -2451,9 +2451,15 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		hlen = (hdr.network[0] & 0x0F) << 2;
 		l4_proto = hdr.ipv4->protocol;
 	} else {
-		hlen = hdr.network - skb->data;
-		l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
-		hlen -= hdr.network - skb->data;
+		/* find the start of the innermost ipv6 header */
+		unsigned int inner_hlen = hdr.network - skb->data;
+		unsigned int h_offset = inner_hlen;
+
+		/* this function updates h_offset to the end of the header */
+		l4_proto =
+		  ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL);
+		/* hlen will contain our best estimate of the tcp header */
+		hlen = h_offset - inner_hlen;
 	}
 
 	if (l4_proto != IPPROTO_TCP)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index ecbe40ea8ffe..979110d59f67 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1567,7 +1567,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 			dev_err(&pf->pdev->dev,
 				"VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n",
 				 vf->vf_id);
-			ret = I40E_ERR_PARAM;
+			aq_ret = I40E_ERR_PARAM;
 			goto err;
 		}
 		vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
@@ -1741,16 +1741,14 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 							    NULL);
 	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
 		hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
-			aq_ret = 0;
-			if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) {
-				aq_ret =
-				i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
-								   vsi->seid,
-								   alluni,
-								   f->vlan,
-								   NULL);
-				aq_err = pf->hw.aq.asq_last_status;
-			}
+			if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
+				continue;
+			aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
+								    vsi->seid,
+								    alluni,
+								    f->vlan,
+								    NULL);
+			aq_err = pf->hw.aq.asq_last_status;
 			if (aq_ret)
 				dev_err(&pf->pdev->dev,
 					"Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n",
@@ -2764,7 +2762,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
-	dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
 	/* program mac filter */
 	if (i40e_sync_vsi_filters(vsi)) {
 		dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
@@ -2772,7 +2769,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 		goto error_param;
 	}
 	ether_addr_copy(vf->default_lan_addr.addr, mac);
-	vf->pf_set_mac = true;
+
+	if (is_zero_ether_addr(mac)) {
+		vf->pf_set_mac = false;
+		dev_info(&pf->pdev->dev, "Removing MAC on VF %d\n", vf_id);
+	} else {
+		vf->pf_set_mac = true;
+		dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n",
+			 mac, vf_id);
+	}
+
 	/* Force the VF driver stop so it has to reload with new MAC address */
 	i40e_vc_disable_vf(pf, vf);
 	dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index 5e314fd3c016..a90737786c34 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
@@ -54,7 +54,7 @@ struct i40e_dma_mem {
 	void *va;
 	dma_addr_t pa;
 	u32 size;
-} __packed;
+};
 
 #define i40e_allocate_dma_mem(h, m, unused, s, a) \
 	i40evf_allocate_dma_mem_d(h, m, s, a)
@@ -63,7 +63,7 @@ struct i40e_dma_mem {
 struct i40e_virt_mem {
 	void *va;
 	u32 size;
-} __packed;
+};
 #define i40e_allocate_virt_mem(h, m, s) i40evf_allocate_virt_mem_d(h, m, s)
 #define i40e_free_virt_mem(h, m) i40evf_free_virt_mem_d(h, m)
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 12b02e530503..d91676ccf125 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -275,7 +275,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 	netdev_tx_completed_queue(txring_txq(tx_ring),
 				  total_packets, total_bytes);
 
-#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
 		/* Make sure that anybody stopping the queue after this
@@ -1299,7 +1299,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
 	bool failure = false;
 
-	while (likely(total_rx_packets < budget)) {
+	while (likely(total_rx_packets < (unsigned int)budget)) {
 		struct i40e_rx_buffer *rx_buffer;
 		union i40e_rx_desc *rx_desc;
 		unsigned int size;
@@ -1406,7 +1406,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 
 	/* guarantee a trip back through this routine if there was a failure */
-	return failure ? budget : total_rx_packets;
+	return failure ? budget : (int)total_rx_packets;
 }
 
 static u32 i40e_buildreg_itr(const int type, const u16 itr)
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 6cc92089fecb..7901cc85cbe5 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -39,6 +39,17 @@
 #include <linux/tcp.h>
 #include <linux/sctp.h>
 #include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/socket.h>
+#include <linux/jiffies.h>
 #include <net/ip6_checksum.h>
 #include <net/udp.h>
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 9bb2cc7dd4e4..76fd89c1dbb2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -165,7 +165,7 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev,
 				     struct ethtool_stats *stats, u64 *data)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int i, j;
+	unsigned int i, j;
 	char *p;
 
 	for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) {
@@ -197,7 +197,7 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
 	int i;
 
 	if (sset == ETH_SS_STATS) {
-		for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) {
+		for (i = 0; i < (int)I40EVF_GLOBAL_STATS_LEN; i++) {
 			memcpy(p, i40evf_gstrings_stats[i].stat_string,
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 7c213a347909..93536b9fc629 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1957,8 +1957,8 @@ static void i40evf_adminq_task(struct work_struct *work)
 		container_of(work, struct i40evf_adapter, adminq_task);
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_arq_event_info event;
-	struct virtchnl_msg *v_msg;
-	i40e_status ret;
+	enum virtchnl_ops v_op;
+	i40e_status ret, v_ret;
 	u32 val, oldval;
 	u16 pending;
 
@@ -1970,15 +1970,15 @@ static void i40evf_adminq_task(struct work_struct *work)
 	if (!event.msg_buf)
 		goto out;
 
-	v_msg = (struct virtchnl_msg *)&event.desc;
 	do {
 		ret = i40evf_clean_arq_element(hw, &event, &pending);
-		if (ret || !v_msg->v_opcode)
+		v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+		v_ret = (i40e_status)le32_to_cpu(event.desc.cookie_low);
+
+		if (ret || !v_op)
 			break; /* No event to process or error cleaning ARQ */
 
-		i40evf_virtchnl_completion(adapter, v_msg->v_opcode,
-					   (i40e_status)v_msg->v_retval,
-					   event.msg_buf,
+		i40evf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf,
 					   event.msg_len);
 		if (pending != 0)
 			memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e7017f3d..2c19070d2a0b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -79,16 +79,28 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 
 	switch (hw->phy.media_type) {
 	case ixgbe_media_type_fiber:
-		hw->mac.ops.check_link(hw, &speed, &link_up, false);
-		/* if link is down, assume supported */
-		if (link_up)
-			supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
+		/* flow control autoneg black list */
+		switch (hw->device_id) {
+		case IXGBE_DEV_ID_X550EM_A_SFP:
+		case IXGBE_DEV_ID_X550EM_A_SFP_N:
+			supported = false;
+			break;
+		default:
+			hw->mac.ops.check_link(hw, &speed, &link_up, false);
+			/* if link is down, assume supported */
+			if (link_up)
+				supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
 				true : false;
-		else
-			supported = true;
+			else
+				supported = true;
+		}
+
 		break;
 	case ixgbe_media_type_backplane:
-		supported = true;
+		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_XFI)
+			supported = false;
+		else
+			supported = true;
 		break;
 	case ixgbe_media_type_copper:
 		/* only some copper devices support flow control autoneg */
@@ -111,6 +123,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 		break;
 	}
 
+	if (!supported)
+		hw_dbg(hw, "Device %x does not support flow control autoneg\n",
+		       hw->device_id);
+
 	return supported;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index b45fdc98033d..f1bfae0c41d0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1018,8 +1018,12 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
 	struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
 	struct ixgbe_ring *ring;
 
-	ixgbe_for_each_ring(ring, q_vector->tx)
-		adapter->tx_ring[ring->queue_index] = NULL;
+	ixgbe_for_each_ring(ring, q_vector->tx) {
+		if (ring_is_xdp(ring))
+			adapter->xdp_ring[ring->queue_index] = NULL;
+		else
+			adapter->tx_ring[ring->queue_index] = NULL;
+	}
 
 	ixgbe_for_each_ring(ring, q_vector->rx)
 		adapter->rx_ring[ring->queue_index] = NULL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index f1dbdf26d8e1..c6b132476de4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -386,7 +386,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
 	if (ixgbe_removed(reg_addr))
 		return IXGBE_FAILED_READ_REG;
 	if (unlikely(hw->phy.nw_mng_if_sel &
-		     IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M)) {
+		     IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) {
 		struct ixgbe_adapter *adapter;
 		int i;
 
@@ -2214,7 +2214,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 				     struct ixgbe_ring *rx_ring,
 				     struct xdp_buff *xdp)
 {
-	int result = IXGBE_XDP_PASS;
+	int err, result = IXGBE_XDP_PASS;
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
@@ -2231,6 +2231,13 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 	case XDP_TX:
 		result = ixgbe_xmit_xdp_ring(adapter, xdp);
 		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+		if (!err)
+			result = IXGBE_XDP_TX;
+		else
+			result = IXGBE_XDP_CONSUMED;
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		/* fallthrough */
@@ -2408,6 +2415,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		 */
 		wmb();
 		writel(ring->next_to_use, ring->tail);
+
+		xdp_do_flush_map();
 	}
 
 	u64_stats_update_begin(&rx_ring->syncp);
@@ -5810,6 +5819,9 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 
 	usleep_range(10000, 20000);
 
+	/* synchronize_sched() needed for pending XDP buffers to drain */
+	if (adapter->xdp_ring[0])
+		synchronize_sched();
 	netif_tx_stop_all_queues(netdev);
 
 	/* call carrier off first to avoid false dev_watchdog timeouts */
@@ -8839,7 +8851,6 @@ static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
 }
 
 static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter,
-					    __be16 protocol,
 					    struct tc_cls_u32_offload *cls)
 {
 	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
@@ -8941,7 +8952,7 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
 	LIST_HEAD(actions);
 	int err;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	tcf_exts_to_list(exts, &actions);
@@ -9025,9 +9036,9 @@ static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
 }
 
 static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
-				  __be16 protocol,
 				  struct tc_cls_u32_offload *cls)
 {
+	__be16 protocol = cls->common.protocol;
 	u32 loc = cls->knode.handle & 0xfffff;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_mat_field *field_ptr;
@@ -9214,41 +9225,49 @@ free_jump:
 	return err;
 }
 
-static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-			    __be16 proto, struct tc_to_netdev *tc)
+static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+				  struct tc_cls_u32_offload *cls_u32)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 
-	if (chain_index)
+	if (TC_H_MAJ(cls_u32->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_u32->common.chain_index)
 		return -EOPNOTSUPP;
 
-	if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
-	    tc->type == TC_SETUP_CLSU32) {
-		switch (tc->cls_u32->command) {
-		case TC_CLSU32_NEW_KNODE:
-		case TC_CLSU32_REPLACE_KNODE:
-			return ixgbe_configure_clsu32(adapter,
-						      proto, tc->cls_u32);
-		case TC_CLSU32_DELETE_KNODE:
-			return ixgbe_delete_clsu32(adapter, tc->cls_u32);
-		case TC_CLSU32_NEW_HNODE:
-		case TC_CLSU32_REPLACE_HNODE:
-			return ixgbe_configure_clsu32_add_hnode(adapter, proto,
-								tc->cls_u32);
-		case TC_CLSU32_DELETE_HNODE:
-			return ixgbe_configure_clsu32_del_hnode(adapter,
-								tc->cls_u32);
-		default:
-			return -EINVAL;
-		}
+	switch (cls_u32->command) {
+	case TC_CLSU32_NEW_KNODE:
+	case TC_CLSU32_REPLACE_KNODE:
+		return ixgbe_configure_clsu32(adapter, cls_u32);
+	case TC_CLSU32_DELETE_KNODE:
+		return ixgbe_delete_clsu32(adapter, cls_u32);
+	case TC_CLSU32_NEW_HNODE:
+	case TC_CLSU32_REPLACE_HNODE:
+		return ixgbe_configure_clsu32_add_hnode(adapter, cls_u32);
+	case TC_CLSU32_DELETE_HNODE:
+		return ixgbe_configure_clsu32_del_hnode(adapter, cls_u32);
+	default:
+		return -EOPNOTSUPP;
 	}
+}
 
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
-
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+static int ixgbe_setup_tc_mqprio(struct net_device *dev,
+				 struct tc_mqprio_qopt *mqprio)
+{
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	return ixgbe_setup_tc(dev, mqprio->num_tc);
+}
 
-	return ixgbe_setup_tc(dev, tc->mqprio->num_tc);
+static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			    void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_CLSU32:
+		return ixgbe_setup_tc_cls_u32(dev, type_data);
+	case TC_SETUP_MQPRIO:
+		return ixgbe_setup_tc_mqprio(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 #ifdef CONFIG_PCI_IOV
@@ -9823,6 +9842,53 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
 	}
 }
 
+static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ring *ring;
+	int err;
+
+	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+		return -EINVAL;
+
+	/* During program transitions its possible adapter->xdp_prog is assigned
+	 * but ring has not been configured yet. In this case simply abort xmit.
+	 */
+	ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL;
+	if (unlikely(!ring))
+		return -EINVAL;
+
+	err = ixgbe_xmit_xdp_ring(adapter, xdp);
+	if (err != IXGBE_XDP_TX)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ixgbe_xdp_flush(struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ring *ring;
+
+	/* Its possible the device went down between xdp xmit and flush so
+	 * we need to ensure device is still up.
+	 */
+	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+		return;
+
+	ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL;
+	if (unlikely(!ring))
+		return;
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 */
+	wmb();
+	writel(ring->next_to_use, ring->tail);
+
+	return;
+}
+
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
@@ -9869,6 +9935,8 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_udp_tunnel_del	= ixgbe_del_udp_tunnel_port,
 	.ndo_features_check	= ixgbe_features_check,
 	.ndo_xdp		= ixgbe_xdp,
+	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
+	.ndo_xdp_flush		= ixgbe_xdp_flush,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 0760bd7eeb01..112d24c6c9ce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -679,8 +679,9 @@ update_vlvfb:
 static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
 				int vf, int index, unsigned char *mac_addr)
 {
-	struct list_head *pos;
 	struct vf_macvlans *entry;
+	struct list_head *pos;
+	int retval = 0;
 
 	if (index <= 1) {
 		list_for_each(pos, &adapter->vf_mvs.l) {
@@ -721,13 +722,15 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
 	if (!entry || !entry->free)
 		return -ENOSPC;
 
+	retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
+	if (retval < 0)
+		return retval;
+
 	entry->free = false;
 	entry->is_macvlan = true;
 	entry->vf = vf;
 	memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
 
-	ixgbe_add_mac_filter(adapter, mac_addr, vf);
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 9c2460c5ef1b..ffa0ee5cd0f5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3778,8 +3778,8 @@ struct ixgbe_info {
 #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G	BIT(19)
 #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G	BIT(20)
 #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G	BIT(21)
-#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M	BIT(23)
-#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE	BIT(24)
+#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE	BIT(25)
+#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE	BIT(24) /* X552 only */
 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT	3
 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD	\
 				(0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 72d84a065e34..19fbb2f28ea4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1555,9 +1555,14 @@ static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
  **/
 static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
 {
+	struct ixgbe_mac_info *mac = &hw->mac;
 	s32 status;
 	u32 reg_val;
 
+	/* iXFI is only supported with X552 */
+	if (mac->type != ixgbe_mac_X550EM_x)
+		return IXGBE_ERR_LINK_SETUP;
+
 	/* Disable AN and force speed to 10G Serial. */
 	status = ixgbe_read_iosf_sb_reg_x550(hw,
 					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -1874,8 +1879,10 @@ static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
 	else
 		force_speed = IXGBE_LINK_SPEED_1GB_FULL;
 
-	/* If internal link mode is XFI, then setup XFI internal link. */
-	if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+	/* If X552 and internal link mode is XFI, then setup XFI internal link.
+	 */
+	if (hw->mac.type == ixgbe_mac_X550EM_x &&
+	    !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
 		status = ixgbe_setup_ixfi_x550em(hw, &force_speed);
 
 		if (status)
@@ -2404,17 +2411,30 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 	status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
 
 	/* Enable link status change alarm */
-	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-				      MDIO_MMD_AN, &reg);
-	if (status)
-		return status;
 
-	reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+	/* Enable the LASI interrupts on X552 devices to receive notifications
+	 * of the link configurations of the external PHY and correspondingly
+	 * support the configuration of the internal iXFI link, since iXFI does
+	 * not support auto-negotiation. This is not required for X553 devices
+	 * having KR support, which performs auto-negotiations and which is used
+	 * as the internal link to the external PHY. Hence adding a check here
+	 * to avoid enabling LASI interrupts for X553 devices.
+	 */
+	if (hw->mac.type != ixgbe_mac_x550em_a) {
+		status = hw->phy.ops.read_reg(hw,
+					    IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+					    MDIO_MMD_AN, &reg);
+		if (status)
+			return status;
 
-	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-				       MDIO_MMD_AN, reg);
-	if (status)
-		return status;
+		reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+
+		status = hw->phy.ops.write_reg(hw,
+					    IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+					    MDIO_MMD_AN, reg);
+		if (status)
+			return status;
+	}
 
 	/* Enable high temperature failure and global fault alarms */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
@@ -2615,7 +2635,8 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
 	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
 		return IXGBE_ERR_CONFIG;
 
-	if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
+	if (!(hw->mac.type == ixgbe_mac_X550EM_x &&
+	      !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) {
 		speed = IXGBE_LINK_SPEED_10GB_FULL |
 			IXGBE_LINK_SPEED_1GB_FULL;
 		return ixgbe_setup_kr_speed_x550em(hw, speed);
@@ -2822,7 +2843,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
 {
 	bool pause, asm_dir;
 	u32 reg_val;
-	s32 rc;
+	s32 rc = 0;
 
 	/* Validate the requested mode */
 	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
@@ -2865,32 +2886,37 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
 		return IXGBE_ERR_CONFIG;
 	}
 
-	if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR &&
-	    hw->device_id != IXGBE_DEV_ID_X550EM_A_KR &&
-	    hw->device_id != IXGBE_DEV_ID_X550EM_A_KR_L)
-		return 0;
-
-	rc = hw->mac.ops.read_iosf_sb_reg(hw,
-					  IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-					  IXGBE_SB_IOSF_TARGET_KR_PHY,
-					  &reg_val);
-	if (rc)
-		return rc;
-
-	reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
-		     IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
-	if (pause)
-		reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
-	if (asm_dir)
-		reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
-	rc = hw->mac.ops.write_iosf_sb_reg(hw,
-					   IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-					   IXGBE_SB_IOSF_TARGET_KR_PHY,
-					   reg_val);
-
-	/* This device does not fully support AN. */
-	hw->fc.disable_fc_autoneg = true;
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X550EM_X_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR_L:
+		rc = hw->mac.ops.read_iosf_sb_reg(hw,
+					    IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					    IXGBE_SB_IOSF_TARGET_KR_PHY,
+					    &reg_val);
+		if (rc)
+			return rc;
 
+		reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+			     IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+		if (pause)
+			reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+		if (asm_dir)
+			reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+		rc = hw->mac.ops.write_iosf_sb_reg(hw,
+					    IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					    IXGBE_SB_IOSF_TARGET_KR_PHY,
+					    reg_val);
+
+		/* This device does not fully support AN. */
+		hw->fc.disable_fc_autoneg = true;
+		break;
+	case IXGBE_DEV_ID_X550EM_X_XFI:
+		hw->fc.disable_fc_autoneg = true;
+		break;
+	default:
+		break;
+	}
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 48d21c1e09f2..39bc8fbbdd65 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -120,6 +120,9 @@
 #define MVPP2_TXQ_DESC_ADDR_REG			0x2084
 #define MVPP2_TXQ_DESC_SIZE_REG			0x2088
 #define     MVPP2_TXQ_DESC_SIZE_MASK		0x3ff0
+#define MVPP2_TXQ_THRESH_REG			0x2094
+#define	    MVPP2_TXQ_THRESH_OFFSET		16
+#define	    MVPP2_TXQ_THRESH_MASK		0x3fff
 #define MVPP2_AGGR_TXQ_UPDATE_REG		0x2090
 #define MVPP2_TXQ_INDEX_REG			0x2098
 #define MVPP2_TXQ_PREF_BUF_REG			0x209c
@@ -183,9 +186,12 @@
 #define MVPP22_AXI_CODE_DOMAIN_SYSTEM		3
 
 /* Interrupt Cause and Mask registers */
+#define MVPP2_ISR_TX_THRESHOLD_REG(port)	(0x5140 + 4 * (port))
+#define     MVPP2_MAX_ISR_TX_THRESHOLD		0xfffff0
+
 #define MVPP2_ISR_RX_THRESHOLD_REG(rxq)		(0x5200 + 4 * (rxq))
 #define     MVPP2_MAX_ISR_RX_THRESHOLD		0xfffff0
-#define MVPP21_ISR_RXQ_GROUP_REG(rxq)		(0x5400 + 4 * (rxq))
+#define MVPP21_ISR_RXQ_GROUP_REG(port)		(0x5400 + 4 * (port))
 
 #define MVPP22_ISR_RXQ_GROUP_INDEX_REG          0x5400
 #define MVPP22_ISR_RXQ_GROUP_INDEX_SUBGROUP_MASK 0xf
@@ -206,6 +212,7 @@
 #define MVPP2_ISR_RX_TX_CAUSE_REG(port)		(0x5480 + 4 * (port))
 #define     MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK	0xffff
 #define     MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK	0xff0000
+#define     MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET	16
 #define     MVPP2_CAUSE_RX_FIFO_OVERRUN_MASK	BIT(24)
 #define     MVPP2_CAUSE_FCS_ERR_MASK		BIT(25)
 #define     MVPP2_CAUSE_TX_FIFO_UNDERRUN_MASK	BIT(26)
@@ -372,6 +379,7 @@
 /* Coalescing */
 #define MVPP2_TXDONE_COAL_PKTS_THRESH	15
 #define MVPP2_TXDONE_HRTIMER_PERIOD_NS	1000000UL
+#define MVPP2_TXDONE_COAL_USEC		1000
 #define MVPP2_RX_COAL_PKTS		32
 #define MVPP2_RX_COAL_USEC		100
 
@@ -685,7 +693,8 @@ enum mvpp2_prs_l3_cast {
 #define MVPP21_ADDR_SPACE_SZ		0
 #define MVPP22_ADDR_SPACE_SZ		SZ_64K
 
-#define MVPP2_MAX_CPUS			4
+#define MVPP2_MAX_THREADS		8
+#define MVPP2_MAX_QVECS			MVPP2_MAX_THREADS
 
 enum mvpp2_bm_type {
 	MVPP2_BM_FREE,
@@ -701,11 +710,12 @@ struct mvpp2 {
 	void __iomem *lms_base;
 	void __iomem *iface_base;
 
-	/* On PPv2.2, each CPU can access the base register through a
-	 * separate address space, each 64 KB apart from each
-	 * other.
+	/* On PPv2.2, each "software thread" can access the base
+	 * register through a separate address space, each 64 KB apart
+	 * from each other. Typically, such address spaces will be
+	 * used per CPU.
 	 */
-	void __iomem *cpu_base[MVPP2_MAX_CPUS];
+	void __iomem *swth_base[MVPP2_MAX_THREADS];
 
 	/* Common clocks */
 	struct clk *pp_clk;
@@ -752,6 +762,18 @@ struct mvpp2_port_pcpu {
 	struct tasklet_struct tx_done_tasklet;
 };
 
+struct mvpp2_queue_vector {
+	int irq;
+	struct napi_struct napi;
+	enum { MVPP2_QUEUE_VECTOR_SHARED, MVPP2_QUEUE_VECTOR_PRIVATE } type;
+	int sw_thread_id;
+	u16 sw_thread_mask;
+	int first_rxq;
+	int nrxqs;
+	u32 pending_cause_rx;
+	struct mvpp2_port *port;
+};
+
 struct mvpp2_port {
 	u8 id;
 
@@ -760,22 +782,19 @@ struct mvpp2_port {
 	 */
 	int gop_id;
 
-	int irq;
-
 	struct mvpp2 *priv;
 
 	/* Per-port registers' base address */
 	void __iomem *base;
 
 	struct mvpp2_rx_queue **rxqs;
+	unsigned int nrxqs;
 	struct mvpp2_tx_queue **txqs;
+	unsigned int ntxqs;
 	struct net_device *dev;
 
 	int pkt_size;
 
-	u32 pending_cause_rx;
-	struct napi_struct napi;
-
 	/* Per-CPU port control */
 	struct mvpp2_port_pcpu __percpu *pcpu;
 
@@ -797,6 +816,12 @@ struct mvpp2_port {
 
 	/* Index of first port's physical RXQ */
 	u8 first_rxq;
+
+	struct mvpp2_queue_vector qvecs[MVPP2_MAX_QVECS];
+	unsigned int nqvecs;
+	bool has_tx_irqs;
+
+	u32 tx_time_coal;
 };
 
 /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
@@ -1062,12 +1087,14 @@ struct mvpp2_bm_pool {
 	u32 port_map;
 };
 
-/* Static declaractions */
+/* Queue modes */
+#define MVPP2_QDIST_SINGLE_MODE	0
+#define MVPP2_QDIST_MULTI_MODE	1
 
-/* Number of RXQs used by single port */
-static int rxq_number = MVPP2_DEFAULT_RXQ;
-/* Number of TXQs used by single port */
-static int txq_number = MVPP2_MAX_TXQ;
+static int queue_mode = MVPP2_QDIST_SINGLE_MODE;
+
+module_param(queue_mode, int, 0444);
+MODULE_PARM_DESC(queue_mode, "Set queue_mode (single=0, multi=1)");
 
 #define MVPP2_DRIVER_NAME "mvpp2"
 #define MVPP2_DRIVER_VERSION "1.0"
@@ -1076,12 +1103,12 @@ static int txq_number = MVPP2_MAX_TXQ;
 
 static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
 {
-	writel(data, priv->cpu_base[0] + offset);
+	writel(data, priv->swth_base[0] + offset);
 }
 
 static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 {
-	return readl(priv->cpu_base[0] + offset);
+	return readl(priv->swth_base[0] + offset);
 }
 
 /* These accessors should be used to access:
@@ -1123,13 +1150,13 @@ static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 static void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
 			       u32 offset, u32 data)
 {
-	writel(data, priv->cpu_base[cpu] + offset);
+	writel(data, priv->swth_base[cpu] + offset);
 }
 
 static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
 			     u32 offset)
 {
-	return readl(priv->cpu_base[cpu] + offset);
+	return readl(priv->swth_base[cpu] + offset);
 }
 
 static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
@@ -4070,7 +4097,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 
 		port->pool_long->port_map |= (1 << port->id);
 
-		for (rxq = 0; rxq < rxq_number; rxq++)
+		for (rxq = 0; rxq < port->nrxqs; rxq++)
 			mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
 	}
 
@@ -4084,7 +4111,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 
 		port->pool_short->port_map |= (1 << port->id);
 
-		for (rxq = 0; rxq < rxq_number; rxq++)
+		for (rxq = 0; rxq < port->nrxqs; rxq++)
 			mvpp2_rxq_short_pool_set(port, rxq,
 						 port->pool_short->id);
 	}
@@ -4125,22 +4152,40 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 
 static inline void mvpp2_interrupts_enable(struct mvpp2_port *port)
 {
-	int cpu, cpu_mask = 0;
+	int i, sw_thread_mask = 0;
+
+	for (i = 0; i < port->nqvecs; i++)
+		sw_thread_mask |= port->qvecs[i].sw_thread_mask;
 
-	for_each_present_cpu(cpu)
-		cpu_mask |= 1 << cpu;
 	mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
-		    MVPP2_ISR_ENABLE_INTERRUPT(cpu_mask));
+		    MVPP2_ISR_ENABLE_INTERRUPT(sw_thread_mask));
 }
 
 static inline void mvpp2_interrupts_disable(struct mvpp2_port *port)
 {
-	int cpu, cpu_mask = 0;
+	int i, sw_thread_mask = 0;
+
+	for (i = 0; i < port->nqvecs; i++)
+		sw_thread_mask |= port->qvecs[i].sw_thread_mask;
+
+	mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
+		    MVPP2_ISR_DISABLE_INTERRUPT(sw_thread_mask));
+}
+
+static inline void mvpp2_qvec_interrupt_enable(struct mvpp2_queue_vector *qvec)
+{
+	struct mvpp2_port *port = qvec->port;
+
+	mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
+		    MVPP2_ISR_ENABLE_INTERRUPT(qvec->sw_thread_mask));
+}
+
+static inline void mvpp2_qvec_interrupt_disable(struct mvpp2_queue_vector *qvec)
+{
+	struct mvpp2_port *port = qvec->port;
 
-	for_each_present_cpu(cpu)
-		cpu_mask |= 1 << cpu;
 	mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
-		    MVPP2_ISR_DISABLE_INTERRUPT(cpu_mask));
+		    MVPP2_ISR_DISABLE_INTERRUPT(qvec->sw_thread_mask));
 }
 
 /* Mask the current CPU's Rx/Tx interrupts
@@ -4162,11 +4207,40 @@ static void mvpp2_interrupts_mask(void *arg)
 static void mvpp2_interrupts_unmask(void *arg)
 {
 	struct mvpp2_port *port = arg;
+	u32 val;
+
+	val = MVPP2_CAUSE_MISC_SUM_MASK |
+		MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+	if (port->has_tx_irqs)
+		val |= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
 
 	mvpp2_percpu_write(port->priv, smp_processor_id(),
-			   MVPP2_ISR_RX_TX_MASK_REG(port->id),
-			   (MVPP2_CAUSE_MISC_SUM_MASK |
-			    MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
+			   MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
+}
+
+static void
+mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask)
+{
+	u32 val;
+	int i;
+
+	if (port->priv->hw_version != MVPP22)
+		return;
+
+	if (mask)
+		val = 0;
+	else
+		val = MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+
+	for (i = 0; i < port->nqvecs; i++) {
+		struct mvpp2_queue_vector *v = port->qvecs + i;
+
+		if (v->type != MVPP2_QUEUE_VECTOR_SHARED)
+			continue;
+
+		mvpp2_percpu_write(port->priv, v->sw_thread_id,
+				   MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
+	}
 }
 
 /* Port configuration routines */
@@ -4376,7 +4450,7 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
 		    MVPP2_RX_LOW_LATENCY_PKT_SIZE(256));
 
 	/* Enable Rx cache snoop */
-	for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+	for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
 		queue = port->rxqs[lrxq]->id;
 		val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
 		val |= MVPP2_SNOOP_PKT_SIZE_MASK |
@@ -4394,7 +4468,7 @@ static void mvpp2_ingress_enable(struct mvpp2_port *port)
 	u32 val;
 	int lrxq, queue;
 
-	for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+	for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
 		queue = port->rxqs[lrxq]->id;
 		val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
 		val &= ~MVPP2_RXQ_DISABLE_MASK;
@@ -4407,7 +4481,7 @@ static void mvpp2_ingress_disable(struct mvpp2_port *port)
 	u32 val;
 	int lrxq, queue;
 
-	for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+	for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
 		queue = port->rxqs[lrxq]->id;
 		val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
 		val |= MVPP2_RXQ_DISABLE_MASK;
@@ -4426,7 +4500,7 @@ static void mvpp2_egress_enable(struct mvpp2_port *port)
 
 	/* Enable all initialized TXs. */
 	qmap = 0;
-	for (queue = 0; queue < txq_number; queue++) {
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		struct mvpp2_tx_queue *txq = port->txqs[queue];
 
 		if (txq->descs)
@@ -4712,7 +4786,7 @@ static void mvpp2_txq_sent_counter_clear(void *arg)
 	struct mvpp2_port *port = arg;
 	int queue;
 
-	for (queue = 0; queue < txq_number; queue++) {
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		int id = port->txqs[queue]->id;
 
 		mvpp2_percpu_read(port->priv, smp_processor_id(),
@@ -4753,7 +4827,7 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
 		mvpp2_write(port->priv, MVPP2_TXP_SCHED_TOKEN_SIZE_REG, val);
 	}
 
-	for (txq = 0; txq < txq_number; txq++) {
+	for (txq = 0; txq < port->ntxqs; txq++) {
 		val = mvpp2_read(port->priv,
 				 MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq));
 		size = val & MVPP2_TXQ_TOKEN_SIZE_MAX;
@@ -4787,6 +4861,23 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 	put_cpu();
 }
 
+/* For some reason in the LSP this is done on each CPU. Why ? */
+static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port,
+				   struct mvpp2_tx_queue *txq)
+{
+	int cpu = get_cpu();
+	u32 val;
+
+	if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK)
+		txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK;
+
+	val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET);
+	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_THRESH_REG, val);
+
+	put_cpu();
+}
+
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
 {
 	u64 tmp = (u64)clk_hz * usec;
@@ -4823,6 +4914,22 @@ static void mvpp2_rx_time_coal_set(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_ISR_RX_THRESHOLD_REG(rxq->id), val);
 }
 
+static void mvpp2_tx_time_coal_set(struct mvpp2_port *port)
+{
+	unsigned long freq = port->priv->tclk;
+	u32 val = mvpp2_usec_to_cycles(port->tx_time_coal, freq);
+
+	if (val > MVPP2_MAX_ISR_TX_THRESHOLD) {
+		port->tx_time_coal =
+			mvpp2_cycles_to_usec(MVPP2_MAX_ISR_TX_THRESHOLD, freq);
+
+		/* re-evaluate to get actual register value */
+		val = mvpp2_usec_to_cycles(port->tx_time_coal, freq);
+	}
+
+	mvpp2_write(port->priv, MVPP2_ISR_TX_THRESHOLD_REG(port->id), val);
+}
+
 /* Free Tx queue skbuffs */
 static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
 				struct mvpp2_tx_queue *txq,
@@ -4881,7 +4988,8 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
 			netif_tx_wake_queue(nq);
 }
 
-static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause)
+static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause,
+				  int cpu)
 {
 	struct mvpp2_tx_queue *txq;
 	struct mvpp2_txq_pcpu *txq_pcpu;
@@ -4892,7 +5000,7 @@ static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause)
 		if (!txq)
 			break;
 
-		txq_pcpu = this_cpu_ptr(txq->pcpu);
+		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
 
 		if (txq_pcpu->count) {
 			mvpp2_txq_done(port, txq, txq_pcpu);
@@ -5229,7 +5337,7 @@ static void mvpp2_cleanup_txqs(struct mvpp2_port *port)
 	val |= MVPP2_TX_PORT_FLUSH_MASK(port->id);
 	mvpp2_write(port->priv, MVPP2_TX_PORT_FLUSH_REG, val);
 
-	for (queue = 0; queue < txq_number; queue++) {
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		txq = port->txqs[queue];
 		mvpp2_txq_clean(port, txq);
 		mvpp2_txq_deinit(port, txq);
@@ -5246,7 +5354,7 @@ static void mvpp2_cleanup_rxqs(struct mvpp2_port *port)
 {
 	int queue;
 
-	for (queue = 0; queue < rxq_number; queue++)
+	for (queue = 0; queue < port->nrxqs; queue++)
 		mvpp2_rxq_deinit(port, port->rxqs[queue]);
 }
 
@@ -5255,7 +5363,7 @@ static int mvpp2_setup_rxqs(struct mvpp2_port *port)
 {
 	int queue, err;
 
-	for (queue = 0; queue < rxq_number; queue++) {
+	for (queue = 0; queue < port->nrxqs; queue++) {
 		err = mvpp2_rxq_init(port, port->rxqs[queue]);
 		if (err)
 			goto err_cleanup;
@@ -5273,13 +5381,21 @@ static int mvpp2_setup_txqs(struct mvpp2_port *port)
 	struct mvpp2_tx_queue *txq;
 	int queue, err;
 
-	for (queue = 0; queue < txq_number; queue++) {
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		txq = port->txqs[queue];
 		err = mvpp2_txq_init(port, txq);
 		if (err)
 			goto err_cleanup;
 	}
 
+	if (port->has_tx_irqs) {
+		mvpp2_tx_time_coal_set(port);
+		for (queue = 0; queue < port->ntxqs; queue++) {
+			txq = port->txqs[queue];
+			mvpp2_tx_pkts_coal_set(port, txq);
+		}
+	}
+
 	on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1);
 	return 0;
 
@@ -5291,11 +5407,11 @@ err_cleanup:
 /* The callback for per-port interrupt */
 static irqreturn_t mvpp2_isr(int irq, void *dev_id)
 {
-	struct mvpp2_port *port = (struct mvpp2_port *)dev_id;
+	struct mvpp2_queue_vector *qv = dev_id;
 
-	mvpp2_interrupts_disable(port);
+	mvpp2_qvec_interrupt_disable(qv);
 
-	napi_schedule(&port->napi);
+	napi_schedule(&qv->napi);
 
 	return IRQ_HANDLED;
 }
@@ -5385,8 +5501,8 @@ static void mvpp2_tx_proc_cb(unsigned long data)
 	port_pcpu->timer_scheduled = false;
 
 	/* Process all the Tx queues */
-	cause = (1 << txq_number) - 1;
-	tx_todo = mvpp2_tx_done(port, cause);
+	cause = (1 << port->ntxqs) - 1;
+	tx_todo = mvpp2_tx_done(port, cause, smp_processor_id());
 
 	/* Set the timer in case not all the packets were processed */
 	if (tx_todo)
@@ -5498,8 +5614,8 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
 }
 
 /* Main rx processing */
-static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
-		    struct mvpp2_rx_queue *rxq)
+static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
+		    int rx_todo, struct mvpp2_rx_queue *rxq)
 {
 	struct net_device *dev = port->dev;
 	int rx_received;
@@ -5577,7 +5693,7 @@ err_drop_frame:
 		skb->protocol = eth_type_trans(skb, dev);
 		mvpp2_rx_csum(port, rx_status, skb);
 
-		napi_gro_receive(&port->napi, skb);
+		napi_gro_receive(napi, skb);
 	}
 
 	if (rcvd_pkts) {
@@ -5762,7 +5878,8 @@ out:
 		mvpp2_txq_done(port, txq, txq_pcpu);
 
 	/* Set the timer in case not all frags were processed */
-	if (txq_pcpu->count <= frags && txq_pcpu->count > 0) {
+	if (!port->has_tx_irqs && txq_pcpu->count <= frags &&
+	    txq_pcpu->count > 0) {
 		struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
 
 		mvpp2_timer_set(port_pcpu);
@@ -5783,11 +5900,14 @@ static inline void mvpp2_cause_error(struct net_device *dev, int cause)
 
 static int mvpp2_poll(struct napi_struct *napi, int budget)
 {
-	u32 cause_rx_tx, cause_rx, cause_misc;
+	u32 cause_rx_tx, cause_rx, cause_tx, cause_misc;
 	int rx_done = 0;
 	struct mvpp2_port *port = netdev_priv(napi->dev);
+	struct mvpp2_queue_vector *qv;
 	int cpu = smp_processor_id();
 
+	qv = container_of(napi, struct mvpp2_queue_vector, napi);
+
 	/* Rx/Tx cause register
 	 *
 	 * Bits 0-15: each bit indicates received packets on the Rx queue
@@ -5798,11 +5918,10 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 	 *
 	 * Each CPU has its own Rx/Tx cause register
 	 */
-	cause_rx_tx = mvpp2_percpu_read(port->priv, cpu,
+	cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
 					MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
-	cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
-	cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
 
+	cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
 	if (cause_misc) {
 		mvpp2_cause_error(port->dev, cause_misc);
 
@@ -5813,10 +5932,16 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 				   cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
 	}
 
-	cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+	cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
+	if (cause_tx) {
+		cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET;
+		mvpp2_tx_done(port, cause_tx, qv->sw_thread_id);
+	}
 
 	/* Process RX packets */
-	cause_rx |= port->pending_cause_rx;
+	cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+	cause_rx <<= qv->first_rxq;
+	cause_rx |= qv->pending_cause_rx;
 	while (cause_rx && budget > 0) {
 		int count;
 		struct mvpp2_rx_queue *rxq;
@@ -5825,7 +5950,7 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 		if (!rxq)
 			break;
 
-		count = mvpp2_rx(port, budget, rxq);
+		count = mvpp2_rx(port, napi, budget, rxq);
 		rx_done += count;
 		budget -= count;
 		if (budget > 0) {
@@ -5841,9 +5966,9 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 		cause_rx = 0;
 		napi_complete_done(napi, rx_done);
 
-		mvpp2_interrupts_enable(port);
+		mvpp2_qvec_interrupt_enable(qv);
 	}
-	port->pending_cause_rx = cause_rx;
+	qv->pending_cause_rx = cause_rx;
 	return rx_done;
 }
 
@@ -5851,11 +5976,13 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 static void mvpp2_start_dev(struct mvpp2_port *port)
 {
 	struct net_device *ndev = port->dev;
+	int i;
 
 	mvpp2_gmac_max_rx_size_set(port);
 	mvpp2_txp_max_tx_size_set(port);
 
-	napi_enable(&port->napi);
+	for (i = 0; i < port->nqvecs; i++)
+		napi_enable(&port->qvecs[i].napi);
 
 	/* Enable interrupts on all CPUs */
 	mvpp2_interrupts_enable(port);
@@ -5869,6 +5996,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 static void mvpp2_stop_dev(struct mvpp2_port *port)
 {
 	struct net_device *ndev = port->dev;
+	int i;
 
 	/* Stop new packets from arriving to RXQs */
 	mvpp2_ingress_disable(port);
@@ -5878,7 +6006,8 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
 	/* Disable interrupts on all CPUs */
 	mvpp2_interrupts_disable(port);
 
-	napi_disable(&port->napi);
+	for (i = 0; i < port->nqvecs; i++)
+		napi_disable(&port->qvecs[i].napi);
 
 	netif_carrier_off(port->dev);
 	netif_tx_stop_all_queues(port->dev);
@@ -5964,6 +6093,46 @@ static void mvpp2_phy_disconnect(struct mvpp2_port *port)
 	phy_disconnect(ndev->phydev);
 }
 
+static int mvpp2_irqs_init(struct mvpp2_port *port)
+{
+	int err, i;
+
+	for (i = 0; i < port->nqvecs; i++) {
+		struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+		err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
+		if (err)
+			goto err;
+
+		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+			irq_set_affinity_hint(qv->irq,
+					      cpumask_of(qv->sw_thread_id));
+	}
+
+	return 0;
+err:
+	for (i = 0; i < port->nqvecs; i++) {
+		struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+		irq_set_affinity_hint(qv->irq, NULL);
+		free_irq(qv->irq, qv);
+	}
+
+	return err;
+}
+
+static void mvpp2_irqs_deinit(struct mvpp2_port *port)
+{
+	int i;
+
+	for (i = 0; i < port->nqvecs; i++) {
+		struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+		irq_set_affinity_hint(qv->irq, NULL);
+		free_irq(qv->irq, qv);
+	}
+}
+
 static int mvpp2_open(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
@@ -6006,9 +6175,9 @@ static int mvpp2_open(struct net_device *dev)
 		goto err_cleanup_rxqs;
 	}
 
-	err = request_irq(port->irq, mvpp2_isr, 0, dev->name, port);
+	err = mvpp2_irqs_init(port);
 	if (err) {
-		netdev_err(port->dev, "cannot request IRQ %d\n", port->irq);
+		netdev_err(port->dev, "cannot init IRQs\n");
 		goto err_cleanup_txqs;
 	}
 
@@ -6021,13 +6190,14 @@ static int mvpp2_open(struct net_device *dev)
 
 	/* Unmask interrupts on all CPUs */
 	on_each_cpu(mvpp2_interrupts_unmask, port, 1);
+	mvpp2_shared_interrupt_mask_unmask(port, false);
 
 	mvpp2_start_dev(port);
 
 	return 0;
 
 err_free_irq:
-	free_irq(port->irq, port);
+	mvpp2_irqs_deinit(port);
 err_cleanup_txqs:
 	mvpp2_cleanup_txqs(port);
 err_cleanup_rxqs:
@@ -6046,14 +6216,17 @@ static int mvpp2_stop(struct net_device *dev)
 
 	/* Mask interrupts on all CPUs */
 	on_each_cpu(mvpp2_interrupts_mask, port, 1);
+	mvpp2_shared_interrupt_mask_unmask(port, true);
 
-	free_irq(port->irq, port);
-	for_each_present_cpu(cpu) {
-		port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+	mvpp2_irqs_deinit(port);
+	if (!port->has_tx_irqs) {
+		for_each_present_cpu(cpu) {
+			port_pcpu = per_cpu_ptr(port->pcpu, cpu);
 
-		hrtimer_cancel(&port_pcpu->tx_done_timer);
-		port_pcpu->timer_scheduled = false;
-		tasklet_kill(&port_pcpu->tx_done_tasklet);
+			hrtimer_cancel(&port_pcpu->tx_done_timer);
+			port_pcpu->timer_scheduled = false;
+			tasklet_kill(&port_pcpu->tx_done_tasklet);
+		}
 	}
 	mvpp2_cleanup_rxqs(port);
 	mvpp2_cleanup_txqs(port);
@@ -6228,7 +6401,7 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
 	struct mvpp2_port *port = netdev_priv(dev);
 	int queue;
 
-	for (queue = 0; queue < rxq_number; queue++) {
+	for (queue = 0; queue < port->nrxqs; queue++) {
 		struct mvpp2_rx_queue *rxq = port->rxqs[queue];
 
 		rxq->time_coal = c->rx_coalesce_usecs;
@@ -6237,10 +6410,18 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
 		mvpp2_rx_time_coal_set(port, rxq);
 	}
 
-	for (queue = 0; queue < txq_number; queue++) {
+	if (port->has_tx_irqs) {
+		port->tx_time_coal = c->tx_coalesce_usecs;
+		mvpp2_tx_time_coal_set(port);
+	}
+
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		struct mvpp2_tx_queue *txq = port->txqs[queue];
 
 		txq->done_pkts_coal = c->tx_max_coalesced_frames;
+
+		if (port->has_tx_irqs)
+			mvpp2_tx_pkts_coal_set(port, txq);
 	}
 
 	return 0;
@@ -6365,6 +6546,129 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
+/* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
+ * had a single IRQ defined per-port.
+ */
+static int mvpp2_simple_queue_vectors_init(struct mvpp2_port *port,
+					   struct device_node *port_node)
+{
+	struct mvpp2_queue_vector *v = &port->qvecs[0];
+
+	v->first_rxq = 0;
+	v->nrxqs = port->nrxqs;
+	v->type = MVPP2_QUEUE_VECTOR_SHARED;
+	v->sw_thread_id = 0;
+	v->sw_thread_mask = *cpumask_bits(cpu_online_mask);
+	v->port = port;
+	v->irq = irq_of_parse_and_map(port_node, 0);
+	if (v->irq <= 0)
+		return -EINVAL;
+	netif_napi_add(port->dev, &v->napi, mvpp2_poll,
+		       NAPI_POLL_WEIGHT);
+
+	port->nqvecs = 1;
+
+	return 0;
+}
+
+static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port,
+					  struct device_node *port_node)
+{
+	struct mvpp2_queue_vector *v;
+	int i, ret;
+
+	port->nqvecs = num_possible_cpus();
+	if (queue_mode == MVPP2_QDIST_SINGLE_MODE)
+		port->nqvecs += 1;
+
+	for (i = 0; i < port->nqvecs; i++) {
+		char irqname[16];
+
+		v = port->qvecs + i;
+
+		v->port = port;
+		v->type = MVPP2_QUEUE_VECTOR_PRIVATE;
+		v->sw_thread_id = i;
+		v->sw_thread_mask = BIT(i);
+
+		snprintf(irqname, sizeof(irqname), "tx-cpu%d", i);
+
+		if (queue_mode == MVPP2_QDIST_MULTI_MODE) {
+			v->first_rxq = i * MVPP2_DEFAULT_RXQ;
+			v->nrxqs = MVPP2_DEFAULT_RXQ;
+		} else if (queue_mode == MVPP2_QDIST_SINGLE_MODE &&
+			   i == (port->nqvecs - 1)) {
+			v->first_rxq = 0;
+			v->nrxqs = port->nrxqs;
+			v->type = MVPP2_QUEUE_VECTOR_SHARED;
+			strncpy(irqname, "rx-shared", sizeof(irqname));
+		}
+
+		v->irq = of_irq_get_byname(port_node, irqname);
+		if (v->irq <= 0) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		netif_napi_add(port->dev, &v->napi, mvpp2_poll,
+			       NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+
+err:
+	for (i = 0; i < port->nqvecs; i++)
+		irq_dispose_mapping(port->qvecs[i].irq);
+	return ret;
+}
+
+static int mvpp2_queue_vectors_init(struct mvpp2_port *port,
+				    struct device_node *port_node)
+{
+	if (port->has_tx_irqs)
+		return mvpp2_multi_queue_vectors_init(port, port_node);
+	else
+		return mvpp2_simple_queue_vectors_init(port, port_node);
+}
+
+static void mvpp2_queue_vectors_deinit(struct mvpp2_port *port)
+{
+	int i;
+
+	for (i = 0; i < port->nqvecs; i++)
+		irq_dispose_mapping(port->qvecs[i].irq);
+}
+
+/* Configure Rx queue group interrupt for this port */
+static void mvpp2_rx_irqs_setup(struct mvpp2_port *port)
+{
+	struct mvpp2 *priv = port->priv;
+	u32 val;
+	int i;
+
+	if (priv->hw_version == MVPP21) {
+		mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
+			    port->nrxqs);
+		return;
+	}
+
+	/* Handle the more complicated PPv2.2 case */
+	for (i = 0; i < port->nqvecs; i++) {
+		struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+		if (!qv->nrxqs)
+			continue;
+
+		val = qv->sw_thread_id;
+		val |= port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET;
+		mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
+
+		val = qv->first_rxq;
+		val |= qv->nrxqs << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET;
+		mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
+	}
+}
+
 /* Initialize port HW */
 static int mvpp2_port_init(struct mvpp2_port *port)
 {
@@ -6373,15 +6677,22 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	struct mvpp2_txq_pcpu *txq_pcpu;
 	int queue, cpu, err;
 
-	if (port->first_rxq + rxq_number >
+	/* Checks for hardware constraints */
+	if (port->first_rxq + port->nrxqs >
 	    MVPP2_MAX_PORTS * priv->max_port_rxqs)
 		return -EINVAL;
 
+	if (port->nrxqs % 4 || (port->nrxqs > priv->max_port_rxqs) ||
+	    (port->ntxqs > MVPP2_MAX_TXQ))
+		return -EINVAL;
+
 	/* Disable port */
 	mvpp2_egress_disable(port);
 	mvpp2_port_disable(port);
 
-	port->txqs = devm_kcalloc(dev, txq_number, sizeof(*port->txqs),
+	port->tx_time_coal = MVPP2_TXDONE_COAL_USEC;
+
+	port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs),
 				  GFP_KERNEL);
 	if (!port->txqs)
 		return -ENOMEM;
@@ -6389,7 +6700,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	/* Associate physical Tx queues to this port and initialize.
 	 * The mapping is predefined.
 	 */
-	for (queue = 0; queue < txq_number; queue++) {
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		int queue_phy_id = mvpp2_txq_phys(port->id, queue);
 		struct mvpp2_tx_queue *txq;
 
@@ -6416,7 +6727,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 		port->txqs[queue] = txq;
 	}
 
-	port->rxqs = devm_kcalloc(dev, rxq_number, sizeof(*port->rxqs),
+	port->rxqs = devm_kcalloc(dev, port->nrxqs, sizeof(*port->rxqs),
 				  GFP_KERNEL);
 	if (!port->rxqs) {
 		err = -ENOMEM;
@@ -6424,7 +6735,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	}
 
 	/* Allocate and initialize Rx queue for this port */
-	for (queue = 0; queue < rxq_number; queue++) {
+	for (queue = 0; queue < port->nrxqs; queue++) {
 		struct mvpp2_rx_queue *rxq;
 
 		/* Map physical Rx queue to port's logical Rx queue */
@@ -6441,22 +6752,10 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 		port->rxqs[queue] = rxq;
 	}
 
-	/* Configure Rx queue group interrupt for this port */
-	if (priv->hw_version == MVPP21) {
-		mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
-			    rxq_number);
-	} else {
-		u32 val;
-
-		val = (port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
-		mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
-
-		val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
-		mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
-	}
+	mvpp2_rx_irqs_setup(port);
 
 	/* Create Rx descriptor rings */
-	for (queue = 0; queue < rxq_number; queue++) {
+	for (queue = 0; queue < port->nrxqs; queue++) {
 		struct mvpp2_rx_queue *rxq = port->rxqs[queue];
 
 		rxq->size = port->rx_ring_size;
@@ -6484,7 +6783,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	return 0;
 
 err_free_percpu:
-	for (queue = 0; queue < txq_number; queue++) {
+	for (queue = 0; queue < port->ntxqs; queue++) {
 		if (!port->txqs[queue])
 			continue;
 		free_percpu(port->txqs[queue]->pcpu);
@@ -6492,6 +6791,30 @@ err_free_percpu:
 	return err;
 }
 
+/* Checks if the port DT description has the TX interrupts
+ * described. On PPv2.1, there are no such interrupts. On PPv2.2,
+ * there are available, but we need to keep support for old DTs.
+ */
+static bool mvpp2_port_has_tx_irqs(struct mvpp2 *priv,
+				   struct device_node *port_node)
+{
+	char *irqs[5] = { "rx-shared", "tx-cpu0", "tx-cpu1",
+			  "tx-cpu2", "tx-cpu3" };
+	int ret, i;
+
+	if (priv->hw_version == MVPP21)
+		return false;
+
+	for (i = 0; i < 5; i++) {
+		ret = of_property_match_string(port_node, "interrupt-names",
+					       irqs[i]);
+		if (ret < 0)
+			return false;
+	}
+
+	return true;
+}
+
 /* Ports initialization */
 static int mvpp2_port_probe(struct platform_device *pdev,
 			    struct device_node *port_node,
@@ -6505,12 +6828,25 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	const char *dt_mac_addr;
 	const char *mac_from;
 	char hw_mac_addr[ETH_ALEN];
+	unsigned int ntxqs, nrxqs;
+	bool has_tx_irqs;
 	u32 id;
 	int features;
 	int phy_mode;
 	int err, i, cpu;
 
-	dev = alloc_etherdev_mqs(sizeof(*port), txq_number, rxq_number);
+	has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node);
+
+	if (!has_tx_irqs)
+		queue_mode = MVPP2_QDIST_SINGLE_MODE;
+
+	ntxqs = MVPP2_MAX_TXQ;
+	if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_MULTI_MODE)
+		nrxqs = MVPP2_DEFAULT_RXQ * num_possible_cpus();
+	else
+		nrxqs = MVPP2_DEFAULT_RXQ;
+
+	dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
 	if (!dev)
 		return -ENOMEM;
 
@@ -6540,20 +6876,22 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->ethtool_ops = &mvpp2_eth_tool_ops;
 
 	port = netdev_priv(dev);
+	port->dev = dev;
+	port->ntxqs = ntxqs;
+	port->nrxqs = nrxqs;
+	port->priv = priv;
+	port->has_tx_irqs = has_tx_irqs;
 
-	port->irq = irq_of_parse_and_map(port_node, 0);
-	if (port->irq <= 0) {
-		err = -EINVAL;
+	err = mvpp2_queue_vectors_init(port, port_node);
+	if (err)
 		goto err_free_netdev;
-	}
 
 	if (of_property_read_bool(port_node, "marvell,loopback"))
 		port->flags |= MVPP2_F_LOOPBACK;
 
-	port->priv = priv;
 	port->id = id;
 	if (priv->hw_version == MVPP21)
-		port->first_rxq = port->id * rxq_number;
+		port->first_rxq = port->id * port->nrxqs;
 	else
 		port->first_rxq = port->id * priv->max_port_rxqs;
 
@@ -6565,14 +6903,14 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		port->base = devm_ioremap_resource(&pdev->dev, res);
 		if (IS_ERR(port->base)) {
 			err = PTR_ERR(port->base);
-			goto err_free_irq;
+			goto err_deinit_qvecs;
 		}
 	} else {
 		if (of_property_read_u32(port_node, "gop-port-id",
 					 &port->gop_id)) {
 			err = -EINVAL;
 			dev_err(&pdev->dev, "missing gop-port-id value\n");
-			goto err_free_irq;
+			goto err_deinit_qvecs;
 		}
 
 		port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
@@ -6582,7 +6920,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
 	if (!port->stats) {
 		err = -ENOMEM;
-		goto err_free_irq;
+		goto err_deinit_qvecs;
 	}
 
 	dt_mac_addr = of_get_mac_address(port_node);
@@ -6603,7 +6941,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	port->tx_ring_size = MVPP2_MAX_TXD;
 	port->rx_ring_size = MVPP2_MAX_RXD;
-	port->dev = dev;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	err = mvpp2_port_init(port);
@@ -6626,19 +6963,21 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		goto err_free_txq_pcpu;
 	}
 
-	for_each_present_cpu(cpu) {
-		port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+	if (!port->has_tx_irqs) {
+		for_each_present_cpu(cpu) {
+			port_pcpu = per_cpu_ptr(port->pcpu, cpu);
 
-		hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
-			     HRTIMER_MODE_REL_PINNED);
-		port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
-		port_pcpu->timer_scheduled = false;
+			hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
+				     HRTIMER_MODE_REL_PINNED);
+			port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
+			port_pcpu->timer_scheduled = false;
 
-		tasklet_init(&port_pcpu->tx_done_tasklet, mvpp2_tx_proc_cb,
-			     (unsigned long)dev);
+			tasklet_init(&port_pcpu->tx_done_tasklet,
+				     mvpp2_tx_proc_cb,
+				     (unsigned long)dev);
+		}
 	}
 
-	netif_napi_add(dev, &port->napi, mvpp2_poll, NAPI_POLL_WEIGHT);
 	features = NETIF_F_SG | NETIF_F_IP_CSUM;
 	dev->features = features | NETIF_F_RXCSUM;
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
@@ -6662,12 +7001,12 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 err_free_port_pcpu:
 	free_percpu(port->pcpu);
 err_free_txq_pcpu:
-	for (i = 0; i < txq_number; i++)
+	for (i = 0; i < port->ntxqs; i++)
 		free_percpu(port->txqs[i]->pcpu);
 err_free_stats:
 	free_percpu(port->stats);
-err_free_irq:
-	irq_dispose_mapping(port->irq);
+err_deinit_qvecs:
+	mvpp2_queue_vectors_deinit(port);
 err_free_netdev:
 	of_node_put(phy_node);
 	free_netdev(dev);
@@ -6683,9 +7022,9 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
 	of_node_put(port->phy_node);
 	free_percpu(port->pcpu);
 	free_percpu(port->stats);
-	for (i = 0; i < txq_number; i++)
+	for (i = 0; i < port->ntxqs; i++)
 		free_percpu(port->txqs[i]->pcpu);
-	irq_dispose_mapping(port->irq);
+	mvpp2_queue_vectors_deinit(port);
 	free_netdev(port->dev);
 }
 
@@ -6800,13 +7139,6 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
 	int err, i;
 	u32 val;
 
-	/* Checks for hardware constraints */
-	if (rxq_number % 4 || (rxq_number > priv->max_port_rxqs) ||
-	    (txq_number > MVPP2_MAX_TXQ)) {
-		dev_err(&pdev->dev, "invalid queue size parameter\n");
-		return -EINVAL;
-	}
-
 	/* MBUS windows configuration */
 	dram_target_info = mv_mbus_dram_info();
 	if (dram_target_info)
@@ -6845,23 +7177,6 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
 	/* Rx Fifo Init */
 	mvpp2_rx_fifo_init(priv);
 
-	/* Reset Rx queue group interrupt configuration */
-	for (i = 0; i < MVPP2_MAX_PORTS; i++) {
-		if (priv->hw_version == MVPP21) {
-			mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(i),
-				    rxq_number);
-			continue;
-		} else {
-			u32 val;
-
-			val = (i << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
-			mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
-
-			val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
-			mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
-		}
-	}
-
 	if (priv->hw_version == MVPP21)
 		writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
 		       priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
@@ -6892,7 +7207,7 @@ static int mvpp2_probe(struct platform_device *pdev)
 	struct mvpp2 *priv;
 	struct resource *res;
 	void __iomem *base;
-	int port_count, cpu;
+	int port_count, i;
 	int err;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -6919,12 +7234,12 @@ static int mvpp2_probe(struct platform_device *pdev)
 			return PTR_ERR(priv->iface_base);
 	}
 
-	for_each_present_cpu(cpu) {
+	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
 		u32 addr_space_sz;
 
 		addr_space_sz = (priv->hw_version == MVPP21 ?
 				 MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ);
-		priv->cpu_base[cpu] = base + cpu * addr_space_sz;
+		priv->swth_base[i] = base + i * addr_space_sz;
 	}
 
 	if (priv->hw_version == MVPP21)
diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 698bb89aa901..f9149d2a4694 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -7,11 +7,11 @@ config NET_VENDOR_MEDIATEK
 if NET_VENDOR_MEDIATEK
 
 config NET_MEDIATEK_SOC
-	tristate "MediaTek MT7623 Gigabit ethernet support"
-	depends on NET_VENDOR_MEDIATEK && (MACH_MT7623 || MACH_MT2701)
+	tristate "MediaTek SoC Gigabit Ethernet support"
+	depends on NET_VENDOR_MEDIATEK
 	select PHYLIB
 	---help---
 	  This driver supports the gigabit ethernet MACs in the
-	  MediaTek MT2701/MT7623 chipset family.
+	  MediaTek SoC family.
 
 endif #NET_VENDOR_MEDIATEK
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e588a0cdb074..acf2b3b8009c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -53,7 +53,8 @@ static const struct mtk_ethtool_stats {
 };
 
 static const char * const mtk_clks_source_name[] = {
-	"ethif", "esw", "gp1", "gp2", "trgpll"
+	"ethif", "esw", "gp0", "gp1", "gp2", "trgpll", "sgmii_tx250m",
+	"sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll"
 };
 
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -163,6 +164,47 @@ static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
 	mtk_w32(eth, val, TRGMII_TCK_CTRL);
 }
 
+static void mtk_gmac_sgmii_hw_setup(struct mtk_eth *eth, int mac_id)
+{
+	u32 val;
+
+	/* Setup the link timer and QPHY power up inside SGMIISYS */
+	regmap_write(eth->sgmiisys, SGMSYS_PCS_LINK_TIMER,
+		     SGMII_LINK_TIMER_DEFAULT);
+
+	regmap_read(eth->sgmiisys, SGMSYS_SGMII_MODE, &val);
+	val |= SGMII_REMOTE_FAULT_DIS;
+	regmap_write(eth->sgmiisys, SGMSYS_SGMII_MODE, val);
+
+	regmap_read(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, &val);
+	val |= SGMII_AN_RESTART;
+	regmap_write(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, val);
+
+	regmap_read(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, &val);
+	val &= ~SGMII_PHYA_PWD;
+	regmap_write(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, val);
+
+	/* Determine MUX for which GMAC uses the SGMII interface */
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_DUAL_GMAC_SHARED_SGMII)) {
+		regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+		val &= ~SYSCFG0_SGMII_MASK;
+		val |= !mac_id ? SYSCFG0_SGMII_GMAC1 : SYSCFG0_SGMII_GMAC2;
+		regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
+
+		dev_info(eth->dev, "setup shared sgmii for gmac=%d\n",
+			 mac_id);
+	}
+
+	/* Setup the GMAC1 going through SGMII path when SoC also support
+	 * ESW on GMAC1
+	 */
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_GMAC1_ESW | MTK_GMAC1_SGMII) &&
+	    !mac_id) {
+		mtk_w32(eth, 0, MTK_MAC_MISC);
+		dev_info(eth->dev, "setup gmac1 going through sgmii");
+	}
+}
+
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
@@ -185,7 +227,8 @@ static void mtk_phy_link_adjust(struct net_device *dev)
 		break;
 	};
 
-	if (mac->id == 0 && !mac->trgmii)
+	if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) &&
+	    !mac->id && !mac->trgmii)
 		mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
 
 	if (dev->phydev->link)
@@ -269,6 +312,7 @@ static int mtk_phy_connect(struct net_device *dev)
 	if (!np)
 		return -ENODEV;
 
+	mac->ge_mode = 0;
 	switch (of_get_phy_mode(np)) {
 	case PHY_INTERFACE_MODE_TRGMII:
 		mac->trgmii = true;
@@ -276,7 +320,10 @@ static int mtk_phy_connect(struct net_device *dev)
 	case PHY_INTERFACE_MODE_RGMII_RXID:
 	case PHY_INTERFACE_MODE_RGMII_ID:
 	case PHY_INTERFACE_MODE_RGMII:
-		mac->ge_mode = 0;
+		break;
+	case PHY_INTERFACE_MODE_SGMII:
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII))
+			mtk_gmac_sgmii_hw_setup(eth, mac->id);
 		break;
 	case PHY_INTERFACE_MODE_MII:
 		mac->ge_mode = 1;
@@ -1032,7 +1079,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 	unsigned int done[MTK_MAX_DEVS];
 	unsigned int bytes[MTK_MAX_DEVS];
 	u32 cpu, dma;
-	static int condition;
 	int total = 0, i;
 
 	memset(done, 0, sizeof(done));
@@ -1056,10 +1102,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 			mac = 1;
 
 		skb = tx_buf->skb;
-		if (!skb) {
-			condition = 1;
+		if (!skb)
 			break;
-		}
 
 		if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
 			bytes[mac] += skb->len;
@@ -1837,9 +1881,36 @@ static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits)
 	mdelay(10);
 }
 
+static void mtk_clk_disable(struct mtk_eth *eth)
+{
+	int clk;
+
+	for (clk = MTK_CLK_MAX - 1; clk >= 0; clk--)
+		clk_disable_unprepare(eth->clks[clk]);
+}
+
+static int mtk_clk_enable(struct mtk_eth *eth)
+{
+	int clk, ret;
+
+	for (clk = 0; clk < MTK_CLK_MAX ; clk++) {
+		ret = clk_prepare_enable(eth->clks[clk]);
+		if (ret)
+			goto err_disable_clks;
+	}
+
+	return 0;
+
+err_disable_clks:
+	while (--clk >= 0)
+		clk_disable_unprepare(eth->clks[clk]);
+
+	return ret;
+}
+
 static int mtk_hw_init(struct mtk_eth *eth)
 {
-	int i, val;
+	int i, val, ret;
 
 	if (test_and_set_bit(MTK_HW_INIT, &eth->state))
 		return 0;
@@ -1847,10 +1918,10 @@ static int mtk_hw_init(struct mtk_eth *eth)
 	pm_runtime_enable(eth->dev);
 	pm_runtime_get_sync(eth->dev);
 
-	clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
-	clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
-	clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
-	clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
+	ret = mtk_clk_enable(eth);
+	if (ret)
+		goto err_disable_pm;
+
 	ethsys_reset(eth, RSTCTRL_FE);
 	ethsys_reset(eth, RSTCTRL_PPE);
 
@@ -1918,6 +1989,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
 	}
 
 	return 0;
+
+err_disable_pm:
+	pm_runtime_put_sync(eth->dev);
+	pm_runtime_disable(eth->dev);
+
+	return ret;
 }
 
 static int mtk_hw_deinit(struct mtk_eth *eth)
@@ -1925,10 +2002,7 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
 	if (!test_and_clear_bit(MTK_HW_INIT, &eth->state))
 		return 0;
 
-	clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
-	clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
-	clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
-	clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+	mtk_clk_disable(eth);
 
 	pm_runtime_put_sync(eth->dev);
 	pm_runtime_disable(eth->dev);
@@ -2395,6 +2469,7 @@ static int mtk_get_chip_id(struct mtk_eth *eth, u32 *chip_id)
 static bool mtk_is_hwlro_supported(struct mtk_eth *eth)
 {
 	switch (eth->chip_id) {
+	case MT7622_ETH:
 	case MT7623_ETH:
 		return true;
 	}
@@ -2406,6 +2481,7 @@ static int mtk_probe(struct platform_device *pdev)
 {
 	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	struct device_node *mac_np;
+	const struct of_device_id *match;
 	struct mtk_eth *eth;
 	int err;
 	int i;
@@ -2414,6 +2490,9 @@ static int mtk_probe(struct platform_device *pdev)
 	if (!eth)
 		return -ENOMEM;
 
+	match = of_match_device(of_mtk_match, &pdev->dev);
+	eth->soc = (struct mtk_soc_data *)match->data;
+
 	eth->dev = &pdev->dev;
 	eth->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(eth->base))
@@ -2430,6 +2509,16 @@ static int mtk_probe(struct platform_device *pdev)
 		return PTR_ERR(eth->ethsys);
 	}
 
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+		eth->sgmiisys =
+		syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+						"mediatek,sgmiisys");
+		if (IS_ERR(eth->sgmiisys)) {
+			dev_err(&pdev->dev, "no sgmiisys regmap found\n");
+			return PTR_ERR(eth->sgmiisys);
+		}
+	}
+
 	eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
 						    "mediatek,pctl");
 	if (IS_ERR(eth->pctl)) {
@@ -2450,7 +2539,12 @@ static int mtk_probe(struct platform_device *pdev)
 		if (IS_ERR(eth->clks[i])) {
 			if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER)
 				return -EPROBE_DEFER;
-			return -ENODEV;
+			if (eth->soc->required_clks & BIT(i)) {
+				dev_err(&pdev->dev, "clock %s not found\n",
+					mtk_clks_source_name[i]);
+				return -EINVAL;
+			}
+			eth->clks[i] = NULL;
 		}
 	}
 
@@ -2553,8 +2647,25 @@ static int mtk_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct mtk_soc_data mt2701_data = {
+	.caps = MTK_GMAC1_TRGMII,
+	.required_clks = MT7623_CLKS_BITMAP
+};
+
+static const struct mtk_soc_data mt7622_data = {
+	.caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW,
+	.required_clks = MT7622_CLKS_BITMAP
+};
+
+static const struct mtk_soc_data mt7623_data = {
+	.caps = MTK_GMAC1_TRGMII,
+	.required_clks = MT7623_CLKS_BITMAP
+};
+
 const struct of_device_id of_mtk_match[] = {
-	{ .compatible = "mediatek,mt2701-eth" },
+	{ .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
+	{ .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
+	{ .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 5868a09f623a..4594862e5a9b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -302,6 +302,9 @@
 #define PHY_IAC_REG_SHIFT	25
 #define PHY_IAC_TIMEOUT		HZ
 
+#define MTK_MAC_MISC		0x1000c
+#define MTK_MUX_TO_ESW		BIT(0)
+
 /* Mac control registers */
 #define MTK_MAC_MCR(x)		(0x10100 + (x * 0x100))
 #define MAC_MCR_MAX_RX_1536	BIT(24)
@@ -357,11 +360,15 @@
 #define ETHSYS_CHIPID0_3	0x0
 #define ETHSYS_CHIPID4_7	0x4
 #define MT7623_ETH		7623
+#define MT7622_ETH		7622
 
 /* ethernet subsystem config register */
 #define ETHSYS_SYSCFG0		0x14
 #define SYSCFG0_GE_MASK		0x3
 #define SYSCFG0_GE_MODE(x, y)	(x << (12 + (y * 2)))
+#define SYSCFG0_SGMII_MASK	(3 << 8)
+#define SYSCFG0_SGMII_GMAC1	((2 << 8) & GENMASK(9, 8))
+#define SYSCFG0_SGMII_GMAC2	((3 << 8) & GENMASK(9, 8))
 
 /* ethernet subsystem clock register */
 #define ETHSYS_CLKCFG0		0x2c
@@ -372,6 +379,23 @@
 #define RSTCTRL_FE		BIT(6)
 #define RSTCTRL_PPE		BIT(31)
 
+/* SGMII subsystem config registers */
+/* Register to auto-negotiation restart */
+#define SGMSYS_PCS_CONTROL_1	0x0
+#define SGMII_AN_RESTART	BIT(9)
+
+/* Register to programmable link timer, the unit in 2 * 8ns */
+#define SGMSYS_PCS_LINK_TIMER	0x18
+#define SGMII_LINK_TIMER_DEFAULT	(0x186a0 & GENMASK(19, 0))
+
+/* Register to control remote fault */
+#define SGMSYS_SGMII_MODE	0x20
+#define SGMII_REMOTE_FAULT_DIS	BIT(8)
+
+/* Register to power up QPHY */
+#define SGMSYS_QPHY_PWR_STATE_CTRL 0xe8
+#define	SGMII_PHYA_PWD		BIT(4)
+
 struct mtk_rx_dma {
 	unsigned int rxd1;
 	unsigned int rxd2;
@@ -437,12 +461,31 @@ enum mtk_tx_flags {
 enum mtk_clks_map {
 	MTK_CLK_ETHIF,
 	MTK_CLK_ESW,
+	MTK_CLK_GP0,
 	MTK_CLK_GP1,
 	MTK_CLK_GP2,
 	MTK_CLK_TRGPLL,
+	MTK_CLK_SGMII_TX_250M,
+	MTK_CLK_SGMII_RX_250M,
+	MTK_CLK_SGMII_CDR_REF,
+	MTK_CLK_SGMII_CDR_FB,
+	MTK_CLK_SGMII_CK,
+	MTK_CLK_ETH2PLL,
 	MTK_CLK_MAX
 };
 
+#define MT7623_CLKS_BITMAP	(BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) |  \
+				 BIT(MTK_CLK_GP1) | BIT(MTK_CLK_GP2) | \
+				 BIT(MTK_CLK_TRGPLL))
+#define MT7622_CLKS_BITMAP	(BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) |  \
+				 BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \
+				 BIT(MTK_CLK_GP2) | \
+				 BIT(MTK_CLK_SGMII_TX_250M) | \
+				 BIT(MTK_CLK_SGMII_RX_250M) | \
+				 BIT(MTK_CLK_SGMII_CDR_REF) | \
+				 BIT(MTK_CLK_SGMII_CDR_FB) | \
+				 BIT(MTK_CLK_SGMII_CK) | \
+				 BIT(MTK_CLK_ETH2PLL))
 enum mtk_dev_state {
 	MTK_HW_INIT,
 	MTK_RESETTING
@@ -511,6 +554,28 @@ struct mtk_rx_ring {
 	u32 crx_idx_reg;
 };
 
+#define MTK_TRGMII			BIT(0)
+#define MTK_GMAC1_TRGMII		(BIT(1) | MTK_TRGMII)
+#define MTK_ESW				BIT(4)
+#define MTK_GMAC1_ESW			(BIT(5) | MTK_ESW)
+#define MTK_SGMII			BIT(8)
+#define MTK_GMAC1_SGMII			(BIT(9) | MTK_SGMII)
+#define MTK_GMAC2_SGMII			(BIT(10) | MTK_SGMII)
+#define MTK_DUAL_GMAC_SHARED_SGMII	(BIT(11) | MTK_GMAC1_SGMII | \
+					 MTK_GMAC2_SGMII)
+#define MTK_HAS_CAPS(caps, _x)		(((caps) & (_x)) == (_x))
+
+/* struct mtk_eth_data -	This is the structure holding all differences
+ *				among various plaforms
+ * @caps			Flags shown the extra capability for the SoC
+ * @required_clks		Flags shown the bitmap for required clocks on
+ *				the target SoC
+ */
+struct mtk_soc_data {
+	u32		caps;
+	u32		required_clks;
+};
+
 /* currently no SoC has more than 2 macs */
 #define MTK_MAX_DEVS			2
 
@@ -529,6 +594,8 @@ struct mtk_rx_ring {
  * @msg_enable:		Ethtool msg level
  * @ethsys:		The register map pointing at the range used to setup
  *			MII modes
+ * @sgmiisys:		The register map pointing at the range used to setup
+ *			SGMII modes
  * @pctl:		The register map pointing at the range used to setup
  *			GMAC port drive/slew values
  * @dma_refcnt:		track how many netdevs are using the DMA engine
@@ -542,7 +609,8 @@ struct mtk_rx_ring {
  * @clks:		clock array for all clocks required
  * @mii_bus:		If there is a bus we need to create an instance for it
  * @pending_work:	The workqueue used to reset the dma ring
- * @state               Initialization and runtime state of the device.
+ * @state:		Initialization and runtime state of the device
+ * @soc:		Holding specific data among vaious SoCs
  */
 
 struct mtk_eth {
@@ -558,6 +626,7 @@ struct mtk_eth {
 	u32				msg_enable;
 	unsigned long			sysclk;
 	struct regmap			*ethsys;
+	struct regmap			*sgmiisys;
 	struct regmap			*pctl;
 	u32				chip_id;
 	bool				hwlro;
@@ -574,6 +643,8 @@ struct mtk_eth {
 	struct mii_bus			*mii_bus;
 	struct work_struct		pending_work;
 	unsigned long			state;
+
+	const struct mtk_soc_data	*soc;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 85fe17e4dcfb..87d1f4d2a77b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -208,12 +208,10 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 			      cq->moder_cnt, cq->moder_time);
 }
 
-int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 {
 	mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map,
 		    &priv->mdev->uar_lock);
-
-	return 0;
 }
 
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 2b0cbca4beb5..686e18de9a97 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -147,7 +147,7 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
 	mutex_unlock(&priv->mdev->state_lock);
 }
 
-static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
+static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 {
 	struct mlx4_en_profile *params = &mdev->profile;
 	int i;
@@ -176,8 +176,6 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 		params->prof[i].rss_rings = 0;
 		params->prof[i].inline_thold = inline_thold;
 	}
-
-	return 0;
 }
 
 static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
@@ -309,10 +307,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 	}
 
 	/* Build device profile according to supplied module parameters */
-	if (mlx4_en_get_profile(mdev)) {
-		mlx4_err(mdev, "Bad module parameters, aborting\n");
-		goto err_mr;
-	}
+	mlx4_en_get_profile(mdev);
 
 	/* Configure which ports to start according to module parameters */
 	mdev->port_cnt = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 3a291fc1780a..6e67ca7aa7f5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -130,19 +130,20 @@ out:
 	return err;
 }
 
-static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
-			      u32 chain_index, __be16 proto,
-			      struct tc_to_netdev *tc)
+static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			      void *type_data)
 {
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
+	if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
 		return -EINVAL;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc);
+	return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc);
 }
 
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d350b2158104..fdb3ad0cbe54 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -685,7 +685,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 			int cq_idx);
 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
-int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b19e9d235008..981f8415b546 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2992,12 +2992,16 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 	return 0;
 }
 
-static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
+static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
+				 struct tc_mqprio_qopt *mqprio)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_channels new_channels = {};
+	u8 tc = mqprio->num_tc;
 	int err = 0;
 
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
 	if (tc && tc != MLX5E_MAX_NUM_TC)
 		return -EINVAL;
 
@@ -3021,41 +3025,42 @@ out:
 	return err;
 }
 
-static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
-			      u32 chain_index, __be16 proto,
-			      struct tc_to_netdev *tc)
-{
 #ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
+				     struct tc_cls_flower_offload *cls_flower)
+{
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
-		goto mqprio;
-
-	if (chain_index)
+	if (TC_H_MAJ(cls_flower->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
-	switch (tc->type) {
-	case TC_SETUP_CLSFLOWER:
-		switch (tc->cls_flower->command) {
-		case TC_CLSFLOWER_REPLACE:
-			return mlx5e_configure_flower(priv, proto, tc->cls_flower);
-		case TC_CLSFLOWER_DESTROY:
-			return mlx5e_delete_flower(priv, tc->cls_flower);
-		case TC_CLSFLOWER_STATS:
-			return mlx5e_stats_flower(priv, tc->cls_flower);
-		}
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return mlx5e_configure_flower(priv, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return mlx5e_delete_flower(priv, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return mlx5e_stats_flower(priv, cls_flower);
 	default:
 		return -EOPNOTSUPP;
 	}
-
-mqprio:
+}
 #endif
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-
-	return mlx5e_setup_tc(dev, tc->mqprio->num_tc);
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			  void *type_data)
+{
+	switch (type) {
+#ifdef CONFIG_MLX5_ESWITCH
+	case TC_SETUP_CLSFLOWER:
+		return mlx5e_setup_tc_cls_flower(dev, type_data);
+#endif
+	case TC_SETUP_MQPRIO:
+		return mlx5e_setup_tc_mqprio(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static void
@@ -3693,7 +3698,7 @@ static const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
 	.ndo_start_xmit          = mlx5e_xmit,
-	.ndo_setup_tc            = mlx5e_ndo_setup_tc,
+	.ndo_setup_tc            = mlx5e_setup_tc,
 	.ndo_select_queue        = mlx5e_select_queue,
 	.ndo_get_stats64         = mlx5e_get_stats,
 	.ndo_set_rx_mode         = mlx5e_set_rx_mode,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index a0dd0e7e5b57..f3c494a4ecdf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -651,37 +651,42 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 	return 0;
 }
 
-static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
-				  u32 chain_index, __be16 proto,
-				  struct tc_to_netdev *tc)
+static int
+mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
+			      struct tc_cls_flower_offload *cls_flower)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+	if (TC_H_MAJ(cls_flower->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
-	if (tc->egress_dev) {
+	if (cls_flower->egress_dev) {
 		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-		struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw);
 
-		return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle,
-							    chain_index,
-							    proto, tc);
+		dev = mlx5_eswitch_get_uplink_netdev(esw);
+		return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+						     cls_flower);
 	}
 
-	if (chain_index)
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return mlx5e_configure_flower(priv, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return mlx5e_delete_flower(priv, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return mlx5e_stats_flower(priv, cls_flower);
+	default:
 		return -EOPNOTSUPP;
+	}
+}
 
-	switch (tc->type) {
+static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			      void *type_data)
+{
+	switch (type) {
 	case TC_SETUP_CLSFLOWER:
-		switch (tc->cls_flower->command) {
-		case TC_CLSFLOWER_REPLACE:
-			return mlx5e_configure_flower(priv, proto, tc->cls_flower);
-		case TC_CLSFLOWER_DESTROY:
-			return mlx5e_delete_flower(priv, tc->cls_flower);
-		case TC_CLSFLOWER_STATS:
-			return mlx5e_stats_flower(priv, tc->cls_flower);
-		}
+		return mlx5e_rep_setup_tc_cls_flower(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -773,7 +778,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_stop                = mlx5e_rep_close,
 	.ndo_start_xmit          = mlx5e_xmit,
 	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
-	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
+	.ndo_setup_tc            = mlx5e_rep_setup_tc,
 	.ndo_get_stats64         = mlx5e_rep_get_stats,
 	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
 	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3c536f560dd2..3b10d3df7627 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1326,7 +1326,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	LIST_HEAD(actions);
 	int err;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@ -1839,7 +1839,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	bool encap = false;
 	int err = 0;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	memset(attr, 0, sizeof(*attr));
@@ -1939,7 +1939,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	return err;
 }
 
-int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
 			   struct tc_cls_flower_offload *f)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 36473ec65ce8..c14c263a739b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -33,13 +33,15 @@
 #ifndef __MLX5_EN_TC_H__
 #define __MLX5_EN_TC_H__
 
+#include <net/pkt_cls.h>
+
 #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
 
 #ifdef CONFIG_MLX5_ESWITCH
 int mlx5e_tc_init(struct mlx5e_priv *priv);
 void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
 
-int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
 			   struct tc_cls_flower_offload *f);
 int mlx5e_delete_flower(struct mlx5e_priv *priv,
 			struct tc_cls_flower_offload *f);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 695adff89d71..d56eea310509 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -75,6 +75,7 @@ config MLXSW_SPECTRUM
 	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
 	depends on PSAMPLE || PSAMPLE=n
 	depends on BRIDGE || BRIDGE=n
+	depends on IPV6 || IPV6=n
 	select PARMAN
 	select MLXFW
 	default m
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index affe84eb4bff..9d5e7cf288be 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -667,7 +667,7 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core,
 	int err;
 
 	dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n",
-		trans->tid, reg->id, mlxsw_reg_id_str(reg->id),
+		tid, reg->id, mlxsw_reg_id_str(reg->id),
 		mlxsw_core_reg_access_type_str(type));
 
 	skb = mlxsw_emad_alloc(mlxsw_core, reg->len);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 9807ef814e42..f6963b0b4a55 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -57,6 +57,9 @@ enum mlxsw_afk_element {
 	MLXSW_AFK_ELEMENT_VID,
 	MLXSW_AFK_ELEMENT_PCP,
 	MLXSW_AFK_ELEMENT_TCP_FLAGS,
+	MLXSW_AFK_ELEMENT_IP_TTL_,
+	MLXSW_AFK_ELEMENT_IP_ECN,
+	MLXSW_AFK_ELEMENT_IP_DSCP,
 	MLXSW_AFK_ELEMENT_MAX,
 };
 
@@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
 	MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
 	MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
 	MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9),
+	MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8),
+	MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2),
+	MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6),
 	MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32),
 	MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32),
 	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1bd34d9a7b9e..7e8ba546c3a4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3679,15 +3679,17 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
 };
 
 /* reg_htgt_trap_group
@@ -3952,10 +3954,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2);
  */
 MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8);
 
-static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en)
+static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en,
+				       bool ipv6_en)
 {
 	MLXSW_REG_ZERO(rgcr, payload);
 	mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en);
+	mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en);
 }
 
 /* RITR - Router Interface Table Register
@@ -3988,16 +3992,16 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1);
 MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
 
 enum mlxsw_reg_ritr_if_type {
+	/* VLAN interface. */
 	MLXSW_REG_RITR_VLAN_IF,
+	/* FID interface. */
 	MLXSW_REG_RITR_FID_IF,
+	/* Sub-port interface. */
 	MLXSW_REG_RITR_SP_IF,
 };
 
 /* reg_ritr_type
- * Router interface type.
- * 0 - VLAN interface.
- * 1 - FID interface.
- * 2 - Sub-port interface.
+ * Router interface type as per enum mlxsw_reg_ritr_if_type.
  * Access: RW
  */
 MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3);
@@ -4203,10 +4207,12 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
 	MLXSW_REG_ZERO(ritr, payload);
 	mlxsw_reg_ritr_enable_set(payload, enable);
 	mlxsw_reg_ritr_ipv4_set(payload, 1);
+	mlxsw_reg_ritr_ipv6_set(payload, 1);
 	mlxsw_reg_ritr_type_set(payload, type);
 	mlxsw_reg_ritr_op_set(payload, op);
 	mlxsw_reg_ritr_rif_set(payload, rif);
 	mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+	mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
 	mlxsw_reg_ritr_lb_en_set(payload, 1);
 	mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
 	mlxsw_reg_ritr_mtu_set(payload, mtu);
@@ -4712,12 +4718,13 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8);
 /* reg_ralue_dip*
  * The prefix of the route or of the marker that the object of the LPM
  * is compared with. The most significant bits of the dip are the prefix.
- * The list significant bits must be '0' if the prefix_len is smaller
+ * The least significant bits must be '0' if the prefix_len is smaller
  * than 128 for IPv6 or smaller than 32 for IPv4.
  * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved.
  * Access: Index
  */
 MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32);
+MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16);
 
 enum mlxsw_reg_ralue_entry_type {
 	MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1,
@@ -4806,7 +4813,7 @@ MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13);
  */
 MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16);
 
-/* reg_ralue_v
+/* reg_ralue_ip2me_v
  * Valid bit for the tunnel_ptr field.
  * If valid = 0 then trap to CPU as IP2ME trap ID.
  * If valid = 1 and the packet format allows NVE or IPinIP tunnel
@@ -4816,15 +4823,15 @@ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16);
  * Only relevant in case of IP2ME action.
  * Access: RW
  */
-MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1);
+MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1);
 
-/* reg_ralue_tunnel_ptr
+/* reg_ralue_ip2me_tunnel_ptr
  * Tunnel Pointer for NVE or IPinIP tunnel decapsulation.
  * For Spectrum, pointer to KVD Linear.
  * Only relevant in case of IP2ME action.
  * Access: RW
  */
-MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24);
+MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24);
 
 static inline void mlxsw_reg_ralue_pack(char *payload,
 					enum mlxsw_reg_ralxx_protocol protocol,
@@ -4851,6 +4858,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload,
 	mlxsw_reg_ralue_dip4_set(payload, dip);
 }
 
+static inline void mlxsw_reg_ralue_pack6(char *payload,
+					 enum mlxsw_reg_ralxx_protocol protocol,
+					 enum mlxsw_reg_ralue_op op,
+					 u16 virtual_router, u8 prefix_len,
+					 const void *dip)
+{
+	mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
+	mlxsw_reg_ralue_dip6_memcpy_to(payload, dip);
+}
+
 static inline void
 mlxsw_reg_ralue_act_remote_pack(char *payload,
 				enum mlxsw_reg_ralue_trap_action trap_action,
@@ -4954,6 +4971,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
  * Access: Index
  */
 MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
+MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16);
 
 enum mlxsw_reg_rauht_trap_action {
 	MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
@@ -5018,6 +5036,15 @@ static inline void mlxsw_reg_rauht_pack4(char *payload,
 	mlxsw_reg_rauht_dip4_set(payload, dip);
 }
 
+static inline void mlxsw_reg_rauht_pack6(char *payload,
+					 enum mlxsw_reg_rauht_op op, u16 rif,
+					 const char *mac, const char *dip)
+{
+	mlxsw_reg_rauht_pack(payload, op, rif, mac);
+	mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6);
+	mlxsw_reg_rauht_dip6_memcpy_to(payload, dip);
+}
+
 /* RALEU - Router Algorithmic LPM ECMP Update Register
  * ---------------------------------------------------
  * The register enables updating the ECMP section in the action for multiple
@@ -5216,6 +5243,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
 MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0,
 		     32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false);
 
+#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20
+
+/* reg_rauhtd_ipv6_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+		     MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+		     16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_dip
+ * Destination IPv6 address.
+ * Access: RO
+ */
+MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN,
+		       16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10);
+
 static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
 						    int ent_index, u16 *p_rif,
 						    u32 *p_dip)
@@ -5224,6 +5275,14 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
 	*p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index);
 }
 
+static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
+						    int rec_index, u16 *p_rif,
+						    char *p_dip)
+{
+	*p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index);
+	mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
+}
+
 /* MFCR - Management Fan Control Register
  * --------------------------------------
  * This register controls the settings of the Fan Speed PWM mechanism.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 60bf8f27cc00..eb7c4549f464 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -58,6 +58,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <net/netevent.h>
 #include <net/tc_act/tc_sample.h>
+#include <net/addrconf.h>
 
 #include "spectrum.h"
 #include "pci.h"
@@ -1616,16 +1617,16 @@ mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port)
 }
 
 static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					  __be16 protocol,
-					  struct tc_cls_matchall_offload *cls,
+					  struct tc_cls_matchall_offload *f,
 					  bool ingress)
 {
 	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+	__be16 protocol = f->common.protocol;
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 	int err;
 
-	if (!tc_single_action(cls->exts)) {
+	if (!tcf_exts_has_one_action(f->exts)) {
 		netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
 		return -EOPNOTSUPP;
 	}
@@ -1633,9 +1634,9 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 	mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
 	if (!mall_tc_entry)
 		return -ENOMEM;
-	mall_tc_entry->cookie = cls->cookie;
+	mall_tc_entry->cookie = f->cookie;
 
-	tcf_exts_to_list(cls->exts, &actions);
+	tcf_exts_to_list(f->exts, &actions);
 	a = list_first_entry(&actions, struct tc_action, list);
 
 	if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
@@ -1647,7 +1648,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 							    mirror, a, ingress);
 	} else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) {
 		mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
-		err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls,
+		err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f,
 							    a, ingress);
 	} else {
 		err = -EOPNOTSUPP;
@@ -1665,12 +1666,12 @@ err_add_action:
 }
 
 static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					   struct tc_cls_matchall_offload *cls)
+					   struct tc_cls_matchall_offload *f)
 {
 	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
 
 	mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port,
-							 cls->cookie);
+							 f->cookie);
 	if (!mall_tc_entry) {
 		netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
 		return;
@@ -1692,49 +1693,61 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 	kfree(mall_tc_entry);
 }
 
-static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle,
-			     u32 chain_index, __be16 proto,
-			     struct tc_to_netdev *tc)
+static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
+					  struct tc_cls_matchall_offload *f)
 {
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+	bool ingress = TC_H_MAJ(f->common.handle) == TC_H_MAJ(TC_H_INGRESS);
 
-	if (chain_index)
+	if (f->common.chain_index)
 		return -EOPNOTSUPP;
 
-	switch (tc->type) {
-	case TC_SETUP_MATCHALL:
-		switch (tc->cls_mall->command) {
-		case TC_CLSMATCHALL_REPLACE:
-			return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port,
-							      proto,
-							      tc->cls_mall,
-							      ingress);
-		case TC_CLSMATCHALL_DESTROY:
-			mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port,
-						       tc->cls_mall);
-			return 0;
-		default:
-			return -EOPNOTSUPP;
-		}
-	case TC_SETUP_CLSFLOWER:
-		switch (tc->cls_flower->command) {
-		case TC_CLSFLOWER_REPLACE:
-			return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress,
-						       proto, tc->cls_flower);
-		case TC_CLSFLOWER_DESTROY:
-			mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress,
-						tc->cls_flower);
-			return 0;
-		case TC_CLSFLOWER_STATS:
-			return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress,
-						     tc->cls_flower);
-		default:
-			return -EOPNOTSUPP;
-		}
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
+						      ingress);
+	case TC_CLSMATCHALL_DESTROY:
+		mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
+			     struct tc_cls_flower_offload *f)
+{
+	bool ingress = TC_H_MAJ(f->common.handle) == TC_H_MAJ(TC_H_INGRESS);
+
+	if (f->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f);
+	case TC_CLSFLOWER_DESTROY:
+		mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f);
+		return 0;
+	case TC_CLSFLOWER_STATS:
+		return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, f);
+	default:
+		return -EOPNOTSUPP;
 	}
+}
+
+static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			     void *type_data)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
-	return -EOPNOTSUPP;
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data);
+	case TC_SETUP_CLSFLOWER:
+		return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
@@ -3333,15 +3346,47 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
 	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
 	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
+	MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD,
+			  false),
+	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+			     false),
+	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD,
+			     false),
+	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+			     false),
 	/* L3 traps */
-	MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false),
-	MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
-	MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
-	MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
-	MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
+	MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
+	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
+			  false),
+	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP,
+			  false),
+	MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
+	MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
+	MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
+	MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
+	MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
+	MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+			  false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+			  false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+			  false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+			  false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
+	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
+			  false),
+	MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false),
+	MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
+	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
 	/* PKT Sample trap */
 	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
 		  false, SP_IP2ME, DISCARD),
@@ -3376,15 +3421,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			burst_size = 7;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
 			rate = 16 * 1024;
 			burst_size = 10;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 			rate = 1024;
 			burst_size = 7;
 			break;
@@ -3433,21 +3480,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			priority = 5;
 			tc = 5;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 			priority = 4;
 			tc = 4;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
 			priority = 3;
 			tc = 3;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 			priority = 2;
 			tc = 2;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 			priority = 1;
@@ -4357,6 +4406,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
 	.priority = 10,	/* Must be called before FIB notifier block */
 };
 
+static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_inet6addr_event,
+};
+
 static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
 	.notifier_call = mlxsw_sp_router_netevent_event,
 };
@@ -4377,6 +4430,7 @@ static int __init mlxsw_sp_module_init(void)
 
 	register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 	register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+	register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 	register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 
 	err = mlxsw_core_driver_register(&mlxsw_sp_driver);
@@ -4393,6 +4447,7 @@ err_pci_driver_register:
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 err_core_driver_register:
 	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+	unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 	return err;
@@ -4403,6 +4458,7 @@ static void __exit mlxsw_sp_module_exit(void)
 	mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+	unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 5ef98d4d0ab6..8452d1db2f3f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -384,6 +384,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
 			    unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+			     unsigned long event, void *ptr);
 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
 				 struct netdev_notifier_changeupper_info *info);
 void
@@ -506,7 +508,7 @@ extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops;
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
-			    __be16 protocol, struct tc_cls_flower_offload *f);
+			    struct tc_cls_flower_offload *f);
 void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
 			     struct tc_cls_flower_offload *f);
 int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 01a1501b56ca..508b5fcacd77 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -369,7 +369,7 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
 		local_port = mlxsw_sp_port->local_port;
 		in_port = false;
 	} else {
-		/* If out_dev is NULL, the called wants to
+		/* If out_dev is NULL, the caller wants to
 		 * set forward to ingress port.
 		 */
 		local_port = 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
index 85d5001a5818..fb8031828454 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
@@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
 	MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
 	MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 61a10f166f97..bc5173f1b5c1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -984,6 +984,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
 	MLXSW_AFK_ELEMENT_VID,
 	MLXSW_AFK_ELEMENT_PCP,
 	MLXSW_AFK_ELEMENT_TCP_FLAGS,
+	MLXSW_AFK_ELEMENT_IP_TTL_,
+	MLXSW_AFK_ELEMENT_IP_ECN,
+	MLXSW_AFK_ELEMENT_IP_DSCP,
 };
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index 6afbe9ec64e2..bbd238e50f05 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
 
 static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
 	[MLXSW_REG_SFGC_TYPE_BROADCAST]				= 1,
-	[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]	= 1,
 	[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP]	= 1,
 	[MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL]			= 1,
 	[MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST]			= 1,
@@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
 
 static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
 	[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4]	= 1,
+	[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]	= 1,
 };
 
 static const int *mlxsw_sp_packet_type_sfgc_types[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 21bb2bf62d3e..95428b41c50f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -53,7 +53,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 	LIST_HEAD(actions);
 	int err;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return 0;
 
 	/* Count action is inserted first */
@@ -212,11 +212,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_acl_rule_info *rulei,
+				    struct tc_cls_flower_offload *f,
+				    u16 n_proto)
+{
+	struct flow_dissector_key_ip *key, *mask;
+
+	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP))
+		return 0;
+
+	if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
+		dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n");
+		return -EINVAL;
+	}
+
+	key = skb_flow_dissector_target(f->dissector,
+					FLOW_DISSECTOR_KEY_IP,
+					f->key);
+	mask = skb_flow_dissector_target(f->dissector,
+					 FLOW_DISSECTOR_KEY_IP,
+					 f->mask);
+	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_,
+				       key->ttl, mask->ttl);
+
+	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN,
+				       key->tos & 0x3, mask->tos & 0x3);
+
+	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
+				       key->tos >> 6, mask->tos >> 6);
+
+	return 0;
+}
+
 static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 				 struct net_device *dev,
 				 struct mlxsw_sp_acl_rule_info *rulei,
 				 struct tc_cls_flower_offload *f)
 {
+	u16 n_proto_mask = 0;
+	u16 n_proto_key = 0;
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 	int err;
@@ -229,12 +264,13 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
 	      BIT(FLOW_DISSECTOR_KEY_TCP) |
+	      BIT(FLOW_DISSECTOR_KEY_IP) |
 	      BIT(FLOW_DISSECTOR_KEY_VLAN))) {
 		dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n");
 		return -EOPNOTSUPP;
 	}
 
-	mlxsw_sp_acl_rulei_priority(rulei, f->prio);
+	mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
 		struct flow_dissector_key_control *key =
@@ -253,8 +289,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 			skb_flow_dissector_target(f->dissector,
 						  FLOW_DISSECTOR_KEY_BASIC,
 						  f->mask);
-		u16 n_proto_key = ntohs(key->n_proto);
-		u16 n_proto_mask = ntohs(mask->n_proto);
+		n_proto_key = ntohs(key->n_proto);
+		n_proto_mask = ntohs(mask->n_proto);
 
 		if (n_proto_key == ETH_P_ALL) {
 			n_proto_key = 0;
@@ -324,11 +360,15 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		return err;
 
+	err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask);
+	if (err)
+		return err;
+
 	return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts);
 }
 
 int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
-			    __be16 protocol, struct tc_cls_flower_offload *f)
+			    struct tc_cls_flower_offload *f)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct net_device *dev = mlxsw_sp_port->dev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4b2e0fd7d51e..93b6da88e79c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -43,12 +43,19 @@
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
+#include <linux/socket.h>
+#include <linux/route.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
 #include <net/ip_fib.h>
+#include <net/ip6_fib.h>
 #include <net/fib_rules.h>
 #include <net/l3mdev.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/fib_notifier.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -304,7 +311,7 @@ static struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
 			 const struct net_device *dev);
 
-#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE)
+#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
 
 struct mlxsw_sp_prefix_usage {
 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
@@ -384,21 +391,31 @@ struct mlxsw_sp_fib_node {
 	struct mlxsw_sp_fib_key key;
 };
 
-struct mlxsw_sp_fib_entry_params {
+struct mlxsw_sp_fib_entry {
+	struct list_head list;
+	struct mlxsw_sp_fib_node *fib_node;
+	enum mlxsw_sp_fib_entry_type type;
+	struct list_head nexthop_group_node;
+	struct mlxsw_sp_nexthop_group *nh_group;
+};
+
+struct mlxsw_sp_fib4_entry {
+	struct mlxsw_sp_fib_entry common;
 	u32 tb_id;
 	u32 prio;
 	u8 tos;
 	u8 type;
 };
 
-struct mlxsw_sp_fib_entry {
+struct mlxsw_sp_fib6_entry {
+	struct mlxsw_sp_fib_entry common;
+	struct list_head rt6_list;
+	unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
 	struct list_head list;
-	struct mlxsw_sp_fib_node *fib_node;
-	enum mlxsw_sp_fib_entry_type type;
-	struct list_head nexthop_group_node;
-	struct mlxsw_sp_nexthop_group *nh_group;
-	struct mlxsw_sp_fib_entry_params params;
-	bool offloaded;
+	struct rt6_info *rt;
 };
 
 enum mlxsw_sp_l3proto {
@@ -428,6 +445,7 @@ struct mlxsw_sp_vr {
 	u32 tb_id; /* kernel fib table id */
 	unsigned int rif_count;
 	struct mlxsw_sp_fib *fib4;
+	struct mlxsw_sp_fib *fib6;
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -625,7 +643,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
 
 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 {
-	return !!vr->fib4;
+	return !!vr->fib4 || !!vr->fib6;
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -694,7 +712,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
 	case MLXSW_SP_L3_PROTO_IPV4:
 		return vr->fib4;
 	case MLXSW_SP_L3_PROTO_IPV6:
-		BUG_ON(1);
+		return vr->fib6;
 	}
 	return NULL;
 }
@@ -703,6 +721,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 					      u32 tb_id)
 {
 	struct mlxsw_sp_vr *vr;
+	int err;
 
 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
 	if (!vr)
@@ -710,12 +729,24 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
 	if (IS_ERR(vr->fib4))
 		return ERR_CAST(vr->fib4);
+	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
+	if (IS_ERR(vr->fib6)) {
+		err = PTR_ERR(vr->fib6);
+		goto err_fib6_create;
+	}
 	vr->tb_id = tb_id;
 	return vr;
+
+err_fib6_create:
+	mlxsw_sp_fib_destroy(vr->fib4);
+	vr->fib4 = NULL;
+	return ERR_PTR(err);
 }
 
 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
 {
+	mlxsw_sp_fib_destroy(vr->fib6);
+	vr->fib6 = NULL;
 	mlxsw_sp_fib_destroy(vr->fib4);
 	vr->fib4 = NULL;
 }
@@ -773,7 +804,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
 
 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
 {
-	if (!vr->rif_count && list_empty(&vr->fib4->node_list))
+	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
+	    list_empty(&vr->fib6->node_list))
 		mlxsw_sp_vr_destroy(vr);
 }
 
@@ -929,8 +961,15 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
 static void
 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
 {
-	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+	unsigned long interval;
 
+#if IS_ENABLED(CONFIG_IPV6)
+	interval = min_t(unsigned long,
+			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
+			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
+#else
+	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+#endif
 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
 }
 
@@ -965,6 +1004,44 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
 	neigh_release(n);
 }
 
+#if IS_ENABLED(IPV6)
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+						   char *rauhtd_pl,
+						   int rec_index)
+{
+	struct net_device *dev;
+	struct neighbour *n;
+	struct in6_addr dip;
+	u16 rif;
+
+	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
+					 (char *) &dip);
+
+	if (!mlxsw_sp->router->rifs[rif]) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
+		return;
+	}
+
+	dev = mlxsw_sp->router->rifs[rif]->dev;
+	n = neigh_lookup(&nd_tbl, &dip, dev);
+	if (!n) {
+		netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
+			   &dip);
+		return;
+	}
+
+	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
+	neigh_event_send(n, NULL);
+	neigh_release(n);
+}
+#else
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+						   char *rauhtd_pl,
+						   int rec_index)
+{
+}
+#endif
+
 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
 						   char *rauhtd_pl,
 						   int rec_index)
@@ -988,6 +1065,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
 
 }
 
+static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+						   char *rauhtd_pl,
+						   int rec_index)
+{
+	/* One record contains one entry. */
+	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
+					       rec_index);
+}
+
 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
 					      char *rauhtd_pl, int rec_index)
 {
@@ -997,7 +1083,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
 						       rec_index);
 		break;
 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
-		WARN_ON_ONCE(1);
+		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
+						       rec_index);
 		break;
 	}
 }
@@ -1022,22 +1109,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
 	return false;
 }
 
-static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+static int
+__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
+				       char *rauhtd_pl,
+				       enum mlxsw_reg_rauhtd_type type)
 {
-	char *rauhtd_pl;
-	u8 num_rec;
-	int i, err;
-
-	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
-	if (!rauhtd_pl)
-		return -ENOMEM;
+	int i, num_rec;
+	int err;
 
 	/* Make sure the neighbour's netdev isn't removed in the
 	 * process.
 	 */
 	rtnl_lock();
 	do {
-		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
+		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
 				      rauhtd_pl);
 		if (err) {
@@ -1051,6 +1136,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
 	rtnl_unlock();
 
+	return err;
+}
+
+static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+{
+	enum mlxsw_reg_rauhtd_type type;
+	char *rauhtd_pl;
+	int err;
+
+	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
+	if (!rauhtd_pl)
+		return -ENOMEM;
+
+	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
+	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+	if (err)
+		goto out;
+
+	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
+	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+out:
 	kfree(rauhtd_pl);
 	return err;
 }
@@ -1147,6 +1253,32 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void
+mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_neigh_entry *neigh_entry,
+				enum mlxsw_reg_rauht_op op)
+{
+	struct neighbour *n = neigh_entry->key.n;
+	char rauht_pl[MLXSW_REG_RAUHT_LEN];
+	const char *dip = n->primary_key;
+
+	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
+			      dip);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+}
+
+static bool mlxsw_sp_neigh_ipv6_ignore(struct neighbour *n)
+{
+	/* Packets with a link-local destination address are trapped
+	 * after LPM lookup and never reach the neighbour table, so
+	 * there is no need to program such neighbours to the device.
+	 */
+	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
+	    IPV6_ADDR_LINKLOCAL)
+		return true;
+	return false;
+}
+
+static void
 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_neigh_entry *neigh_entry,
 			    bool adding)
@@ -1154,11 +1286,17 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
 	if (!adding && !neigh_entry->connected)
 		return;
 	neigh_entry->connected = adding;
-	if (neigh_entry->key.n->tbl == &arp_tbl)
+	if (neigh_entry->key.n->tbl->family == AF_INET) {
 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
 						mlxsw_sp_rauht_op(adding));
-	else
+	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
+		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry->key.n))
+			return;
+		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
+						mlxsw_sp_rauht_op(adding));
+	} else {
 		WARN_ON_ONCE(1);
+	}
 }
 
 struct mlxsw_sp_neigh_event_work {
@@ -1227,7 +1365,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
 		p = ptr;
 
 		/* We don't care about changes in the default table. */
-		if (!p->dev || p->tbl != &arp_tbl)
+		if (!p->dev || (p->tbl->family != AF_INET &&
+				p->tbl->family != AF_INET6))
 			return NOTIFY_DONE;
 
 		/* We are in atomic context and can't take RTNL mutex,
@@ -1246,7 +1385,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
 	case NETEVENT_NEIGH_UPDATE:
 		n = ptr;
 
-		if (n->tbl != &arp_tbl)
+		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
 			return NOTIFY_DONE;
 
 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
@@ -1307,25 +1446,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-				    const struct mlxsw_sp_rif *rif)
-{
-	char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-			     rif->rif_index, rif->addr);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-				 rif_list_node)
+				 rif_list_node) {
+		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+	}
 }
 
 struct mlxsw_sp_nexthop_key {
@@ -1340,6 +1470,7 @@ struct mlxsw_sp_nexthop {
 						*/
 	struct rhash_head ht_node;
 	struct mlxsw_sp_nexthop_key key;
+	unsigned char gw_addr[sizeof(struct in6_addr)];
 	struct mlxsw_sp_rif *rif;
 	u8 should_offload:1, /* set indicates this neigh is connected and
 			      * should be put to KVD linear area of this group.
@@ -1360,6 +1491,7 @@ struct mlxsw_sp_nexthop_group_key {
 struct mlxsw_sp_nexthop_group {
 	struct rhash_head ht_node;
 	struct list_head fib_list; /* list of fib entries that use this group */
+	struct neigh_table *neigh_tbl;
 	struct mlxsw_sp_nexthop_group_key key;
 	u8 adj_index_valid:1,
 	   gateway:1; /* routes using the group use a gateway */
@@ -1535,6 +1667,24 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void
+mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
+				   enum mlxsw_reg_ralue_op op, int err);
+
+static void
+mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
+	struct mlxsw_sp_fib_entry *fib_entry;
+
+	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
+						      fib_entry))
+			continue;
+		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
+	}
+}
+
+static void
 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_nexthop_group *nh_grp)
 {
@@ -1556,7 +1706,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 	for (i = 0; i < nh_grp->count; i++) {
 		nh = &nh_grp->nexthops[i];
 
-		if (nh->should_offload ^ nh->offloaded) {
+		if (nh->should_offload != nh->offloaded) {
 			offload_change = true;
 			if (nh->should_offload)
 				nh->update = 1;
@@ -1621,6 +1771,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
 		goto set_trap;
 	}
+
+	/* Offload state within the group changed, so update the flags. */
+	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
+
 	return;
 
 set_trap:
@@ -1640,9 +1794,9 @@ set_trap:
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 					    bool removing)
 {
-	if (!removing && !nh->should_offload)
+	if (!removing)
 		nh->should_offload = 1;
-	else if (removing && nh->offloaded)
+	else if (nh->offloaded)
 		nh->should_offload = 0;
 	nh->update = 1;
 }
@@ -1684,7 +1838,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_nexthop *nh)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry;
-	struct fib_nh *fib_nh = nh->key.fib_nh;
 	struct neighbour *n;
 	u8 nud_state, dead;
 	int err;
@@ -1693,13 +1846,14 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
 		return 0;
 
 	/* Take a reference of neigh here ensuring that neigh would
-	 * not be detructed before the nexthop entry is finished.
+	 * not be destructed before the nexthop entry is finished.
 	 * The reference is taken either in neigh_lookup() or
 	 * in neigh_create() in case n is not found.
 	 */
-	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
+	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
 	if (!n) {
-		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
+		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+				 nh->rif->dev);
 		if (IS_ERR(n))
 			return PTR_ERR(n);
 		neigh_event_send(n, NULL);
@@ -1761,10 +1915,10 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
 	neigh_release(n);
 }
 
-static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
-				 struct mlxsw_sp_nexthop_group *nh_grp,
-				 struct mlxsw_sp_nexthop *nh,
-				 struct fib_nh *fib_nh)
+static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_nexthop_group *nh_grp,
+				  struct mlxsw_sp_nexthop *nh,
+				  struct fib_nh *fib_nh)
 {
 	struct net_device *dev = fib_nh->nh_dev;
 	struct in_device *in_dev;
@@ -1773,6 +1927,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
 
 	nh->nh_grp = nh_grp;
 	nh->key.fib_nh = fib_nh;
+	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
 	if (err)
 		return err;
@@ -1802,16 +1957,16 @@ err_nexthop_neigh_init:
 	return err;
 }
 
-static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
-				  struct mlxsw_sp_nexthop *nh)
+static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_nexthop *nh)
 {
 	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
 	mlxsw_sp_nexthop_rif_fini(nh);
 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
 }
 
-static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
-				   unsigned long event, struct fib_nh *fib_nh)
+static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
+				    unsigned long event, struct fib_nh *fib_nh)
 {
 	struct mlxsw_sp_nexthop_key key;
 	struct mlxsw_sp_nexthop *nh;
@@ -1856,7 +2011,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 }
 
 static struct mlxsw_sp_nexthop_group *
-mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 {
 	struct mlxsw_sp_nexthop_group *nh_grp;
 	struct mlxsw_sp_nexthop *nh;
@@ -1871,6 +2026,8 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 	if (!nh_grp)
 		return ERR_PTR(-ENOMEM);
 	INIT_LIST_HEAD(&nh_grp->fib_list);
+	nh_grp->neigh_tbl = &arp_tbl;
+
 	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
 	nh_grp->count = fi->fib_nhs;
 	nh_grp->key.fi = fi;
@@ -1878,9 +2035,9 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 	for (i = 0; i < nh_grp->count; i++) {
 		nh = &nh_grp->nexthops[i];
 		fib_nh = &fi->fib_nh[i];
-		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
+		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
 		if (err)
-			goto err_nexthop_init;
+			goto err_nexthop4_init;
 	}
 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
 	if (err)
@@ -1889,10 +2046,10 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 	return nh_grp;
 
 err_nexthop_group_insert:
-err_nexthop_init:
+err_nexthop4_init:
 	for (i--; i >= 0; i--) {
 		nh = &nh_grp->nexthops[i];
-		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
 	}
 	fib_info_put(nh_grp->key.fi);
 	kfree(nh_grp);
@@ -1900,8 +2057,8 @@ err_nexthop_init:
 }
 
 static void
-mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
-			       struct mlxsw_sp_nexthop_group *nh_grp)
+mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_nexthop_group *nh_grp)
 {
 	struct mlxsw_sp_nexthop *nh;
 	int i;
@@ -1909,7 +2066,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
 	for (i = 0; i < nh_grp->count; i++) {
 		nh = &nh_grp->nexthops[i];
-		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
 	}
 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
 	WARN_ON_ONCE(nh_grp->adj_index_valid);
@@ -1917,9 +2074,9 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
 	kfree(nh_grp);
 }
 
-static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_fib_entry *fib_entry,
-				      struct fib_info *fi)
+static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib_entry *fib_entry,
+				       struct fib_info *fi)
 {
 	struct mlxsw_sp_nexthop_group_key key;
 	struct mlxsw_sp_nexthop_group *nh_grp;
@@ -1927,7 +2084,7 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
 	key.fi = fi;
 	nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
 	if (!nh_grp) {
-		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
+		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
 		if (IS_ERR(nh_grp))
 			return PTR_ERR(nh_grp);
 	}
@@ -1936,15 +2093,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
-				       struct mlxsw_sp_fib_entry *fib_entry)
+static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib_entry *fib_entry)
 {
 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
 
 	list_del(&fib_entry->nexthop_group_node);
 	if (!list_empty(&nh_grp->fib_list))
 		return;
-	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
+	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static bool
+mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_fib4_entry *fib4_entry;
+
+	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
+				  common);
+	return !fib4_entry->tos;
 }
 
 static bool
@@ -1952,8 +2119,14 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
 {
 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
 
-	if (fib_entry->params.tos)
-		return false;
+	switch (fib_entry->fib_node->fib->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
+			return false;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		break;
+	}
 
 	switch (fib_entry->type) {
 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
@@ -1965,16 +2138,111 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
 	}
 }
 
-static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+static struct mlxsw_sp_nexthop *
+mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
+		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+	int i;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+		if (nh->rif && nh->rif->dev == rt->dst.dev &&
+		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
+				    &rt->rt6i_gateway))
+			return nh;
+		continue;
+	}
+
+	return NULL;
+}
+
+static void
+mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+	int i;
+
+	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+		return;
+	}
+
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+		if (nh->offloaded)
+			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+		else
+			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+	}
+}
+
+static void
+mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+	int i;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+	}
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 {
-	fib_entry->offloaded = true;
+	struct mlxsw_sp_fib6_entry *fib6_entry;
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+				  common);
+
+	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+				 list)->rt->rt6i_flags |= RTF_OFFLOAD;
+		return;
+	}
+
+	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+		struct mlxsw_sp_nexthop *nh;
+
+		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
+		if (nh && nh->offloaded)
+			mlxsw_sp_rt6->rt->rt6i_flags |= RTF_OFFLOAD;
+		else
+			mlxsw_sp_rt6->rt->rt6i_flags &= ~RTF_OFFLOAD;
+	}
+}
 
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry;
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+				  common);
+	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+		rt->rt6i_flags &= ~RTF_OFFLOAD;
+	}
+}
+
+static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
 	switch (fib_entry->fib_node->fib->proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
-		fib_info_offload_inc(fib_entry->nh_group->key.fi);
+		mlxsw_sp_fib4_entry_offload_set(fib_entry);
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
-		WARN_ON_ONCE(1);
+		mlxsw_sp_fib6_entry_offload_set(fib_entry);
+		break;
 	}
 }
 
@@ -1983,13 +2251,12 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 {
 	switch (fib_entry->fib_node->fib->proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
-		fib_info_offload_dec(fib_entry->nh_group->key.fi);
+		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
-		WARN_ON_ONCE(1);
+		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+		break;
 	}
-
-	fib_entry->offloaded = false;
 }
 
 static void
@@ -1998,17 +2265,13 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
 {
 	switch (op) {
 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
-		if (!fib_entry->offloaded)
-			return;
 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
 		if (err)
 			return;
-		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
-		    !fib_entry->offloaded)
+		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
 			mlxsw_sp_fib_entry_offload_set(fib_entry);
-		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
-			 fib_entry->offloaded)
+		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry))
 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
 		return;
 	default:
@@ -2016,13 +2279,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
 	}
 }
 
-static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
-					 struct mlxsw_sp_fib_entry *fib_entry,
-					 enum mlxsw_reg_ralue_op op)
+static void
+mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
+			      const struct mlxsw_sp_fib_entry *fib_entry,
+			      enum mlxsw_reg_ralue_op op)
 {
-	char ralue_pl[MLXSW_REG_RALUE_LEN];
 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
-	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
+	enum mlxsw_reg_ralxx_protocol proto;
+	u32 *p_dip;
+
+	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
+
+	switch (fib->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		p_dip = (u32 *) fib_entry->fib_node->key.addr;
+		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
+				      fib_entry->fib_node->key.prefix_len,
+				      *p_dip);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
+				      fib_entry->fib_node->key.prefix_len,
+				      fib_entry->fib_node->key.addr);
+		break;
+	}
+}
+
+static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib_entry *fib_entry,
+					enum mlxsw_reg_ralue_op op)
+{
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
 	enum mlxsw_reg_ralue_trap_action trap_action;
 	u16 trap_id = 0;
 	u32 adjacency_index = 0;
@@ -2041,24 +2328,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
 	}
 
-	mlxsw_reg_ralue_pack4(ralue_pl,
-			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
-			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
-			      *p_dip);
+	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
 					adjacency_index, ecmp_size);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_fib_entry *fib_entry,
-					enum mlxsw_reg_ralue_op op)
+static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib_entry *fib_entry,
+				       enum mlxsw_reg_ralue_op op)
 {
 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
-	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
 	enum mlxsw_reg_ralue_trap_action trap_action;
 	char ralue_pl[MLXSW_REG_RALUE_LEN];
-	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
 	u16 trap_id = 0;
 	u16 rif_index = 0;
 
@@ -2070,42 +2352,34 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
 	}
 
-	mlxsw_reg_ralue_pack4(ralue_pl,
-			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
-			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
-			      *p_dip);
+	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
 				       rif_index);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
-				       struct mlxsw_sp_fib_entry *fib_entry,
-				       enum mlxsw_reg_ralue_op op)
+static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_fib_entry *fib_entry,
+				      enum mlxsw_reg_ralue_op op)
 {
-	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
 	char ralue_pl[MLXSW_REG_RALUE_LEN];
-	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
 
-	mlxsw_reg_ralue_pack4(ralue_pl,
-			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
-			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
-			      *p_dip);
+	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
-				  struct mlxsw_sp_fib_entry *fib_entry,
-				  enum mlxsw_reg_ralue_op op)
+static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_fib_entry *fib_entry,
+				   enum mlxsw_reg_ralue_op op)
 {
 	switch (fib_entry->type) {
 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
-		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
+		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
-		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
+		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
-		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
+		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
 	}
 	return -EINVAL;
 }
@@ -2114,16 +2388,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_fib_entry *fib_entry,
 				 enum mlxsw_reg_ralue_op op)
 {
-	int err = -EINVAL;
+	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
 
-	switch (fib_entry->fib_node->fib->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		return err;
-	}
 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
+
 	return err;
 }
 
@@ -2173,72 +2441,80 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_fib_node *fib_node,
 			   const struct fib_entry_notifier_info *fen_info)
 {
+	struct mlxsw_sp_fib4_entry *fib4_entry;
 	struct mlxsw_sp_fib_entry *fib_entry;
 	int err;
 
-	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
-	if (!fib_entry) {
-		err = -ENOMEM;
-		goto err_fib_entry_alloc;
-	}
+	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
+	if (!fib4_entry)
+		return ERR_PTR(-ENOMEM);
+	fib_entry = &fib4_entry->common;
 
 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
 	if (err)
 		goto err_fib4_entry_type_set;
 
-	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
+	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
 	if (err)
-		goto err_nexthop_group_get;
+		goto err_nexthop4_group_get;
 
-	fib_entry->params.prio = fen_info->fi->fib_priority;
-	fib_entry->params.tb_id = fen_info->tb_id;
-	fib_entry->params.type = fen_info->type;
-	fib_entry->params.tos = fen_info->tos;
+	fib4_entry->prio = fen_info->fi->fib_priority;
+	fib4_entry->tb_id = fen_info->tb_id;
+	fib4_entry->type = fen_info->type;
+	fib4_entry->tos = fen_info->tos;
 
 	fib_entry->fib_node = fib_node;
 
-	return fib_entry;
+	return fib4_entry;
 
-err_nexthop_group_get:
+err_nexthop4_group_get:
 err_fib4_entry_type_set:
-	kfree(fib_entry);
-err_fib_entry_alloc:
+	kfree(fib4_entry);
 	return ERR_PTR(err);
 }
 
 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_fib_entry *fib_entry)
+					struct mlxsw_sp_fib4_entry *fib4_entry)
 {
-	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
-	kfree(fib_entry);
+	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
+	kfree(fib4_entry);
 }
 
 static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
-		       const struct fib_entry_notifier_info *fen_info);
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+			 size_t addr_len, unsigned char prefix_len);
 
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
 			   const struct fib_entry_notifier_info *fen_info)
 {
-	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_fib4_entry *fib4_entry;
 	struct mlxsw_sp_fib_node *fib_node;
+	struct mlxsw_sp_fib *fib;
+	struct mlxsw_sp_vr *vr;
 
-	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
-	if (IS_ERR(fib_node))
+	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
+	if (!vr)
+		return NULL;
+	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
+
+	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
+					    sizeof(fen_info->dst),
+					    fen_info->dst_len);
+	if (!fib_node)
 		return NULL;
 
-	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
-		if (fib_entry->params.tb_id == fen_info->tb_id &&
-		    fib_entry->params.tos == fen_info->tos &&
-		    fib_entry->params.type == fen_info->type &&
-		    fib_entry->nh_group->key.fi == fen_info->fi) {
-			return fib_entry;
+	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+		if (fib4_entry->tb_id == fen_info->tb_id &&
+		    fib4_entry->tos == fen_info->tos &&
+		    fib4_entry->type == fen_info->type &&
+		    fib4_entry->common.nh_group->key.fi == fen_info->fi) {
+			return fib4_entry;
 		}
 	}
 
@@ -2395,28 +2671,25 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
 }
 
 static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
-		       const struct fib_entry_notifier_info *fen_info)
+mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
+		      size_t addr_len, unsigned char prefix_len,
+		      enum mlxsw_sp_l3proto proto)
 {
 	struct mlxsw_sp_fib_node *fib_node;
 	struct mlxsw_sp_fib *fib;
 	struct mlxsw_sp_vr *vr;
 	int err;
 
-	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
+	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
 	if (IS_ERR(vr))
 		return ERR_CAST(vr);
-	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
+	fib = mlxsw_sp_vr_fib(vr, proto);
 
-	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
-					    sizeof(fen_info->dst),
-					    fen_info->dst_len);
+	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
 	if (fib_node)
 		return fib_node;
 
-	fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
-					    sizeof(fen_info->dst),
-					    fen_info->dst_len);
+	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
 	if (!fib_node) {
 		err = -ENOMEM;
 		goto err_fib_node_create;
@@ -2435,8 +2708,8 @@ err_fib_node_create:
 	return ERR_PTR(err);
 }
 
-static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
-				   struct mlxsw_sp_fib_node *fib_node)
+static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_fib_node *fib_node)
 {
 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
 
@@ -2447,95 +2720,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_vr_put(vr);
 }
 
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-			      const struct mlxsw_sp_fib_entry_params *params)
+			      const struct mlxsw_sp_fib4_entry *new4_entry)
 {
-	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_fib4_entry *fib4_entry;
 
-	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
-		if (fib_entry->params.tb_id > params->tb_id)
+	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+		if (fib4_entry->tb_id > new4_entry->tb_id)
 			continue;
-		if (fib_entry->params.tb_id != params->tb_id)
+		if (fib4_entry->tb_id != new4_entry->tb_id)
 			break;
-		if (fib_entry->params.tos > params->tos)
+		if (fib4_entry->tos > new4_entry->tos)
 			continue;
-		if (fib_entry->params.prio >= params->prio ||
-		    fib_entry->params.tos < params->tos)
-			return fib_entry;
+		if (fib4_entry->prio >= new4_entry->prio ||
+		    fib4_entry->tos < new4_entry->tos)
+			return fib4_entry;
 	}
 
 	return NULL;
 }
 
-static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
-					  struct mlxsw_sp_fib_entry *new_entry)
+static int
+mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
+			       struct mlxsw_sp_fib4_entry *new4_entry)
 {
 	struct mlxsw_sp_fib_node *fib_node;
 
-	if (WARN_ON(!fib_entry))
+	if (WARN_ON(!fib4_entry))
 		return -EINVAL;
 
-	fib_node = fib_entry->fib_node;
-	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
-		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
-		    fib_entry->params.tos != new_entry->params.tos ||
-		    fib_entry->params.prio != new_entry->params.prio)
+	fib_node = fib4_entry->common.fib_node;
+	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
+				 common.list) {
+		if (fib4_entry->tb_id != new4_entry->tb_id ||
+		    fib4_entry->tos != new4_entry->tos ||
+		    fib4_entry->prio != new4_entry->prio)
 			break;
 	}
 
-	list_add_tail(&new_entry->list, &fib_entry->list);
+	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
 	return 0;
 }
 
 static int
-mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
-			       struct mlxsw_sp_fib_entry *new_entry,
+mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
 			       bool replace, bool append)
 {
-	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
+	struct mlxsw_sp_fib4_entry *fib4_entry;
 
-	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
+	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
 
 	if (append)
-		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
-	if (replace && WARN_ON(!fib_entry))
+		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
+	if (replace && WARN_ON(!fib4_entry))
 		return -EINVAL;
 
 	/* Insert new entry before replaced one, so that we can later
 	 * remove the second.
 	 */
-	if (fib_entry) {
-		list_add_tail(&new_entry->list, &fib_entry->list);
+	if (fib4_entry) {
+		list_add_tail(&new4_entry->common.list,
+			      &fib4_entry->common.list);
 	} else {
-		struct mlxsw_sp_fib_entry *last;
+		struct mlxsw_sp_fib4_entry *last;
 
-		list_for_each_entry(last, &fib_node->entry_list, list) {
-			if (new_entry->params.tb_id > last->params.tb_id)
+		list_for_each_entry(last, &fib_node->entry_list, common.list) {
+			if (new4_entry->tb_id > last->tb_id)
 				break;
-			fib_entry = last;
+			fib4_entry = last;
 		}
 
-		if (fib_entry)
-			list_add(&new_entry->list, &fib_entry->list);
+		if (fib4_entry)
+			list_add(&new4_entry->common.list,
+				 &fib4_entry->common.list);
 		else
-			list_add(&new_entry->list, &fib_node->entry_list);
+			list_add(&new4_entry->common.list,
+				 &fib_node->entry_list);
 	}
 
 	return 0;
 }
 
 static void
-mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
+mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
 {
-	list_del(&fib_entry->list);
+	list_del(&fib4_entry->common.list);
 }
 
-static int
-mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
-			     const struct mlxsw_sp_fib_node *fib_node,
-			     struct mlxsw_sp_fib_entry *fib_entry)
+static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib_entry *fib_entry)
 {
+	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
 		return 0;
 
@@ -2552,11 +2830,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
 }
 
-static void
-mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
-			     const struct mlxsw_sp_fib_node *fib_node,
-			     struct mlxsw_sp_fib_entry *fib_entry)
+static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib_entry *fib_entry)
 {
+	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
 		return;
 
@@ -2574,54 +2852,50 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
-					 struct mlxsw_sp_fib_entry *fib_entry,
+					 struct mlxsw_sp_fib4_entry *fib4_entry,
 					 bool replace, bool append)
 {
-	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
 	int err;
 
-	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
-					     append);
+	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
 	if (err)
 		return err;
 
-	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
+	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
 	if (err)
-		goto err_fib4_node_entry_add;
+		goto err_fib_node_entry_add;
 
 	return 0;
 
-err_fib4_node_entry_add:
-	mlxsw_sp_fib4_node_list_remove(fib_entry);
+err_fib_node_entry_add:
+	mlxsw_sp_fib4_node_list_remove(fib4_entry);
 	return err;
 }
 
 static void
 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
-				struct mlxsw_sp_fib_entry *fib_entry)
+				struct mlxsw_sp_fib4_entry *fib4_entry)
 {
-	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
-
-	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
-	mlxsw_sp_fib4_node_list_remove(fib_entry);
+	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
+	mlxsw_sp_fib4_node_list_remove(fib4_entry);
 }
 
 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_fib_entry *fib_entry,
+					struct mlxsw_sp_fib4_entry *fib4_entry,
 					bool replace)
 {
-	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
-	struct mlxsw_sp_fib_entry *replaced;
+	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
+	struct mlxsw_sp_fib4_entry *replaced;
 
 	if (!replace)
 		return;
 
 	/* We inserted the new entry before replaced one */
-	replaced = list_next_entry(fib_entry, list);
+	replaced = list_next_entry(fib4_entry, common.list);
 
 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
-	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
 static int
@@ -2629,76 +2903,723 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
 			 const struct fib_entry_notifier_info *fen_info,
 			 bool replace, bool append)
 {
-	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_fib4_entry *fib4_entry;
 	struct mlxsw_sp_fib_node *fib_node;
 	int err;
 
 	if (mlxsw_sp->router->aborted)
 		return 0;
 
-	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
+	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
+					 &fen_info->dst, sizeof(fen_info->dst),
+					 fen_info->dst_len,
+					 MLXSW_SP_L3_PROTO_IPV4);
 	if (IS_ERR(fib_node)) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
 		return PTR_ERR(fib_node);
 	}
 
-	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
-	if (IS_ERR(fib_entry)) {
+	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
+	if (IS_ERR(fib4_entry)) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
-		err = PTR_ERR(fib_entry);
+		err = PTR_ERR(fib4_entry);
 		goto err_fib4_entry_create;
 	}
 
-	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
+	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
 					    append);
 	if (err) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
 		goto err_fib4_node_entry_link;
 	}
 
-	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
+	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
 
 	return 0;
 
 err_fib4_node_entry_link:
-	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
+	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
 err_fib4_entry_create:
-	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 	return err;
 }
 
 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
 				     struct fib_entry_notifier_info *fen_info)
 {
+	struct mlxsw_sp_fib4_entry *fib4_entry;
+	struct mlxsw_sp_fib_node *fib_node;
+
+	if (mlxsw_sp->router->aborted)
+		return;
+
+	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
+	if (WARN_ON(!fib4_entry))
+		return;
+	fib_node = fib4_entry->common.fib_node;
+
+	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+{
+	/* Packets with link-local destination IP arriving to the router
+	 * are trapped to the CPU, so no need to program specific routes
+	 * for them.
+	 */
+	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+		return true;
+
+	/* Multicast routes aren't supported, so ignore them. Neighbour
+	 * Discovery packets are specifically trapped.
+	 */
+	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+		return true;
+
+	/* Cloned routes are irrelevant in the forwarding path. */
+	if (rt->rt6i_flags & RTF_CACHE)
+		return true;
+
+	return false;
+}
+
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+{
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
+	if (!mlxsw_sp_rt6)
+		return ERR_PTR(-ENOMEM);
+
+	/* In case of route replace, replaced route is deleted with
+	 * no notification. Take reference to prevent accessing freed
+	 * memory.
+	 */
+	mlxsw_sp_rt6->rt = rt;
+	rt6_hold(rt);
+
+	return mlxsw_sp_rt6;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+	rt6_release(rt);
+}
+#else
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+}
+#endif
+
+static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
+	kfree(mlxsw_sp_rt6);
+}
+
+static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+{
+	/* RTF_CACHE routes are ignored */
+	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+}
+
+static struct rt6_info *
+mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+				list)->rt;
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+				 const struct rt6_info *nrt, bool replace)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry;
+
+	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
+		return NULL;
+
+	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
+		 * virtual router.
+		 */
+		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+			continue;
+		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+			break;
+		if (rt->rt6i_metric < nrt->rt6i_metric)
+			continue;
+		if (rt->rt6i_metric == nrt->rt6i_metric &&
+		    mlxsw_sp_fib6_rt_can_mp(rt))
+			return fib6_entry;
+		if (rt->rt6i_metric > nrt->rt6i_metric)
+			break;
+	}
+
+	return NULL;
+}
+
+static struct mlxsw_sp_rt6 *
+mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
+			    const struct rt6_info *rt)
+{
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+		if (mlxsw_sp_rt6->rt == rt)
+			return mlxsw_sp_rt6;
+	}
+
+	return NULL;
+}
+
+static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_nexthop_group *nh_grp,
+				  struct mlxsw_sp_nexthop *nh,
+				  const struct rt6_info *rt)
+{
+	struct net_device *dev = rt->dst.dev;
+	struct mlxsw_sp_rif *rif;
+	int err;
+
+	nh->nh_grp = nh_grp;
+	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+
+	if (!dev)
+		return 0;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!rif)
+		return 0;
+	mlxsw_sp_nexthop_rif_init(nh, rif);
+
+	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+	if (err)
+		goto err_nexthop_neigh_init;
+
+	return 0;
+
+err_nexthop_neigh_init:
+	mlxsw_sp_nexthop_rif_fini(nh);
+	return err;
+}
+
+static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_nexthop *nh)
+{
+	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+	mlxsw_sp_nexthop_rif_fini(nh);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp;
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+	struct mlxsw_sp_nexthop *nh;
+	size_t alloc_size;
+	int i = 0;
+	int err;
+
+	alloc_size = sizeof(*nh_grp) +
+		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
+	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+	if (!nh_grp)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&nh_grp->fib_list);
+#if IS_ENABLED(CONFIG_IPV6)
+	nh_grp->neigh_tbl = &nd_tbl;
+#endif
+	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
+					struct mlxsw_sp_rt6, list);
+	nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY);
+	nh_grp->count = fib6_entry->nrt6;
+	for (i = 0; i < nh_grp->count; i++) {
+		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+		nh = &nh_grp->nexthops[i];
+		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
+		if (err)
+			goto err_nexthop6_init;
+		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
+	}
+	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+	return nh_grp;
+
+err_nexthop6_init:
+	for (i--; i >= 0; i--) {
+		nh = &nh_grp->nexthops[i];
+		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+	}
+	kfree(nh_grp);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	struct mlxsw_sp_nexthop *nh;
+	int i = nh_grp->count;
+
+	for (i--; i >= 0; i--) {
+		nh = &nh_grp->nexthops[i];
+		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+	}
+	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+	WARN_ON(nh_grp->adj_index_valid);
+	kfree(nh_grp);
+}
+
+static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp;
+
+	/* For now, don't consolidate nexthop groups */
+	nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
+	if (IS_ERR(nh_grp))
+		return PTR_ERR(nh_grp);
+
+	list_add_tail(&fib6_entry->common.nexthop_group_node,
+		      &nh_grp->fib_list);
+	fib6_entry->common.nh_group = nh_grp;
+
+	return 0;
+}
+
+static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+	list_del(&fib_entry->nexthop_group_node);
+	if (!list_empty(&nh_grp->fib_list))
+		return;
+	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static int
+mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
+	int err;
+
+	fib6_entry->common.nh_group = NULL;
+	list_del(&fib6_entry->common.nexthop_group_node);
+
+	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+	if (err)
+		goto err_nexthop6_group_get;
+
+	/* In case this entry is offloaded, then the adjacency index
+	 * currently associated with it in the device's table is that
+	 * of the old group. Start using the new one instead.
+	 */
+	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+	if (err)
+		goto err_fib_node_entry_add;
+
+	if (list_empty(&old_nh_grp->fib_list))
+		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
+
+	return 0;
+
+err_fib_node_entry_add:
+	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+err_nexthop6_group_get:
+	list_add_tail(&fib6_entry->common.nexthop_group_node,
+		      &old_nh_grp->fib_list);
+	fib6_entry->common.nh_group = old_nh_grp;
+	return err;
+}
+
+static int
+mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_fib6_entry *fib6_entry,
+				struct rt6_info *rt)
+{
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+	int err;
+
+	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+	if (IS_ERR(mlxsw_sp_rt6))
+		return PTR_ERR(mlxsw_sp_rt6);
+
+	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+	fib6_entry->nrt6++;
+
+	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+	if (err)
+		goto err_nexthop6_group_update;
+
+	return 0;
+
+err_nexthop6_group_update:
+	fib6_entry->nrt6--;
+	list_del(&mlxsw_sp_rt6->list);
+	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	return err;
+}
+
+static void
+mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_fib6_entry *fib6_entry,
+				struct rt6_info *rt)
+{
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
+	if (WARN_ON(!mlxsw_sp_rt6))
+		return;
+
+	fib6_entry->nrt6--;
+	list_del(&mlxsw_sp_rt6->list);
+	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+}
+
+static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry,
+					 const struct rt6_info *rt)
+{
+	/* Packets hitting RTF_REJECT routes need to be discarded by the
+	 * stack. We can rely on their destination device not having a
+	 * RIF (it's the loopback device) and can thus use action type
+	 * local, which will cause them to be trapped with a lower
+	 * priority than packets that need to be locally received.
+	 */
+	if (rt->rt6i_flags & RTF_LOCAL)
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+	else if (rt->rt6i_flags & RTF_REJECT)
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+	else if (rt->rt6i_flags & RTF_GATEWAY)
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+	else
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+}
+
+static void
+mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
+
+	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
+				 list) {
+		fib6_entry->nrt6--;
+		list_del(&mlxsw_sp_rt6->list);
+		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	}
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_fib_node *fib_node,
+			   struct rt6_info *rt)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+	int err;
+
+	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
+	if (!fib6_entry)
+		return ERR_PTR(-ENOMEM);
+	fib_entry = &fib6_entry->common;
+
+	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+	if (IS_ERR(mlxsw_sp_rt6)) {
+		err = PTR_ERR(mlxsw_sp_rt6);
+		goto err_rt6_create;
+	}
+
+	mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt);
+
+	INIT_LIST_HEAD(&fib6_entry->rt6_list);
+	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+	fib6_entry->nrt6 = 1;
+	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+	if (err)
+		goto err_nexthop6_group_get;
+
+	fib_entry->fib_node = fib_node;
+
+	return fib6_entry;
+
+err_nexthop6_group_get:
+	list_del(&mlxsw_sp_rt6->list);
+	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+err_rt6_create:
+	kfree(fib6_entry);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
+	WARN_ON(fib6_entry->nrt6);
+	kfree(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+			      const struct rt6_info *nrt, bool replace)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
+
+	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+			continue;
+		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+			break;
+		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+			    mlxsw_sp_fib6_rt_can_mp(nrt))
+				return fib6_entry;
+			if (mlxsw_sp_fib6_rt_can_mp(nrt))
+				fallback = fallback ?: fib6_entry;
+		}
+		if (rt->rt6i_metric > nrt->rt6i_metric)
+			return fallback ?: fib6_entry;
+	}
+
+	return fallback;
+}
+
+static int
+mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
+			       bool replace)
+{
+	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
+	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+	struct mlxsw_sp_fib6_entry *fib6_entry;
+
+	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+
+	if (replace && WARN_ON(!fib6_entry))
+		return -EINVAL;
+
+	if (fib6_entry) {
+		list_add_tail(&new6_entry->common.list,
+			      &fib6_entry->common.list);
+	} else {
+		struct mlxsw_sp_fib6_entry *last;
+
+		list_for_each_entry(last, &fib_node->entry_list, common.list) {
+			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+
+			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
+				break;
+			fib6_entry = last;
+		}
+
+		if (fib6_entry)
+			list_add(&new6_entry->common.list,
+				 &fib6_entry->common.list);
+		else
+			list_add(&new6_entry->common.list,
+				 &fib_node->entry_list);
+	}
+
+	return 0;
+}
+
+static void
+mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	list_del(&fib6_entry->common.list);
+}
+
+static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
+					 struct mlxsw_sp_fib6_entry *fib6_entry,
+					 bool replace)
+{
+	int err;
+
+	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+	if (err)
+		goto err_fib_node_entry_add;
+
+	return 0;
+
+err_fib_node_entry_add:
+	mlxsw_sp_fib6_node_list_remove(fib6_entry);
+	return err;
+}
+
+static void
+mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
+	mlxsw_sp_fib6_node_list_remove(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
+			   const struct rt6_info *rt)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry;
+	struct mlxsw_sp_fib_node *fib_node;
+	struct mlxsw_sp_fib *fib;
+	struct mlxsw_sp_vr *vr;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
+	if (!vr)
+		return NULL;
+	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
+
+	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
+					    sizeof(rt->rt6i_dst.addr),
+					    rt->rt6i_dst.plen);
+	if (!fib_node)
+		return NULL;
+
+	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
+		    rt->rt6i_metric == iter_rt->rt6i_metric &&
+		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
+			return fib6_entry;
+	}
+
+	return NULL;
+}
+
+static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib6_entry *fib6_entry,
+					bool replace)
+{
+	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
+	struct mlxsw_sp_fib6_entry *replaced;
+
+	if (!replace)
+		return;
+
+	replaced = list_next_entry(fib6_entry, common.list);
+
+	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
+	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
+	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
+				    struct rt6_info *rt, bool replace)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry;
+	struct mlxsw_sp_fib_node *fib_node;
+	int err;
+
+	if (mlxsw_sp->router->aborted)
+		return 0;
+
+	if (rt->rt6i_src.plen)
+		return -EINVAL;
+
+	if (mlxsw_sp_fib6_rt_should_ignore(rt))
+		return 0;
+
+	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
+					 &rt->rt6i_dst.addr,
+					 sizeof(rt->rt6i_dst.addr),
+					 rt->rt6i_dst.plen,
+					 MLXSW_SP_L3_PROTO_IPV6);
+	if (IS_ERR(fib_node))
+		return PTR_ERR(fib_node);
+
+	/* Before creating a new entry, try to append route to an existing
+	 * multipath entry.
+	 */
+	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
+	if (fib6_entry) {
+		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+		if (err)
+			goto err_fib6_entry_nexthop_add;
+		return 0;
+	}
+
+	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+	if (IS_ERR(fib6_entry)) {
+		err = PTR_ERR(fib6_entry);
+		goto err_fib6_entry_create;
+	}
+
+	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+	if (err)
+		goto err_fib6_node_entry_link;
+
+	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
+
+	return 0;
+
+err_fib6_node_entry_link:
+	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+err_fib6_entry_create:
+err_fib6_entry_nexthop_add:
+	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+	return err;
+}
+
+static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
+				     struct rt6_info *rt)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
 
 	if (mlxsw_sp->router->aborted)
 		return;
 
-	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
-	if (WARN_ON(!fib_entry))
+	if (mlxsw_sp_fib6_rt_should_ignore(rt))
+		return;
+
+	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
+	if (WARN_ON(!fib6_entry))
 		return;
-	fib_node = fib_entry->fib_node;
 
-	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
-	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
-	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+	/* If route is part of a multipath entry, but not the last one
+	 * removed, then only reduce its nexthop group.
+	 */
+	if (!list_is_singular(&fib6_entry->rt6_list)) {
+		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+		return;
+	}
+
+	fib_node = fib6_entry->common.fib_node;
+
+	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
-static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
+					    enum mlxsw_reg_ralxx_protocol proto,
+					    u8 tree_id)
 {
 	char ralta_pl[MLXSW_REG_RALTA_LEN];
 	char ralst_pl[MLXSW_REG_RALST_LEN];
 	int i, err;
 
-	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
-			     MLXSW_SP_LPM_TREE_MIN);
+	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 	if (err)
 		return err;
 
-	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
+	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
 	if (err)
 		return err;
@@ -2711,17 +3632,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
 		if (!mlxsw_sp_vr_is_used(vr))
 			continue;
 
-		mlxsw_reg_raltb_pack(raltb_pl, vr->id,
-				     MLXSW_REG_RALXX_PROTOCOL_IPV4,
-				     MLXSW_SP_LPM_TREE_MIN);
+		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
 				      raltb_pl);
 		if (err)
 			return err;
 
-		mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
-				      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
-				      0);
+		mlxsw_reg_ralue_pack(ralue_pl, proto,
+				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
 				      ralue_pl);
@@ -2732,17 +3650,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 }
 
+static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+{
+	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
+	int err;
+
+	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+					       MLXSW_SP_LPM_TREE_MIN);
+	if (err)
+		return err;
+
+	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
+	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+						MLXSW_SP_LPM_TREE_MIN + 1);
+}
+
 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_fib_node *fib_node)
 {
-	struct mlxsw_sp_fib_entry *fib_entry, *tmp;
+	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
 
-	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
-		bool do_break = &tmp->list == &fib_node->entry_list;
+	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
+				 common.list) {
+		bool do_break = &tmp->common.list == &fib_node->entry_list;
 
-		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
-		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
-		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 		/* Break when entry list is empty and node was freed.
 		 * Otherwise, we'll access freed memory in the next
 		 * iteration.
@@ -2752,6 +3686,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
+static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_fib_node *fib_node)
+{
+	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
+
+	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
+				 common.list) {
+		bool do_break = &tmp->common.list == &fib_node->entry_list;
+
+		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+		if (do_break)
+			break;
+	}
+}
+
 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_fib_node *fib_node)
 {
@@ -2760,7 +3711,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
-		WARN_ON_ONCE(1);
+		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
 		break;
 	}
 }
@@ -2791,10 +3742,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 		if (!mlxsw_sp_vr_is_used(vr))
 			continue;
 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+
+		/* If virtual router was only used for IPv4, then it's no
+		 * longer used.
+		 */
+		if (!mlxsw_sp_vr_is_used(vr))
+			continue;
+		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
 	}
 }
 
-static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
 {
 	int err;
 
@@ -2811,6 +3769,7 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
 struct mlxsw_sp_fib_event_work {
 	struct work_struct work;
 	union {
+		struct fib6_entry_notifier_info fen6_info;
 		struct fib_entry_notifier_info fen_info;
 		struct fib_rule_notifier_info fr_info;
 		struct fib_nh_notifier_info fnh_info;
@@ -2819,7 +3778,7 @@ struct mlxsw_sp_fib_event_work {
 	unsigned long event;
 };
 
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 {
 	struct mlxsw_sp_fib_event_work *fib_work =
 		container_of(work, struct mlxsw_sp_fib_event_work, work);
@@ -2839,7 +3798,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
 					       replace, append);
 		if (err)
-			mlxsw_sp_router_fib4_abort(mlxsw_sp);
+			mlxsw_sp_router_fib_abort(mlxsw_sp);
 		fib_info_put(fib_work->fen_info.fi);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
@@ -2850,13 +3809,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
 	case FIB_EVENT_RULE_DEL:
 		rule = fib_work->fr_info.rule;
 		if (!fib4_rule_default(rule) && !rule->l3mdev)
-			mlxsw_sp_router_fib4_abort(mlxsw_sp);
+			mlxsw_sp_router_fib_abort(mlxsw_sp);
 		fib_rule_put(rule);
 		break;
 	case FIB_EVENT_NH_ADD: /* fall through */
 	case FIB_EVENT_NH_DEL:
-		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
-				       fib_work->fnh_info.fib_nh);
+		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
+					fib_work->fnh_info.fib_nh);
 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
 		break;
 	}
@@ -2864,6 +3823,87 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
 	kfree(fib_work);
 }
 
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
+{
+	struct mlxsw_sp_fib_event_work *fib_work =
+		container_of(work, struct mlxsw_sp_fib_event_work, work);
+	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+	struct fib_rule *rule;
+	bool replace;
+	int err;
+
+	rtnl_lock();
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_ADD:
+		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
+					       fib_work->fen6_info.rt, replace);
+		if (err)
+			mlxsw_sp_router_fib_abort(mlxsw_sp);
+		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+		break;
+	case FIB_EVENT_ENTRY_DEL:
+		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
+		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+		break;
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		rule = fib_work->fr_info.rule;
+		if (!fib6_rule_default(rule) && !rule->l3mdev)
+			mlxsw_sp_router_fib_abort(mlxsw_sp);
+		fib_rule_put(rule);
+		break;
+	}
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
+				       struct fib_notifier_info *info)
+{
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_APPEND: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
+		/* Take referece on fib_info to prevent it from being
+		 * freed while work is queued. Release it afterwards.
+		 */
+		fib_info_hold(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+		fib_rule_get(fib_work->fr_info.rule);
+		break;
+	case FIB_EVENT_NH_ADD: /* fall through */
+	case FIB_EVENT_NH_DEL:
+		memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	}
+}
+
+static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+				       struct fib_notifier_info *info)
+{
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
+		rt6_hold(fib_work->fen6_info.rt);
+		break;
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+		fib_rule_get(fib_work->fr_info.rule);
+		break;
+	}
+}
+
 /* Called with rcu_read_lock() */
 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 				     unsigned long event, void *ptr)
@@ -2879,31 +3919,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	if (WARN_ON(!fib_work))
 		return NOTIFY_BAD;
 
-	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
 	fib_work->mlxsw_sp = router->mlxsw_sp;
 	fib_work->event = event;
 
-	switch (event) {
-	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
-	case FIB_EVENT_ENTRY_APPEND: /* fall through */
-	case FIB_EVENT_ENTRY_ADD: /* fall through */
-	case FIB_EVENT_ENTRY_DEL:
-		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
-		/* Take referece on fib_info to prevent it from being
-		 * freed while work is queued. Release it afterwards.
-		 */
-		fib_info_hold(fib_work->fen_info.fi);
-		break;
-	case FIB_EVENT_RULE_ADD: /* fall through */
-	case FIB_EVENT_RULE_DEL:
-		memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
-		fib_rule_get(fib_work->fr_info.rule);
+	switch (info->family) {
+	case AF_INET:
+		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+		mlxsw_sp_router_fib4_event(fib_work, info);
 		break;
-	case FIB_EVENT_NH_ADD: /* fall through */
-	case FIB_EVENT_NH_DEL:
-		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
-		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+	case AF_INET6:
+		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+		mlxsw_sp_router_fib6_event(fib_work, info);
 		break;
 	}
 
@@ -2948,17 +3975,28 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
 }
 
-static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
-				       const struct in_device *in_dev,
-				       unsigned long event)
+static bool
+mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
+			   unsigned long event)
 {
+	struct inet6_dev *inet6_dev;
+	bool addr_list_empty = true;
+	struct in_device *idev;
+
 	switch (event) {
 	case NETDEV_UP:
-		if (!rif)
-			return true;
-		return false;
+		return rif == NULL;
 	case NETDEV_DOWN:
-		if (rif && !in_dev->ifa_list &&
+		idev = __in_dev_get_rtnl(dev);
+		if (idev && idev->ifa_list)
+			addr_list_empty = false;
+
+		inet6_dev = __in6_dev_get(dev);
+		if (addr_list_empty && inet6_dev &&
+		    !list_empty(&inet6_dev->addr_list))
+			addr_list_empty = false;
+
+		if (rif && addr_list_empty &&
 		    !netif_is_l3_slave(rif->dev))
 			return true;
 		/* It is possible we already removed the RIF ourselves
@@ -3356,7 +4394,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
 		goto out;
 
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
-	if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
+	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
 	err = __mlxsw_sp_inetaddr_event(dev, event);
@@ -3364,6 +4402,61 @@ out:
 	return notifier_from_errno(err);
 }
 
+struct mlxsw_sp_inet6addr_event_work {
+	struct work_struct work;
+	struct net_device *dev;
+	unsigned long event;
+};
+
+static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
+{
+	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
+		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
+	struct net_device *dev = inet6addr_work->dev;
+	unsigned long event = inet6addr_work->event;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_rif *rif;
+
+	rtnl_lock();
+	mlxsw_sp = mlxsw_sp_lower_get(dev);
+	if (!mlxsw_sp)
+		goto out;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!mlxsw_sp_rif_should_config(rif, dev, event))
+		goto out;
+
+	__mlxsw_sp_inetaddr_event(dev, event);
+out:
+	rtnl_unlock();
+	dev_put(dev);
+	kfree(inet6addr_work);
+}
+
+/* Called with rcu_read_lock() */
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+			     unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
+	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
+	struct net_device *dev = if6->idev->dev;
+
+	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
+		return NOTIFY_DONE;
+
+	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
+	if (!inet6addr_work)
+		return NOTIFY_BAD;
+
+	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
+	inet6addr_work->dev = dev;
+	inet6addr_work->event = event;
+	dev_hold(dev);
+	mlxsw_core_schedule_work(&inet6addr_work->work);
+
+	return NOTIFY_DONE;
+}
+
 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
 			     const char *mac, int mtu)
 {
@@ -3565,6 +4658,11 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
 	if (err)
 		return err;
 
+	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+				     mlxsw_sp_router_port(mlxsw_sp), true);
+	if (err)
+		goto err_fid_mc_flood_set;
+
 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 				     mlxsw_sp_router_port(mlxsw_sp), true);
 	if (err)
@@ -3573,6 +4671,9 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
 	return 0;
 
 err_fid_bc_flood_set:
+	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+			       mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
 	return err;
 }
@@ -3584,6 +4685,8 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
 
 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 			       mlxsw_sp_router_port(mlxsw_sp), false);
+	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+			       mlxsw_sp_router_port(mlxsw_sp), false);
 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
 }
 
@@ -3614,6 +4717,11 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
 	if (err)
 		return err;
 
+	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+				     mlxsw_sp_router_port(mlxsw_sp), true);
+	if (err)
+		goto err_fid_mc_flood_set;
+
 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 				     mlxsw_sp_router_port(mlxsw_sp), true);
 	if (err)
@@ -3622,6 +4730,9 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
 	return 0;
 
 err_fid_bc_flood_set:
+	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+			       mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
 	return err;
 }
@@ -3633,6 +4744,8 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
 
 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 			       mlxsw_sp_router_port(mlxsw_sp), false);
+	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+			       mlxsw_sp_router_port(mlxsw_sp), false);
 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
 }
 
@@ -3704,7 +4817,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 		return -EIO;
 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
 
-	mlxsw_reg_rgcr_pack(rgcr_pl, true);
+	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 	if (err)
@@ -3716,7 +4829,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
 
-	mlxsw_reg_rgcr_pack(rgcr_pl, false);
+	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 12b5ed58f3eb..61652396bf75 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -61,11 +61,32 @@ enum {
 	MLXSW_TRAP_ID_MTUERROR = 0x52,
 	MLXSW_TRAP_ID_TTLERROR = 0x53,
 	MLXSW_TRAP_ID_LBERROR = 0x54,
-	MLXSW_TRAP_ID_OSPF = 0x55,
+	MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
+	MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
+	MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61,
+	MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62,
+	MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63,
+	MLXSW_TRAP_ID_IPV6_OSPF = 0x64,
+	MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65,
+	MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66,
+	MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67,
+	MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68,
+	MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
+	MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
-	MLXSW_TRAP_ID_BGP_IPV4 = 0x88,
+	MLXSW_TRAP_ID_IPV4_BGP = 0x88,
+	MLXSW_TRAP_ID_IPV6_BGP = 0x89,
+	MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
+	MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B,
+	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C,
+	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D,
+	MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E,
 	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
+	MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
+	MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
+	MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
+	MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
 	MLXSW_TRAP_ID_ACL0 = 0x1C0,
 
 	MLXSW_TRAP_ID_MAX = 0x1FF
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index c0d7d5eec7e7..2e4effa9fe45 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -161,7 +161,7 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 
 	priv->rx_head = 0;
 
-	/* reset the MAC controller TX/RX desciptor base address */
+	/* reset the MAC controller TX/RX descriptor base address */
 	writel(priv->tx_base, priv->base + REG_TXR_BASE_ADDRESS);
 	writel(priv->rx_base, priv->base + REG_RXR_BASE_ADDRESS);
 }
@@ -269,9 +269,8 @@ rx_next:
 		priv->rx_head = rx_head;
 	}
 
-	if (rx < budget) {
+	if (rx < budget)
 		napi_complete_done(napi, rx);
-	}
 
 	priv->reg_imr |= RPKT_FINISH_M;
 	writel(priv->reg_imr, priv->base + REG_INTERRUPT_MASK);
@@ -289,8 +288,8 @@ static int moxart_tx_queue_space(struct net_device *ndev)
 static void moxart_tx_finished(struct net_device *ndev)
 {
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-	unsigned tx_head = priv->tx_head;
-	unsigned tx_tail = priv->tx_tail;
+	unsigned int tx_head = priv->tx_head;
+	unsigned int tx_tail = priv->tx_tail;
 
 	while (tx_tail != tx_head) {
 		dma_unmap_single(&ndev->dev, priv->tx_mapping[tx_tail],
@@ -312,7 +311,7 @@ static void moxart_tx_finished(struct net_device *ndev)
 
 static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
 {
-	struct net_device *ndev = (struct net_device *) dev_id;
+	struct net_device *ndev = (struct net_device *)dev_id;
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
 	unsigned int ists = readl(priv->base + REG_INTERRUPT_STATUS);
 
@@ -495,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
 	priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE *
 						TX_DESC_NUM, &priv->tx_base,
 						GFP_DMA | GFP_KERNEL);
-	if (priv->tx_desc_base == NULL) {
+	if (!priv->tx_desc_base) {
 		ret = -ENOMEM;
 		goto init_fail;
 	}
@@ -503,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
 	priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE *
 						RX_DESC_NUM, &priv->rx_base,
 						GFP_DMA | GFP_KERNEL);
-	if (priv->rx_desc_base == NULL) {
+	if (!priv->rx_desc_base) {
 		ret = -ENOMEM;
 		goto init_fail;
 	}
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h
index 686b8957d5cf..bee608b547d1 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -55,17 +55,17 @@
 #define RX_DESC2_ADDRESS_VIRT	4
 
 #define TX_DESC_NUM		64
-#define TX_DESC_NUM_MASK	(TX_DESC_NUM-1)
+#define TX_DESC_NUM_MASK	(TX_DESC_NUM - 1)
 #define TX_NEXT(N)		(((N) + 1) & (TX_DESC_NUM_MASK))
 #define TX_BUF_SIZE		1600
-#define TX_BUF_SIZE_MAX		(TX_DESC1_BUF_SIZE_MASK+1)
+#define TX_BUF_SIZE_MAX		(TX_DESC1_BUF_SIZE_MASK + 1)
 #define TX_WAKE_THRESHOLD	16
 
 #define RX_DESC_NUM		64
-#define RX_DESC_NUM_MASK	(RX_DESC_NUM-1)
+#define RX_DESC_NUM_MASK	(RX_DESC_NUM - 1)
 #define RX_NEXT(N)		(((N) + 1) & (RX_DESC_NUM_MASK))
 #define RX_BUF_SIZE		1600
-#define RX_BUF_SIZE_MAX		(RX_DESC1_BUF_SIZE_MASK+1)
+#define RX_BUF_SIZE_MAX		(RX_DESC1_BUF_SIZE_MASK + 1)
 
 #define REG_INTERRUPT_STATUS	0
 #define REG_INTERRUPT_MASK	4
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index fd2ec36c6fa1..462eda926b1c 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -42,8 +42,6 @@
  *     aggregated as a single large packet
  * napi: This parameter used to enable/disable NAPI (polling Rx)
  *     Possible values '1' for enable and '0' for disable. Default is '1'
- * ufo: This parameter used to enable/disable UDP Fragmentation Offload(UFO)
- *      Possible values '1' for enable and '0' for disable. Default is '0'
  * vlan_tag_strip: This can be used to enable or disable vlan stripping.
  *                 Possible values '1' for enable , '0' for disable.
  *                 Default is '2' - which means disable in promisc mode
@@ -453,7 +451,6 @@ S2IO_PARM_INT(lro_max_pkts, 0xFFFF);
 S2IO_PARM_INT(indicate_max_pkts, 0);
 
 S2IO_PARM_INT(napi, 1);
-S2IO_PARM_INT(ufo, 0);
 S2IO_PARM_INT(vlan_tag_strip, NO_STRIP_IN_PROMISC);
 
 static unsigned int tx_fifo_len[MAX_TX_FIFOS] =
@@ -4128,32 +4125,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	frg_len = skb_headlen(skb);
-	if (offload_type == SKB_GSO_UDP) {
-		int ufo_size;
-
-		ufo_size = s2io_udp_mss(skb);
-		ufo_size &= ~7;
-		txdp->Control_1 |= TXD_UFO_EN;
-		txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
-		txdp->Control_1 |= TXD_BUFFER0_SIZE(8);
-#ifdef __BIG_ENDIAN
-		/* both variants do cpu_to_be64(be32_to_cpu(...)) */
-		fifo->ufo_in_band_v[put_off] =
-			(__force u64)skb_shinfo(skb)->ip6_frag_id;
-#else
-		fifo->ufo_in_band_v[put_off] =
-			(__force u64)skb_shinfo(skb)->ip6_frag_id << 32;
-#endif
-		txdp->Host_Control = (unsigned long)fifo->ufo_in_band_v;
-		txdp->Buffer_Pointer = pci_map_single(sp->pdev,
-						      fifo->ufo_in_band_v,
-						      sizeof(u64),
-						      PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
-			goto pci_map_failed;
-		txdp++;
-	}
-
 	txdp->Buffer_Pointer = pci_map_single(sp->pdev, skb->data,
 					      frg_len, PCI_DMA_TODEVICE);
 	if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
@@ -4161,8 +4132,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	txdp->Host_Control = (unsigned long)skb;
 	txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
-	if (offload_type == SKB_GSO_UDP)
-		txdp->Control_1 |= TXD_UFO_EN;
 
 	frg_cnt = skb_shinfo(skb)->nr_frags;
 	/* For fragmented SKB. */
@@ -4177,14 +4146,9 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 							     skb_frag_size(frag),
 							     DMA_TO_DEVICE);
 		txdp->Control_1 = TXD_BUFFER0_SIZE(skb_frag_size(frag));
-		if (offload_type == SKB_GSO_UDP)
-			txdp->Control_1 |= TXD_UFO_EN;
 	}
 	txdp->Control_1 |= TXD_GATHER_CODE_LAST;
 
-	if (offload_type == SKB_GSO_UDP)
-		frg_cnt++; /* as Txd0 was used for inband header */
-
 	tx_fifo = mac_control->tx_FIFO_start[queue];
 	val64 = fifo->list_info[put_off].list_phy_addr;
 	writeq(val64, &tx_fifo->TxDL_Pointer);
@@ -7910,11 +7874,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 		NETIF_F_RXCSUM | NETIF_F_LRO;
 	dev->features |= dev->hw_features |
 		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
-	if (sp->device_type & XFRAME_II_DEVICE) {
-		dev->hw_features |= NETIF_F_UFO;
-		if (ufo)
-			dev->features |= NETIF_F_UFO;
-	}
 	if (sp->high_dma_flag == true)
 		dev->features |= NETIF_F_HIGHDMA;
 	dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
@@ -8147,10 +8106,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
 	DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
 		  dev->name);
-	if (ufo)
-		DBG_PRINT(ERR_DBG,
-			  "%s: UDP Fragmentation Offload(UFO) enabled\n",
-			  dev->name);
 	/* Initialize device name */
 	snprintf(sp->name, sizeof(sp->name), "%s Neterion %s", dev->name,
 		 sp->product_name);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index afbdf5fd4e4f..f981f60ec306 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -121,23 +121,21 @@ static void nfp_bpf_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
 }
 
 static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			    u32 handle, __be16 proto, struct tc_to_netdev *tc)
+			    enum tc_setup_type type, void *type_data)
 {
+	struct tc_cls_bpf_offload *cls_bpf = type_data;
 	struct nfp_net *nn = netdev_priv(netdev);
 
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
-		return -EOPNOTSUPP;
-	if (proto != htons(ETH_P_ALL))
+	if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) ||
+	    TC_H_MAJ(cls_bpf->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
+	    cls_bpf->common.chain_index)
 		return -EOPNOTSUPP;
 
-	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
-		if (!nn->dp.bpf_offload_xdp)
-			return nfp_net_bpf_offload(nn, tc->cls_bpf);
-		else
-			return -EBUSY;
-	}
+	if (nn->dp.bpf_offload_xdp)
+		return -EBUSY;
 
-	return -EINVAL;
+	return nfp_net_bpf_offload(nn, cls_bpf);
 }
 
 static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 78d80a364edb..a88bb5bc0082 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -115,14 +115,14 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 
 	/* TC direct action */
 	if (cls_bpf->exts_integrated) {
-		if (tc_no_actions(cls_bpf->exts))
+		if (!tcf_exts_has_actions(cls_bpf->exts))
 			return NN_ACT_DIRECT;
 
 		return -EOPNOTSUPP;
 	}
 
 	/* TC legacy mode */
-	if (!tc_single_action(cls_bpf->exts))
+	if (!tcf_exts_has_one_action(cls_bpf->exts))
 		return -EOPNOTSUPP;
 
 	tcf_exts_to_list(cls_bpf->exts, &actions);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 9e64c048e83f..71e4f4f4e9ba 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -38,8 +38,8 @@
 #include <linux/hashtable.h>
 #include <linux/time64.h>
 #include <linux/types.h>
+#include <net/pkt_cls.h>
 
-struct tc_to_netdev;
 struct net_device;
 struct nfp_app;
 
@@ -135,7 +135,7 @@ int nfp_flower_metadata_init(struct nfp_app *app);
 void nfp_flower_metadata_cleanup(struct nfp_app *app);
 
 int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			u32 handle, __be16 proto, struct tc_to_netdev *tc);
+			enum tc_setup_type type, void *type_data);
 int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 				  struct nfp_fl_key_ls *key_ls,
 				  struct net_device *netdev,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 4ad10bd5e139..01767c7376d5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -385,16 +385,15 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
 }
 
 int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			u32 handle, __be16 proto, struct tc_to_netdev *tc)
+			enum tc_setup_type type, void *type_data)
 {
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
-		return -EOPNOTSUPP;
+	struct tc_cls_flower_offload *cls_flower = type_data;
 
-	if (!eth_proto_is_802_3(proto))
+	if (type != TC_SETUP_CLSFLOWER ||
+	    TC_H_MAJ(cls_flower->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    !eth_proto_is_802_3(cls_flower->common.protocol) ||
+	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
-	if (tc->type != TC_SETUP_CLSFLOWER)
-		return -EINVAL;
-
-	return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
+	return nfp_flower_repr_offload(app, netdev, cls_flower);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 5d714e10d9a9..f34e8778fae2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -42,7 +42,6 @@ struct bpf_prog;
 struct net_device;
 struct pci_dev;
 struct sk_buff;
-struct tc_to_netdev;
 struct sk_buff;
 struct nfp_app;
 struct nfp_cpp;
@@ -109,7 +108,7 @@ struct nfp_app_type {
 	void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
 
 	int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
-			u32 handle, __be16 proto, struct tc_to_netdev *tc);
+			enum tc_setup_type type, void *type_data);
 	bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn);
 	int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn,
 			   struct bpf_prog *prog);
@@ -238,12 +237,11 @@ static inline bool nfp_app_tc_busy(struct nfp_app *app, struct nfp_net *nn)
 
 static inline int nfp_app_setup_tc(struct nfp_app *app,
 				   struct net_device *netdev,
-				   u32 handle, __be16 proto,
-				   struct tc_to_netdev *tc)
+				   enum tc_setup_type type, void *type_data)
 {
 	if (!app || !app->type->setup_tc)
 		return -EOPNOTSUPP;
-	return app->type->setup_tc(app, netdev, handle, proto, tc);
+	return app->type->setup_tc(app, netdev, type, type_data);
 }
 
 static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index d67969d3e484..dd769eceb33d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -174,6 +174,21 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
 		return nfp_pcie_sriov_enable(pdev, num_vfs);
 }
 
+static const struct firmware *
+nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name)
+{
+	const struct firmware *fw = NULL;
+	int err;
+
+	err = request_firmware_direct(&fw, name, &pdev->dev);
+	nfp_info(pf->cpp, "  %s: %s\n",
+		 name, err ? "not found" : "found, loading...");
+	if (err)
+		return NULL;
+
+	return fw;
+}
+
 /**
  * nfp_net_fw_find() - Find the correct firmware image for netdev mode
  * @pdev:	PCI Device structure
@@ -184,13 +199,32 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
 static const struct firmware *
 nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
 {
-	const struct firmware *fw = NULL;
 	struct nfp_eth_table_port *port;
+	const struct firmware *fw;
 	const char *fw_model;
 	char fw_name[256];
-	int spc, err = 0;
-	int i, j;
+	const u8 *serial;
+	u16 interface;
+	int spc, i, j;
 
+	nfp_info(pf->cpp, "Looking for firmware file in order of priority:\n");
+
+	/* First try to find a firmware image specific for this device */
+	interface = nfp_cpp_interface(pf->cpp);
+	nfp_cpp_serial(pf->cpp, &serial);
+	sprintf(fw_name, "netronome/serial-%pMF-%02hhx-%02hhx.nffw",
+		serial, interface >> 8, interface & 0xff);
+	fw = nfp_net_fw_request(pdev, pf, fw_name);
+	if (fw)
+		return fw;
+
+	/* Then try the PCI name */
+	sprintf(fw_name, "netronome/pci-%s.nffw", pci_name(pdev));
+	fw = nfp_net_fw_request(pdev, pf, fw_name);
+	if (fw)
+		return fw;
+
+	/* Finally try the card type and media */
 	if (!pf->eth_tbl) {
 		dev_err(&pdev->dev, "Error: can't identify media config\n");
 		return NULL;
@@ -223,13 +257,7 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
 	if (spc <= 0)
 		return NULL;
 
-	err = request_firmware(&fw, fw_name, &pdev->dev);
-	if (err)
-		return NULL;
-
-	dev_info(&pdev->dev, "Loading FW image: %s\n", fw_name);
-
-	return fw;
+	return nfp_net_fw_request(pdev, pf, fw_name);
 }
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 18750ff0ede6..ea471604450e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2658,6 +2658,7 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 	/* Step 2: Tell NFP
 	 */
 	nfp_net_clear_config_and_disable(nn);
+	nfp_port_configure(netdev, false);
 
 	/* Step 3: Free resources
 	 */
@@ -2775,16 +2776,21 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		goto err_free_all;
 
 	/* Step 2: Configure the NFP
+	 * - Ifup the physical interface if it exists
 	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
 	 * - Write MAC address (in case it changed)
 	 * - Set the MTU
 	 * - Set the Freelist buffer size
 	 * - Enable the FW
 	 */
-	err = nfp_net_set_config_and_enable(nn);
+	err = nfp_port_configure(netdev, true);
 	if (err)
 		goto err_free_all;
 
+	err = nfp_net_set_config_and_enable(nn);
+	if (err)
+		goto err_port_disable;
+
 	/* Step 3: Enable for kernel
 	 * - put some freelist descriptors on each RX ring
 	 * - enable NAPI on each ring
@@ -2795,6 +2801,8 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 
 	return 0;
 
+err_port_disable:
+	nfp_port_configure(netdev, false);
 err_free_all:
 	nfp_net_close_free_all(nn);
 	return err;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 5797dbf2b507..d5e2361f0e86 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -704,7 +704,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
 	if (!pf->rtbl) {
 		nfp_err(pf->cpp, "No %s, giving up.\n",
 			pf->fw_loaded ? "symbol table" : "firmware found");
-		return -EPROBE_DEFER;
+		return -EINVAL;
 	}
 
 	mutex_lock(&pf->lock);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 8ec5474f4b18..47daad30756c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -239,15 +239,34 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
 static int nfp_repr_stop(struct net_device *netdev)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
+	int err;
+
+	err = nfp_app_repr_stop(repr->app, repr);
+	if (err)
+		return err;
 
-	return nfp_app_repr_stop(repr->app, repr);
+	nfp_port_configure(netdev, false);
+	return 0;
 }
 
 static int nfp_repr_open(struct net_device *netdev)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
+	int err;
+
+	err = nfp_port_configure(netdev, true);
+	if (err)
+		return err;
+
+	err = nfp_app_repr_open(repr->app, repr);
+	if (err)
+		goto err_port_disable;
 
-	return nfp_app_repr_open(repr->app, repr);
+	return 0;
+
+err_port_disable:
+	nfp_port_configure(netdev, false);
+	return err;
 }
 
 const struct net_device_ops nfp_repr_netdev_ops = {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index e42644dbb865..0cf65e57addb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -88,19 +88,16 @@ const struct switchdev_ops nfp_port_switchdev_ops = {
 	.switchdev_port_attr_get	= nfp_port_attr_get,
 };
 
-int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
-		      __be16 proto, struct tc_to_netdev *tc)
+int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		      void *type_data)
 {
 	struct nfp_port *port;
 
-	if (chain_index)
-		return -EOPNOTSUPP;
-
 	port = nfp_port_from_netdev(netdev);
 	if (!port)
 		return -EOPNOTSUPP;
 
-	return nfp_app_setup_tc(port->app, netdev, handle, proto, tc);
+	return nfp_app_setup_tc(port->app, netdev, type, type_data);
 }
 
 struct nfp_port *
@@ -181,6 +178,33 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
 	return 0;
 }
 
+/**
+ * nfp_port_configure() - helper to set the interface configured bit
+ * @netdev:	net_device instance
+ * @configed:	Desired state
+ *
+ * Helper to set the ifup/ifdown state on the PHY only if there is a physical
+ * interface associated with the netdev.
+ *
+ * Return:
+ * 0 - configuration successful (or no change);
+ * -ERRNO - configuration failed.
+ */
+int nfp_port_configure(struct net_device *netdev, bool configed)
+{
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_port *port;
+	int err;
+
+	port = nfp_port_from_netdev(netdev);
+	eth_port = __nfp_port_get_eth_port(port);
+	if (!eth_port)
+		return 0;
+
+	err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed);
+	return err < 0 && err != -EOPNOTSUPP ? err : 0;
+}
+
 int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
 			   struct nfp_port *port, unsigned int id)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index a33d22e18f94..c88e376dcf0f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -36,7 +36,6 @@
 
 #include <net/devlink.h>
 
-struct tc_to_netdev;
 struct net_device;
 struct nfp_app;
 struct nfp_pf;
@@ -109,8 +108,8 @@ struct nfp_port {
 
 extern const struct switchdev_ops nfp_port_switchdev_ops;
 
-int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
-		      __be16 proto, struct tc_to_netdev *tc);
+int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		      void *type_data);
 
 struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
 struct nfp_port *
@@ -120,6 +119,7 @@ struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port);
 
 int
 nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len);
+int nfp_port_configure(struct net_device *netdev, bool configed);
 
 struct nfp_port *
 nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
index c2bc36e8649f..f6f7c085f8e0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
@@ -391,7 +391,10 @@ int nfp_eth_config_commit_end(struct nfp_nsp *nsp)
  * Enable or disable PHY module (this usually means setting the TX lanes
  * disable bits).
  *
- * Return: 0 or -ERRNO.
+ * Return:
+ * 0 - configuration successful;
+ * 1 - no changes were needed;
+ * -ERRNO - configuration failed.
  */
 int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable)
 {
@@ -427,7 +430,10 @@ int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable)
  *
  * Set the ifup/ifdown state on the PHY.
  *
- * Return: 0 or -ERRNO.
+ * Return:
+ * 0 - configuration successful;
+ * 1 - no changes were needed;
+ * -ERRNO - configuration failed.
  */
 int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed)
 {
@@ -439,6 +445,14 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed)
 	if (IS_ERR(nsp))
 		return PTR_ERR(nsp);
 
+	/* Older ABI versions did support this feature, however this has only
+	 * been reliable since ABI 20.
+	 */
+	if (nfp_nsp_get_abi_ver_minor(nsp) < 20) {
+		nfp_eth_config_cleanup_end(nsp);
+		return -EOPNOTSUPP;
+	}
+
 	entries = nfp_nsp_config_entries(nsp);
 
 	/* Check if we are already in requested state */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6c87bed13bd2..58a689fb04db 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1684,6 +1684,8 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			   "Load request was sent. Load code: 0x%x\n",
 			   load_code);
 
+		qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
+
 		qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
 
 		p_hwfn->first_on_engine = (load_code ==
@@ -2472,6 +2474,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities;
 	u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg;
+	struct qed_mcp_link_capabilities *p_caps;
 	struct qed_mcp_link_params *link;
 
 	/* Read global nvm_cfg address */
@@ -2534,6 +2537,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 	/* Read default link configuration */
 	link = &p_hwfn->mcp_info->link_input;
+	p_caps = &p_hwfn->mcp_info->link_capabilities;
 	port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
 			offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]);
 	link_temp = qed_rd(p_hwfn, p_ptt,
@@ -2588,10 +2592,45 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 				   NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX);
 	link->loopback_mode = 0;
 
-	DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
-		   "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x\n",
-		   link->speed.forced_speed, link->speed.advertised_speeds,
-		   link->speed.autoneg, link->pause.autoneg);
+	if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) {
+		link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr +
+				   offsetof(struct nvm_cfg1_port, ext_phy));
+		link_temp &= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK;
+		link_temp >>= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET;
+		p_caps->default_eee = QED_MCP_EEE_ENABLED;
+		link->eee.enable = true;
+		switch (link_temp) {
+		case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED:
+			p_caps->default_eee = QED_MCP_EEE_DISABLED;
+			link->eee.enable = false;
+			break;
+		case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED:
+			p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_BALANCED_TIME;
+			break;
+		case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE:
+			p_caps->eee_lpi_timer =
+			    EEE_TX_TIMER_USEC_AGGRESSIVE_TIME;
+			break;
+		case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY:
+			p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_LATENCY_TIME;
+			break;
+		}
+
+		link->eee.tx_lpi_timer = p_caps->eee_lpi_timer;
+		link->eee.tx_lpi_enable = link->eee.enable;
+		link->eee.adv_caps = QED_EEE_1G_ADV | QED_EEE_10G_ADV;
+	} else {
+		p_caps->default_eee = QED_MCP_EEE_UNSUPPORTED;
+	}
+
+	DP_VERBOSE(p_hwfn,
+		   NETIF_MSG_LINK,
+		   "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%08x usec]\n",
+		   link->speed.forced_speed,
+		   link->speed.advertised_speeds,
+		   link->speed.autoneg,
+		   link->pause.autoneg,
+		   p_caps->default_eee, p_caps->eee_lpi_timer);
 
 	/* Read Multi-function information from shmem */
 	addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
@@ -2751,6 +2790,27 @@ static void qed_hw_info_port_num(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		qed_hw_info_port_num_ah(p_hwfn, p_ptt);
 }
 
+static void qed_get_eee_caps(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_link_capabilities *p_caps;
+	u32 eee_status;
+
+	p_caps = &p_hwfn->mcp_info->link_capabilities;
+	if (p_caps->default_eee == QED_MCP_EEE_UNSUPPORTED)
+		return;
+
+	p_caps->eee_speed_caps = 0;
+	eee_status = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
+			    offsetof(struct public_port, eee_status));
+	eee_status = (eee_status & EEE_SUPPORTED_SPEED_MASK) >>
+			EEE_SUPPORTED_SPEED_OFFSET;
+
+	if (eee_status & EEE_1G_SUPPORTED)
+		p_caps->eee_speed_caps |= QED_EEE_1G_ADV;
+	if (eee_status & EEE_10G_ADV)
+		p_caps->eee_speed_caps |= QED_EEE_10G_ADV;
+}
+
 static int
 qed_get_hw_info(struct qed_hwfn *p_hwfn,
 		struct qed_ptt *p_ptt,
@@ -2767,6 +2827,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
 
 	qed_hw_info_port_num(p_hwfn, p_ptt);
 
+	qed_mcp_get_capabilities(p_hwfn, p_ptt);
+
 	qed_hw_get_nvm_info(p_hwfn, p_ptt);
 
 	rc = qed_int_igu_read_cam(p_hwfn, p_ptt);
@@ -2785,6 +2847,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
 				p_hwfn->mcp_info->func_info.ovlan;
 
 		qed_mcp_cmd_port_init(p_hwfn, p_ptt);
+
+		qed_get_eee_caps(p_hwfn, p_ptt);
 	}
 
 	if (qed_mcp_is_init(p_hwfn)) {
@@ -3630,7 +3694,7 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	}
 
 	p_coal_timeset = p_eth_qzone;
-	memset(p_coal_timeset, 0, eth_qzone_size);
+	memset(p_eth_qzone, 0, eth_qzone_size);
 	SET_FIELD(p_coal_timeset->value, COALESCING_TIMESET_TIMESET, timeset);
 	SET_FIELD(p_coal_timeset->value, COALESCING_TIMESET_VALID, 1);
 	qed_memcpy_to(p_hwfn, p_ptt, hw_addr, p_eth_qzone, eth_qzone_size);
@@ -3638,12 +3702,46 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	return 0;
 }
 
-int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u16 qid, u16 sb_id)
+int qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle)
+{
+	struct qed_queue_cid *p_cid = p_handle;
+	struct qed_hwfn *p_hwfn;
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	p_hwfn = p_cid->p_owner;
+
+	if (IS_VF(p_hwfn->cdev))
+		return qed_vf_pf_set_coalesce(p_hwfn, rx_coal, tx_coal, p_cid);
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (rx_coal) {
+		rc = qed_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid);
+		if (rc)
+			goto out;
+		p_hwfn->cdev->rx_coalesce_usecs = rx_coal;
+	}
+
+	if (tx_coal) {
+		rc = qed_set_txq_coalesce(p_hwfn, p_ptt, tx_coal, p_cid);
+		if (rc)
+			goto out;
+		p_hwfn->cdev->tx_coalesce_usecs = tx_coal;
+	}
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
+
+int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 u16 coalesce, struct qed_queue_cid *p_cid)
 {
 	struct ustorm_eth_queue_zone eth_qzone;
 	u8 timeset, timer_res;
-	u16 fw_qid = 0;
 	u32 address;
 	int rc;
 
@@ -3660,32 +3758,29 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	}
 	timeset = (u8)(coalesce >> timer_res);
 
-	rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
-	if (rc)
-		return rc;
-
-	rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, sb_id, false);
+	rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res,
+				   p_cid->sb_igu_id, false);
 	if (rc)
 		goto out;
 
-	address = BAR0_MAP_REG_USDM_RAM + USTORM_ETH_QUEUE_ZONE_OFFSET(fw_qid);
+	address = BAR0_MAP_REG_USDM_RAM +
+		  USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
 
 	rc = qed_set_coalesce(p_hwfn, p_ptt, address, &eth_qzone,
 			      sizeof(struct ustorm_eth_queue_zone), timeset);
 	if (rc)
 		goto out;
 
-	p_hwfn->cdev->rx_coalesce_usecs = coalesce;
 out:
 	return rc;
 }
 
-int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u16 qid, u16 sb_id)
+int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 u16 coalesce, struct qed_queue_cid *p_cid)
 {
 	struct xstorm_eth_queue_zone eth_qzone;
 	u8 timeset, timer_res;
-	u16 fw_qid = 0;
 	u32 address;
 	int rc;
 
@@ -3702,22 +3797,16 @@ int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	}
 	timeset = (u8)(coalesce >> timer_res);
 
-	rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
-	if (rc)
-		return rc;
-
-	rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, sb_id, true);
+	rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res,
+				   p_cid->sb_igu_id, true);
 	if (rc)
 		goto out;
 
-	address = BAR0_MAP_REG_XSDM_RAM + XSTORM_ETH_QUEUE_ZONE_OFFSET(fw_qid);
+	address = BAR0_MAP_REG_XSDM_RAM +
+		  XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
 
 	rc = qed_set_coalesce(p_hwfn, p_ptt, address, &eth_qzone,
 			      sizeof(struct xstorm_eth_queue_zone), timeset);
-	if (rc)
-		goto out;
-
-	p_hwfn->cdev->tx_coalesce_usecs = coalesce;
 out:
 	return rc;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index 1f1df1bf127c..defdda1ffaa2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -443,38 +443,35 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt, u16 id, bool is_vf);
 
 /**
- * @brief qed_set_rxq_coalesce - Configure coalesce parameters for an Rx queue
- * The fact that we can configure coalescing to up to 511, but on varying
- * accuracy [the bigger the value the less accurate] up to a mistake of 3usec
- * for the highest values.
+ * @brief qed_get_queue_coalesce - Retrieve coalesce value for a given queue.
  *
  * @param p_hwfn
- * @param p_ptt
- * @param coalesce - Coalesce value in micro seconds.
- * @param qid - Queue index.
- * @param qid - SB Id
+ * @param p_coal - store coalesce value read from the hardware.
+ * @param p_handle
  *
  * @return int
- */
-int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u16 qid, u16 sb_id);
+ **/
+int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
 
 /**
- * @brief qed_set_txq_coalesce - Configure coalesce parameters for a Tx queue
- * While the API allows setting coalescing per-qid, all tx queues sharing a
- * SB should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff]
- * otherwise configuration would break.
+ * @brief qed_set_queue_coalesce - Configure coalesce parameters for Rx and
+ *    Tx queue. The fact that we can configure coalescing to up to 511, but on
+ *    varying accuracy [the bigger the value the less accurate] up to a mistake
+ *    of 3usec for the highest values.
+ *    While the API allows setting coalescing per-qid, all queues sharing a SB
+ *    should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff]
+ *    otherwise configuration would break.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param coalesce - Coalesce value in micro seconds.
- * @param qid - Queue index.
- * @param qid - SB Id
+ *
+ * @param rx_coal - Rx Coalesce value in micro seconds.
+ * @param tx_coal - TX Coalesce value in micro seconds.
+ * @param p_handle
  *
  * @return int
- */
-int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u16 qid, u16 sb_id);
+ **/
+int
+qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
+
 
 const char *qed_hw_get_resc_name(enum qed_resources res_id);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 31fb0bffa098..3427fe7049b5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -10825,6 +10825,17 @@ struct eth_phy_cfg {
 #define ETH_LOOPBACK_EXT		(3)
 #define ETH_LOOPBACK_MAC		(4)
 
+	u32 eee_cfg;
+#define EEE_CFG_EEE_ENABLED			BIT(0)
+#define EEE_CFG_TX_LPI				BIT(1)
+#define EEE_CFG_ADV_SPEED_1G			BIT(2)
+#define EEE_CFG_ADV_SPEED_10G			BIT(3)
+#define EEE_TX_TIMER_USEC_MASK			(0xfffffff0)
+#define EEE_TX_TIMER_USEC_OFFSET		4
+#define EEE_TX_TIMER_USEC_BALANCED_TIME		(0xa00)
+#define EEE_TX_TIMER_USEC_AGGRESSIVE_TIME	(0x100)
+#define EEE_TX_TIMER_USEC_LATENCY_TIME		(0x6000)
+
 	u32 feature_config_flags;
 #define ETH_EEE_MODE_ADV_LPI		(1 << 0)
 };
@@ -11242,6 +11253,25 @@ struct public_port {
 	u32 wol_pkt_len;
 	u32 wol_pkt_details;
 	struct dcb_dscp_map dcb_dscp_map;
+
+	u32 eee_status;
+#define EEE_ACTIVE_BIT			BIT(0)
+#define EEE_LD_ADV_STATUS_MASK		0x000000f0
+#define EEE_LD_ADV_STATUS_OFFSET	4
+#define EEE_1G_ADV			BIT(1)
+#define EEE_10G_ADV			BIT(2)
+#define EEE_LP_ADV_STATUS_MASK		0x00000f00
+#define EEE_LP_ADV_STATUS_OFFSET	8
+#define EEE_SUPPORTED_SPEED_MASK	0x0000f000
+#define EEE_SUPPORTED_SPEED_OFFSET	12
+#define EEE_1G_SUPPORTED		BIT(1)
+#define EEE_10G_SUPPORTED		BIT(2)
+
+	u32 eee_remote;
+#define EEE_REMOTE_TW_TX_MASK   0x0000ffff
+#define EEE_REMOTE_TW_TX_OFFSET 0
+#define EEE_REMOTE_TW_RX_MASK   0xffff0000
+#define EEE_REMOTE_TW_RX_OFFSET 16
 };
 
 struct public_func {
@@ -11570,6 +11600,9 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL	0x002b0000
 #define DRV_MSG_CODE_OS_WOL			0x002e0000
 
+#define DRV_MSG_CODE_FEATURE_SUPPORT		0x00300000
+#define DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT	0x00310000
+
 #define DRV_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
 	u32 drv_mb_param;
@@ -11653,6 +11686,10 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT	8
 #define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK		0x0000FF00
 
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_MASK		0x0000FFFF
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_OFFSET	0
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE		0x00000002
+
 	u32 fw_mb_header;
 #define FW_MSG_CODE_MASK			0xffff0000
 #define FW_MSG_CODE_UNSUPPORTED                 0x00000000
@@ -11696,6 +11733,9 @@ struct public_drv_mb {
 #define FW_MB_PARAM_GET_PF_RDMA_IWARP		0x2
 #define FW_MB_PARAM_GET_PF_RDMA_BOTH		0x3
 
+/* get MFW feature support response */
+#define FW_MB_PARAM_FEATURE_SUPPORT_EEE		0x00000002
+
 #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR	(1 << 0)
 
 	u32 drv_pulse_mb;
@@ -11891,7 +11931,16 @@ struct nvm_cfg1_port {
 #define NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX			0x4
 	u32 phy_cfg;
 	u32 mgmt_traffic;
+
 	u32 ext_phy;
+	/* EEE power saving mode */
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK		0x00FF0000
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET		16
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED		0x0
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED		0x1
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE		0x2
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY		0x3
+
 	u32 mba_cfg1;
 	u32 mba_cfg2;
 	u32 vf_cfg;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 0ba5ec8a9814..9a1645852015 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2047,6 +2047,106 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
+int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct qed_queue_cid *p_cid, u16 *p_rx_coal)
+{
+	u32 coalesce, address, is_valid;
+	struct cau_sb_entry sb_entry;
+	u8 timer_res;
+	int rc;
+
+	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
+			       p_cid->sb_igu_id * sizeof(u64),
+			       (u64)(uintptr_t)&sb_entry, 2, 0);
+	if (rc) {
+		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
+		return rc;
+	}
+
+	timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES0);
+
+	address = BAR0_MAP_REG_USDM_RAM +
+		  USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
+	coalesce = qed_rd(p_hwfn, p_ptt, address);
+
+	is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID);
+	if (!is_valid)
+		return -EINVAL;
+
+	coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET);
+	*p_rx_coal = (u16)(coalesce << timer_res);
+
+	return 0;
+}
+
+int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct qed_queue_cid *p_cid, u16 *p_tx_coal)
+{
+	u32 coalesce, address, is_valid;
+	struct cau_sb_entry sb_entry;
+	u8 timer_res;
+	int rc;
+
+	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
+			       p_cid->sb_igu_id * sizeof(u64),
+			       (u64)(uintptr_t)&sb_entry, 2, 0);
+	if (rc) {
+		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
+		return rc;
+	}
+
+	timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES1);
+
+	address = BAR0_MAP_REG_XSDM_RAM +
+		  XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
+	coalesce = qed_rd(p_hwfn, p_ptt, address);
+
+	is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID);
+	if (!is_valid)
+		return -EINVAL;
+
+	coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET);
+	*p_tx_coal = (u16)(coalesce << timer_res);
+
+	return 0;
+}
+
+int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *p_coal, void *handle)
+{
+	struct qed_queue_cid *p_cid = handle;
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	if (IS_VF(p_hwfn->cdev)) {
+		rc = qed_vf_pf_get_coalesce(p_hwfn, p_coal, p_cid);
+		if (rc)
+			DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n");
+
+		return rc;
+	}
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (p_cid->b_is_rx) {
+		rc = qed_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, p_coal);
+		if (rc)
+			goto out;
+	} else {
+		rc = qed_get_txq_coalesce(p_hwfn, p_ptt, p_cid, p_coal);
+		if (rc)
+			goto out;
+	}
+
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 				 struct qed_dev_eth_info *info)
 {
@@ -2696,6 +2796,20 @@ static int qed_ntuple_arfs_filter_config(struct qed_dev *cdev, void *cookie,
 	return rc;
 }
 
+static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle)
+{
+	struct qed_queue_cid *p_cid = handle;
+	struct qed_hwfn *p_hwfn;
+	int rc;
+
+	p_hwfn = p_cid->p_owner;
+	rc = qed_get_queue_coalesce(p_hwfn, coal, handle);
+	if (rc)
+		DP_NOTICE(p_hwfn, "Unable to read queue calescing\n");
+
+	return rc;
+}
+
 static int qed_fp_cqe_completion(struct qed_dev *dev,
 				 u8 rss_id, struct eth_slow_path_rx_cqe *cqe)
 {
@@ -2739,6 +2853,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
 	.tunn_config = &qed_tunn_configure,
 	.ntuple_filter_config = &qed_ntuple_arfs_filter_config,
 	.configure_arfs_searcher = &qed_configure_arfs_searcher,
+	.get_coalesce = &qed_get_coalesce,
 };
 
 const struct qed_eth_ops *qed_get_eth_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index f8f09aadced7..cc1f248551c9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -400,4 +400,20 @@ qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
 
 u8 qed_mcast_bin_from_mac(u8 *mac);
 
-#endif /* _QED_L2_H */
+int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 u16 coalesce, struct qed_queue_cid *p_cid);
+
+int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 u16 coalesce, struct qed_queue_cid *p_cid);
+
+int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct qed_queue_cid *p_cid, u16 *p_hw_coal);
+
+int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct qed_queue_cid *p_cid, u16 *p_hw_coal);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b11399606990..27832885a87f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -954,9 +954,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
 	struct qed_tunnel_info tunn_info;
 	const u8 *data = NULL;
 	struct qed_hwfn *hwfn;
-#ifdef CONFIG_RFS_ACCEL
 	struct qed_ptt *p_ptt;
-#endif
 	int rc = -EINVAL;
 
 	if (qed_iov_wq_start(cdev))
@@ -972,7 +970,6 @@ static int qed_slowpath_start(struct qed_dev *cdev,
 			goto err;
 		}
 
-#ifdef CONFIG_RFS_ACCEL
 		if (cdev->num_hwfns == 1) {
 			p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
 			if (p_ptt) {
@@ -983,7 +980,6 @@ static int qed_slowpath_start(struct qed_dev *cdev,
 				goto err;
 			}
 		}
-#endif
 	}
 
 	cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS;
@@ -1091,12 +1087,10 @@ err:
 	if (IS_PF(cdev))
 		release_firmware(cdev->firmware);
 
-#ifdef CONFIG_RFS_ACCEL
 	if (IS_PF(cdev) && (cdev->num_hwfns == 1) &&
 	    QED_LEADING_HWFN(cdev)->p_arfs_ptt)
 		qed_ptt_release(QED_LEADING_HWFN(cdev),
 				QED_LEADING_HWFN(cdev)->p_arfs_ptt);
-#endif
 
 	qed_iov_wq_stop(cdev, false);
 
@@ -1111,11 +1105,9 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
 	qed_ll2_dealloc_if(cdev);
 
 	if (IS_PF(cdev)) {
-#ifdef CONFIG_RFS_ACCEL
 		if (cdev->num_hwfns == 1)
 			qed_ptt_release(QED_LEADING_HWFN(cdev),
 					QED_LEADING_HWFN(cdev)->p_arfs_ptt);
-#endif
 		qed_free_stream_mem(cdev);
 		if (IS_QED_ETH_IF(cdev))
 			qed_sriov_disable(cdev, true);
@@ -1305,6 +1297,10 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
 		}
 	}
 
+	if (params->override_flags & QED_LINK_OVERRIDE_EEE_CONFIG)
+		memcpy(&link_params->eee, &params->eee,
+		       sizeof(link_params->eee));
+
 	rc = qed_mcp_set_link(hwfn, ptt, params->link_up);
 
 	qed_ptt_release(hwfn, ptt);
@@ -1491,6 +1487,21 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 	if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE ||
 	    link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE)
 		if_link->lp_caps |= QED_LM_Asym_Pause_BIT;
+
+	if (link_caps.default_eee == QED_MCP_EEE_UNSUPPORTED) {
+		if_link->eee_supported = false;
+	} else {
+		if_link->eee_supported = true;
+		if_link->eee_active = link.eee_active;
+		if_link->sup_caps = link_caps.eee_speed_caps;
+		/* MFW clears adv_caps on eee disable; use configured value */
+		if_link->eee.adv_caps = link.eee_adv_caps ? link.eee_adv_caps :
+					params.eee.adv_caps;
+		if_link->eee.lp_adv_caps = link.eee_lp_adv_caps;
+		if_link->eee.enable = params.eee.enable;
+		if_link->eee.tx_lpi_enable = params.eee.tx_lpi_enable;
+		if_link->eee.tx_lpi_timer = params.eee.tx_lpi_timer;
+	}
 }
 
 static void qed_get_current_link(struct qed_dev *cdev,
@@ -1557,36 +1568,10 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
 	return rc;
 }
 
-static void qed_get_coalesce(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal)
-{
-	*rx_coal = cdev->rx_coalesce_usecs;
-	*tx_coal = cdev->tx_coalesce_usecs;
-}
-
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
-			    u16 qid, u16 sb_id)
+			    void *handle)
 {
-	struct qed_hwfn *hwfn;
-	struct qed_ptt *ptt;
-	int hwfn_index;
-	int status = 0;
-
-	hwfn_index = qid % cdev->num_hwfns;
-	hwfn = &cdev->hwfns[hwfn_index];
-	ptt = qed_ptt_acquire(hwfn);
-	if (!ptt)
-		return -EAGAIN;
-
-	status = qed_set_rxq_coalesce(hwfn, ptt, rx_coal,
-				      qid / cdev->num_hwfns, sb_id);
-	if (status)
-		goto out;
-	status = qed_set_txq_coalesce(hwfn, ptt, tx_coal,
-				      qid / cdev->num_hwfns, sb_id);
-out:
-	qed_ptt_release(hwfn, ptt);
-
-	return status;
+		return qed_set_queue_coalesce(rx_coal, tx_coal, handle);
 }
 
 static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
@@ -1735,7 +1720,6 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.chain_alloc = &qed_chain_alloc,
 	.chain_free = &qed_chain_free,
 	.nvm_get_image = &qed_nvm_get_image,
-	.get_coalesce = &qed_get_coalesce,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
 	.update_drv_state = &qed_update_drv_state,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 9da91045d167..c1ecce6b9141 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1097,6 +1097,31 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn,
 		DP_NOTICE(p_hwfn, "Transceiver is unplugged.\n");
 }
 
+static void qed_mcp_read_eee_config(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    struct qed_mcp_link_state *p_link)
+{
+	u32 eee_status, val;
+
+	p_link->eee_adv_caps = 0;
+	p_link->eee_lp_adv_caps = 0;
+	eee_status = qed_rd(p_hwfn,
+			    p_ptt,
+			    p_hwfn->mcp_info->port_addr +
+			    offsetof(struct public_port, eee_status));
+	p_link->eee_active = !!(eee_status & EEE_ACTIVE_BIT);
+	val = (eee_status & EEE_LD_ADV_STATUS_MASK) >> EEE_LD_ADV_STATUS_OFFSET;
+	if (val & EEE_1G_ADV)
+		p_link->eee_adv_caps |= QED_EEE_1G_ADV;
+	if (val & EEE_10G_ADV)
+		p_link->eee_adv_caps |= QED_EEE_10G_ADV;
+	val = (eee_status & EEE_LP_ADV_STATUS_MASK) >> EEE_LP_ADV_STATUS_OFFSET;
+	if (val & EEE_1G_ADV)
+		p_link->eee_lp_adv_caps |= QED_EEE_1G_ADV;
+	if (val & EEE_10G_ADV)
+		p_link->eee_lp_adv_caps |= QED_EEE_10G_ADV;
+}
+
 static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
 				       struct qed_ptt *p_ptt, bool b_reset)
 {
@@ -1228,6 +1253,9 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
 
 	p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT);
 
+	if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE)
+		qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link);
+
 	qed_link_update(p_hwfn);
 out:
 	spin_unlock_bh(&p_hwfn->mcp_info->link_lock);
@@ -1251,6 +1279,19 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
 	phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0;
 	phy_cfg.adv_speed = params->speed.advertised_speeds;
 	phy_cfg.loopback_mode = params->loopback_mode;
+	if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) {
+		if (params->eee.enable)
+			phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED;
+		if (params->eee.tx_lpi_enable)
+			phy_cfg.eee_cfg |= EEE_CFG_TX_LPI;
+		if (params->eee.adv_caps & QED_EEE_1G_ADV)
+			phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_1G;
+		if (params->eee.adv_caps & QED_EEE_10G_ADV)
+			phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_10G;
+		phy_cfg.eee_cfg |= (params->eee.tx_lpi_timer <<
+				    EEE_TX_TIMER_USEC_OFFSET) &
+				   EEE_TX_TIMER_USEC_MASK;
+	}
 
 	p_hwfn->b_drv_link_init = b_up;
 
@@ -2822,3 +2863,28 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
 		p_unlock->resource = resource;
 	}
 }
+
+int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 mcp_resp;
+	int rc;
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT,
+			 0, &mcp_resp, &p_hwfn->mcp_info->capabilities);
+	if (!rc)
+		DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_PROBE),
+			   "MFW supported features: %08x\n",
+			   p_hwfn->mcp_info->capabilities);
+
+	return rc;
+}
+
+int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 mcp_resp, mcp_param, features;
+
+	features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE;
+
+	return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
+			   features, &mcp_resp, &mcp_param);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index af03b3651411..c7ec2395d1ce 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -53,15 +53,25 @@ struct qed_mcp_link_pause_params {
 	bool    forced_tx;
 };
 
+enum qed_mcp_eee_mode {
+	QED_MCP_EEE_DISABLED,
+	QED_MCP_EEE_ENABLED,
+	QED_MCP_EEE_UNSUPPORTED
+};
+
 struct qed_mcp_link_params {
-	struct qed_mcp_link_speed_params	speed;
-	struct qed_mcp_link_pause_params	pause;
-	u32				     loopback_mode;
+	struct qed_mcp_link_speed_params speed;
+	struct qed_mcp_link_pause_params pause;
+	u32 loopback_mode;
+	struct qed_link_eee_params eee;
 };
 
 struct qed_mcp_link_capabilities {
 	u32 speed_capabilities;
 	bool default_speed_autoneg;
+	enum qed_mcp_eee_mode default_eee;
+	u32 eee_lpi_timer;
+	u8 eee_speed_caps;
 };
 
 struct qed_mcp_link_state {
@@ -102,6 +112,9 @@ struct qed_mcp_link_state {
 	u8      partner_adv_pause;
 
 	bool    sfp_tx_fault;
+	bool    eee_active;
+	u8      eee_adv_caps;
+	u8      eee_lp_adv_caps;
 };
 
 struct qed_mcp_function_info {
@@ -546,6 +559,9 @@ struct qed_mcp_info {
 	u8					*mfw_mb_shadow;
 	u16					mfw_mb_length;
 	u32					mcp_hist;
+
+	/* Capabilties negotiated with the MFW */
+	u32					capabilities;
 };
 
 struct qed_mcp_mb_params {
@@ -925,5 +941,20 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
 				    struct qed_resc_unlock_params *p_unlock,
 				    enum qed_resc_lock
 				    resource, bool b_is_permanent);
+/**
+ * @brief Learn of supported MFW features; To be done during early init
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
+/**
+ * @brief Inform MFW of set of features supported by driver. Should be done
+ * inside the content of the LOAD_REQ.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 2cfd3bd9a031..3f40b1de7957 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3400,6 +3400,157 @@ static void qed_iov_vf_mbx_release(struct qed_hwfn *p_hwfn,
 			     length, status);
 }
 
+static void qed_iov_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+				       struct qed_ptt *p_ptt,
+				       struct qed_vf_info *p_vf)
+{
+	struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
+	struct pfvf_read_coal_resp_tlv *p_resp;
+	struct vfpf_read_coal_req_tlv *req;
+	u8 status = PFVF_STATUS_FAILURE;
+	struct qed_vf_queue *p_queue;
+	struct qed_queue_cid *p_cid;
+	u16 coal = 0, qid, i;
+	bool b_is_rx;
+	int rc = 0;
+
+	mbx->offset = (u8 *)mbx->reply_virt;
+	req = &mbx->req_virt->read_coal_req;
+
+	qid = req->qid;
+	b_is_rx = req->is_rx ? true : false;
+
+	if (b_is_rx) {
+		if (!qed_iov_validate_rxq(p_hwfn, p_vf, qid,
+					  QED_IOV_VALIDATE_Q_ENABLE)) {
+			DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+				   "VF[%d]: Invalid Rx queue_id = %d\n",
+				   p_vf->abs_vf_id, qid);
+			goto send_resp;
+		}
+
+		p_cid = qed_iov_get_vf_rx_queue_cid(&p_vf->vf_queues[qid]);
+		rc = qed_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, &coal);
+		if (rc)
+			goto send_resp;
+	} else {
+		if (!qed_iov_validate_txq(p_hwfn, p_vf, qid,
+					  QED_IOV_VALIDATE_Q_ENABLE)) {
+			DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+				   "VF[%d]: Invalid Tx queue_id = %d\n",
+				   p_vf->abs_vf_id, qid);
+			goto send_resp;
+		}
+		for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
+			p_queue = &p_vf->vf_queues[qid];
+			if ((!p_queue->cids[i].p_cid) ||
+			    (!p_queue->cids[i].b_is_tx))
+				continue;
+
+			p_cid = p_queue->cids[i].p_cid;
+
+			rc = qed_get_txq_coalesce(p_hwfn, p_ptt, p_cid, &coal);
+			if (rc)
+				goto send_resp;
+			break;
+		}
+	}
+
+	status = PFVF_STATUS_SUCCESS;
+
+send_resp:
+	p_resp = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_COALESCE_READ,
+			     sizeof(*p_resp));
+	p_resp->coal = coal;
+
+	qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
+		    sizeof(struct channel_list_end_tlv));
+
+	qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_resp), status);
+}
+
+static void qed_iov_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+				       struct qed_ptt *p_ptt,
+				       struct qed_vf_info *vf)
+{
+	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
+	struct vfpf_update_coalesce *req;
+	u8 status = PFVF_STATUS_FAILURE;
+	struct qed_queue_cid *p_cid;
+	u16 rx_coal, tx_coal;
+	int rc = 0, i;
+	u16 qid;
+
+	req = &mbx->req_virt->update_coalesce;
+
+	rx_coal = req->rx_coal;
+	tx_coal = req->tx_coal;
+	qid = req->qid;
+
+	if (!qed_iov_validate_rxq(p_hwfn, vf, qid,
+				  QED_IOV_VALIDATE_Q_ENABLE) && rx_coal) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "VF[%d]: Invalid Rx queue_id = %d\n",
+			   vf->abs_vf_id, qid);
+		goto out;
+	}
+
+	if (!qed_iov_validate_txq(p_hwfn, vf, qid,
+				  QED_IOV_VALIDATE_Q_ENABLE) && tx_coal) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "VF[%d]: Invalid Tx queue_id = %d\n",
+			   vf->abs_vf_id, qid);
+		goto out;
+	}
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_IOV,
+		   "VF[%d]: Setting coalesce for VF rx_coal = %d, tx_coal = %d at queue = %d\n",
+		   vf->abs_vf_id, rx_coal, tx_coal, qid);
+
+	if (rx_coal) {
+		p_cid = qed_iov_get_vf_rx_queue_cid(&vf->vf_queues[qid]);
+
+		rc = qed_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid);
+		if (rc) {
+			DP_VERBOSE(p_hwfn,
+				   QED_MSG_IOV,
+				   "VF[%d]: Unable to set rx queue = %d coalesce\n",
+				   vf->abs_vf_id, vf->vf_queues[qid].fw_rx_qid);
+			goto out;
+		}
+		vf->rx_coal = rx_coal;
+	}
+
+	if (tx_coal) {
+		struct qed_vf_queue *p_queue = &vf->vf_queues[qid];
+
+		for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
+			if (!p_queue->cids[i].p_cid)
+				continue;
+
+			if (!p_queue->cids[i].b_is_tx)
+				continue;
+
+			rc = qed_set_txq_coalesce(p_hwfn, p_ptt, tx_coal,
+						  p_queue->cids[i].p_cid);
+
+			if (rc) {
+				DP_VERBOSE(p_hwfn,
+					   QED_MSG_IOV,
+					   "VF[%d]: Unable to set tx queue coalesce\n",
+					   vf->abs_vf_id);
+				goto out;
+			}
+		}
+		vf->tx_coal = tx_coal;
+	}
+
+	status = PFVF_STATUS_SUCCESS;
+out:
+	qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_COALESCE_UPDATE,
+			     sizeof(struct pfvf_def_resp_tlv), status);
+}
 static int
 qed_iov_vf_flr_poll_dorq(struct qed_hwfn *p_hwfn,
 			 struct qed_vf_info *p_vf, struct qed_ptt *p_ptt)
@@ -3725,6 +3876,12 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
 		case CHANNEL_TLV_UPDATE_TUNN_PARAM:
 			qed_iov_vf_mbx_update_tunn_param(p_hwfn, p_ptt, p_vf);
 			break;
+		case CHANNEL_TLV_COALESCE_UPDATE:
+			qed_iov_vf_pf_set_coalesce(p_hwfn, p_ptt, p_vf);
+			break;
+		case CHANNEL_TLV_COALESCE_READ:
+			qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf);
+			break;
 		}
 	} else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
 		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index c2e44bce398c..3955929ba892 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -217,6 +217,9 @@ struct qed_vf_info {
 	u8 num_rxqs;
 	u8 num_txqs;
 
+	u16 rx_coal;
+	u16 tx_coal;
+
 	u8 num_sbs;
 
 	u8 num_mac_filters;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 1926d1ed439f..91b5e9f02a62 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1343,6 +1343,81 @@ exit:
 	return rc;
 }
 
+int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+			   u16 *p_coal, struct qed_queue_cid *p_cid)
+{
+	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+	struct pfvf_read_coal_resp_tlv *resp;
+	struct vfpf_read_coal_req_tlv *req;
+	int rc;
+
+	/* clear mailbox and prep header tlv */
+	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_READ, sizeof(*req));
+	req->qid = p_cid->rel.queue_id;
+	req->is_rx = p_cid->b_is_rx ? 1 : 0;
+
+	qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+		    sizeof(struct channel_list_end_tlv));
+	resp = &p_iov->pf2vf_reply->read_coal_resp;
+
+	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+	if (rc)
+		goto exit;
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
+		goto exit;
+
+	*p_coal = resp->coal;
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+
+	return rc;
+}
+
+int
+qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+		       u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid)
+{
+	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+	struct vfpf_update_coalesce *req;
+	struct pfvf_def_resp_tlv *resp;
+	int rc;
+
+	/* clear mailbox and prep header tlv */
+	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_UPDATE, sizeof(*req));
+
+	req->rx_coal = rx_coal;
+	req->tx_coal = tx_coal;
+	req->qid = p_cid->rel.queue_id;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_IOV,
+		   "Setting coalesce rx_coal = %d, tx_coal = %d at queue = %d\n",
+		   rx_coal, tx_coal, req->qid);
+
+	/* add list termination tlv */
+	qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+		    sizeof(struct channel_list_end_tlv));
+
+	resp = &p_iov->pf2vf_reply->default_resp;
+	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+	if (rc)
+		goto exit;
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
+		goto exit;
+
+	if (rx_coal)
+		p_hwfn->cdev->rx_coalesce_usecs = rx_coal;
+
+	if (tx_coal)
+		p_hwfn->cdev->tx_coalesce_usecs = tx_coal;
+
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+	return rc;
+}
+
 u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 34d9b882a780..97d44dfb38ca 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -497,6 +497,27 @@ struct tlv_buffer_size {
 	u8 tlv_buffer[TLV_BUFFER_SIZE];
 };
 
+struct vfpf_update_coalesce {
+	struct vfpf_first_tlv first_tlv;
+	u16 rx_coal;
+	u16 tx_coal;
+	u16 qid;
+	u8 padding[2];
+};
+
+struct vfpf_read_coal_req_tlv {
+	struct vfpf_first_tlv first_tlv;
+	u16 qid;
+	u8 is_rx;
+	u8 padding[5];
+};
+
+struct pfvf_read_coal_resp_tlv {
+	struct pfvf_tlv hdr;
+	u16 coal;
+	u8 padding[6];
+};
+
 union vfpf_tlvs {
 	struct vfpf_first_tlv first_tlv;
 	struct vfpf_acquire_tlv acquire;
@@ -509,7 +530,8 @@ union vfpf_tlvs {
 	struct vfpf_vport_update_tlv vport_update;
 	struct vfpf_ucast_filter_tlv ucast_filter;
 	struct vfpf_update_tunn_param_tlv tunn_param_update;
-	struct channel_list_end_tlv list_end;
+	struct vfpf_update_coalesce update_coalesce;
+	struct vfpf_read_coal_req_tlv read_coal_req;
 	struct tlv_buffer_size tlv_buf_size;
 };
 
@@ -519,6 +541,7 @@ union pfvf_tlvs {
 	struct tlv_buffer_size tlv_buf_size;
 	struct pfvf_start_queue_resp_tlv queue_start;
 	struct pfvf_update_tunn_param_tlv tunn_param_resp;
+	struct pfvf_read_coal_resp_tlv read_coal_resp;
 };
 
 enum qed_bulletin_bit {
@@ -624,8 +647,9 @@ enum {
 	CHANNEL_TLV_VPORT_UPDATE_ACCEPT_ANY_VLAN,
 	CHANNEL_TLV_VPORT_UPDATE_SGE_TPA,
 	CHANNEL_TLV_UPDATE_TUNN_PARAM,
-	CHANNEL_TLV_RESERVED,
+	CHANNEL_TLV_COALESCE_UPDATE,
 	CHANNEL_TLV_QID,
+	CHANNEL_TLV_COALESCE_READ,
 	CHANNEL_TLV_MAX,
 
 	/* Required for iterating over vport-update tlvs.
@@ -677,6 +701,31 @@ struct qed_vf_iov {
 	bool b_doorbell_bar;
 };
 
+/**
+ * @brief VF - Set Rx/Tx coalesce per VF's relative queue.
+ *             Coalesce value '0' will omit the configuration.
+ *
+ * @param p_hwfn
+ * @param rx_coal - coalesce value in micro second for rx queue
+ * @param tx_coal - coalesce value in micro second for tx queue
+ * @param p_cid   - queue cid
+ *
+ **/
+int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+			   u16 rx_coal,
+			   u16 tx_coal, struct qed_queue_cid *p_cid);
+
+/**
+ * @brief VF - Get coalesce per VF's relative queue.
+ *
+ * @param p_hwfn
+ * @param p_coal - coalesce value in micro second for VF queues.
+ * @param p_cid  - queue cid
+ *
+ **/
+int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+			   u16 *p_coal, struct qed_queue_cid *p_cid);
+
 #ifdef CONFIG_QED_SRIOV
 /**
  * @brief Read the VF bulletin and act on it if needed
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 4dfb238221f9..adb700512baa 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -160,6 +160,8 @@ struct qede_rdma_dev {
 
 struct qede_ptp;
 
+#define QEDE_RFS_MAX_FLTR	256
+
 struct qede_dev {
 	struct qed_dev			*cdev;
 	struct net_device		*ndev;
@@ -241,9 +243,7 @@ struct qede_dev {
 	u16				vxlan_dst_port;
 	u16				geneve_dst_port;
 
-#ifdef CONFIG_RFS_ACCEL
 	struct qede_arfs		*arfs;
-#endif
 	bool				wol_enabled;
 
 	struct qede_rdma_dev		rdma_info;
@@ -447,16 +447,21 @@ struct qede_fastpath {
 #ifdef CONFIG_RFS_ACCEL
 int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 		       u16 rxq_index, u32 flow_id);
+#define QEDE_SP_ARFS_CONFIG	4
+#define QEDE_SP_TASK_POLL_DELAY	(5 * HZ)
+#endif
+
 void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr);
 void qede_poll_for_freeing_arfs_filters(struct qede_dev *edev);
 void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc);
 void qede_free_arfs(struct qede_dev *edev);
 int qede_alloc_arfs(struct qede_dev *edev);
-
-#define QEDE_SP_ARFS_CONFIG	4
-#define QEDE_SP_TASK_POLL_DELAY	(5 * HZ)
-#define QEDE_RFS_MAX_FLTR	256
-#endif
+int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info);
+int qede_del_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info);
+int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd);
+int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info,
+			  u32 *rule_locs);
+int qede_get_arfs_filter_count(struct qede_dev *edev);
 
 struct qede_reload_args {
 	void (*func)(struct qede_dev *edev, struct qede_reload_args *args);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 6a03d3e66cff..dae741270022 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -702,24 +702,62 @@ static u32 qede_get_link(struct net_device *dev)
 static int qede_get_coalesce(struct net_device *dev,
 			     struct ethtool_coalesce *coal)
 {
+	void *rx_handle = NULL, *tx_handle = NULL;
 	struct qede_dev *edev = netdev_priv(dev);
-	u16 rxc, txc;
+	u16 rx_coal, tx_coal, i, rc = 0;
+	struct qede_fastpath *fp;
+
+	rx_coal = QED_DEFAULT_RX_USECS;
+	tx_coal = QED_DEFAULT_TX_USECS;
 
 	memset(coal, 0, sizeof(struct ethtool_coalesce));
-	edev->ops->common->get_coalesce(edev->cdev, &rxc, &txc);
 
-	coal->rx_coalesce_usecs = rxc;
-	coal->tx_coalesce_usecs = txc;
+	__qede_lock(edev);
+	if (edev->state == QEDE_STATE_OPEN) {
+		for_each_queue(i) {
+			fp = &edev->fp_array[i];
 
-	return 0;
+			if (fp->type & QEDE_FASTPATH_RX) {
+				rx_handle = fp->rxq->handle;
+				break;
+			}
+		}
+
+		rc = edev->ops->get_coalesce(edev->cdev, &rx_coal, rx_handle);
+		if (rc) {
+			DP_INFO(edev, "Read Rx coalesce error\n");
+			goto out;
+		}
+
+		for_each_queue(i) {
+			fp = &edev->fp_array[i];
+			if (fp->type & QEDE_FASTPATH_TX) {
+				tx_handle = fp->txq->handle;
+				break;
+			}
+		}
+
+		rc = edev->ops->get_coalesce(edev->cdev, &tx_coal, tx_handle);
+		if (rc)
+			DP_INFO(edev, "Read Tx coalesce error\n");
+	}
+
+out:
+	__qede_unlock(edev);
+
+	coal->rx_coalesce_usecs = rx_coal;
+	coal->tx_coalesce_usecs = tx_coal;
+
+	return rc;
 }
 
 static int qede_set_coalesce(struct net_device *dev,
 			     struct ethtool_coalesce *coal)
 {
 	struct qede_dev *edev = netdev_priv(dev);
+	struct qede_fastpath *fp;
 	int i, rc = 0;
-	u16 rxc, txc, sb_id;
+	u16 rxc, txc;
 
 	if (!netif_running(dev)) {
 		DP_INFO(edev, "Interface is down\n");
@@ -730,21 +768,36 @@ static int qede_set_coalesce(struct net_device *dev,
 	    coal->tx_coalesce_usecs > QED_COALESCE_MAX) {
 		DP_INFO(edev,
 			"Can't support requested %s coalesce value [max supported value %d]\n",
-			coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx"
-								   : "tx",
-			QED_COALESCE_MAX);
+			coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx" :
+			"tx", QED_COALESCE_MAX);
 		return -EINVAL;
 	}
 
 	rxc = (u16)coal->rx_coalesce_usecs;
 	txc = (u16)coal->tx_coalesce_usecs;
 	for_each_queue(i) {
-		sb_id = edev->fp_array[i].sb_info->igu_sb_id;
-		rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc,
-						     (u16)i, sb_id);
-		if (rc) {
-			DP_INFO(edev, "Set coalesce error, rc = %d\n", rc);
-			return rc;
+		fp = &edev->fp_array[i];
+
+		if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+			rc = edev->ops->common->set_coalesce(edev->cdev,
+							     rxc, 0,
+							     fp->rxq->handle);
+			if (rc) {
+				DP_INFO(edev,
+					"Set RX coalesce error, rc = %d\n", rc);
+				return rc;
+			}
+		}
+
+		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			rc = edev->ops->common->set_coalesce(edev->cdev,
+							     0, txc,
+							     fp->txq->handle);
+			if (rc) {
+				DP_INFO(edev,
+					"Set TX coalesce error, rc = %d\n", rc);
+				return rc;
+			}
 		}
 	}
 
@@ -1045,20 +1098,34 @@ static int qede_get_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info)
 }
 
 static int qede_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
-			  u32 *rules __always_unused)
+			  u32 *rule_locs)
 {
 	struct qede_dev *edev = netdev_priv(dev);
+	int rc = 0;
 
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
 		info->data = QEDE_RSS_COUNT(edev);
-		return 0;
+		break;
 	case ETHTOOL_GRXFH:
-		return qede_get_rss_flags(edev, info);
+		rc = qede_get_rss_flags(edev, info);
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		info->rule_cnt = qede_get_arfs_filter_count(edev);
+		info->data = QEDE_RFS_MAX_FLTR;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		rc = qede_get_cls_rule_entry(edev, info);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		rc = qede_get_cls_rule_all(edev, info, rule_locs);
+		break;
 	default:
 		DP_ERR(edev, "Command parameters not supported\n");
-		return -EOPNOTSUPP;
+		rc = -EOPNOTSUPP;
 	}
+
+	return rc;
 }
 
 static int qede_set_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info)
@@ -1168,14 +1235,24 @@ static int qede_set_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info)
 static int qede_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
 {
 	struct qede_dev *edev = netdev_priv(dev);
+	int rc;
 
 	switch (info->cmd) {
 	case ETHTOOL_SRXFH:
-		return qede_set_rss_flags(edev, info);
+		rc = qede_set_rss_flags(edev, info);
+		break;
+	case ETHTOOL_SRXCLSRLINS:
+		rc = qede_add_cls_rule(edev, info);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		rc = qede_del_cls_rule(edev, info);
+		break;
 	default:
 		DP_INFO(edev, "Command parameters not supported\n");
-		return -EOPNOTSUPP;
+		rc = -EOPNOTSUPP;
 	}
+
+	return rc;
 }
 
 static u32 qede_get_rxfh_indir_size(struct net_device *dev)
@@ -1607,6 +1684,87 @@ static int qede_get_tunable(struct net_device *dev,
 	return 0;
 }
 
+static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qed_link_output current_link;
+
+	memset(&current_link, 0, sizeof(current_link));
+	edev->ops->common->get_link(edev->cdev, &current_link);
+
+	if (!current_link.eee_supported) {
+		DP_INFO(edev, "EEE is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (current_link.eee.adv_caps & QED_EEE_1G_ADV)
+		edata->advertised = ADVERTISED_1000baseT_Full;
+	if (current_link.eee.adv_caps & QED_EEE_10G_ADV)
+		edata->advertised |= ADVERTISED_10000baseT_Full;
+	if (current_link.sup_caps & QED_EEE_1G_ADV)
+		edata->supported = ADVERTISED_1000baseT_Full;
+	if (current_link.sup_caps & QED_EEE_10G_ADV)
+		edata->supported |= ADVERTISED_10000baseT_Full;
+	if (current_link.eee.lp_adv_caps & QED_EEE_1G_ADV)
+		edata->lp_advertised = ADVERTISED_1000baseT_Full;
+	if (current_link.eee.lp_adv_caps & QED_EEE_10G_ADV)
+		edata->lp_advertised |= ADVERTISED_10000baseT_Full;
+
+	edata->tx_lpi_timer = current_link.eee.tx_lpi_timer;
+	edata->eee_enabled = current_link.eee.enable;
+	edata->tx_lpi_enabled = current_link.eee.tx_lpi_enable;
+	edata->eee_active = current_link.eee_active;
+
+	return 0;
+}
+
+static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qed_link_output current_link;
+	struct qed_link_params params;
+
+	if (!edev->ops->common->can_link_change(edev->cdev)) {
+		DP_INFO(edev, "Link settings are not allowed to be changed\n");
+		return -EOPNOTSUPP;
+	}
+
+	memset(&current_link, 0, sizeof(current_link));
+	edev->ops->common->get_link(edev->cdev, &current_link);
+
+	if (!current_link.eee_supported) {
+		DP_INFO(edev, "EEE is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	memset(&params, 0, sizeof(params));
+	params.override_flags |= QED_LINK_OVERRIDE_EEE_CONFIG;
+
+	if (!(edata->advertised & (ADVERTISED_1000baseT_Full |
+				   ADVERTISED_10000baseT_Full)) ||
+	    ((edata->advertised & (ADVERTISED_1000baseT_Full |
+				   ADVERTISED_10000baseT_Full)) !=
+	     edata->advertised)) {
+		DP_VERBOSE(edev, QED_MSG_DEBUG,
+			   "Invalid advertised capabilities %d\n",
+			   edata->advertised);
+		return -EINVAL;
+	}
+
+	if (edata->advertised & ADVERTISED_1000baseT_Full)
+		params.eee.adv_caps = QED_EEE_1G_ADV;
+	if (edata->advertised & ADVERTISED_10000baseT_Full)
+		params.eee.adv_caps |= QED_EEE_10G_ADV;
+	params.eee.enable = edata->eee_enabled;
+	params.eee.tx_lpi_enable = edata->tx_lpi_enabled;
+	params.eee.tx_lpi_timer = edata->tx_lpi_timer;
+
+	params.link_up = true;
+	edev->ops->common->set_link(edev->cdev, &params);
+
+	return 0;
+}
+
 static const struct ethtool_ops qede_ethtool_ops = {
 	.get_link_ksettings = qede_get_link_ksettings,
 	.set_link_ksettings = qede_set_link_ksettings,
@@ -1640,6 +1798,9 @@ static const struct ethtool_ops qede_ethtool_ops = {
 	.get_channels = qede_get_channels,
 	.set_channels = qede_set_channels,
 	.self_test = qede_self_test,
+	.get_eee = qede_get_eee,
+	.set_eee = qede_set_eee,
+
 	.get_tunable = qede_get_tunable,
 	.set_tunable = qede_set_tunable,
 };
@@ -1650,6 +1811,8 @@ static const struct ethtool_ops qede_vf_ethtool_ops = {
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
 	.get_link = qede_get_link,
+	.get_coalesce = qede_get_coalesce,
+	.set_coalesce = qede_set_coalesce,
 	.get_ringparam = qede_get_ringparam,
 	.set_ringparam = qede_set_ringparam,
 	.get_strings = qede_get_strings,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index f939db5bac5f..f79e36e4060a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -38,7 +38,6 @@
 #include <linux/qed/qed_if.h>
 #include "qede.h"
 
-#ifdef CONFIG_RFS_ACCEL
 struct qede_arfs_tuple {
 	union {
 		__be32 src_ipv4;
@@ -76,10 +75,12 @@ struct qede_arfs_fltr_node {
 	u16 next_rxq_id;
 	bool filter_op;
 	bool used;
+	u8 fw_rc;
 	struct hlist_node node;
 };
 
 struct qede_arfs {
+#define QEDE_ARFS_BUCKET_HEAD(edev, idx) (&(edev)->arfs->arfs_hl_head[idx])
 #define QEDE_ARFS_POLL_COUNT	100
 #define QEDE_RFS_FLW_BITSHIFT	(4)
 #define QEDE_RFS_FLW_MASK	((1 << QEDE_RFS_FLW_BITSHIFT) - 1)
@@ -121,11 +122,56 @@ qede_free_arfs_filter(struct qede_dev *edev,  struct qede_arfs_fltr_node *fltr)
 	kfree(fltr);
 }
 
+static int
+qede_enqueue_fltr_and_config_searcher(struct qede_dev *edev,
+				      struct qede_arfs_fltr_node *fltr,
+				      u16 bucket_idx)
+{
+	fltr->mapping = dma_map_single(&edev->pdev->dev, fltr->data,
+				       fltr->buf_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(&edev->pdev->dev, fltr->mapping)) {
+		DP_NOTICE(edev, "Failed to map DMA memory for rule\n");
+		qede_free_arfs_filter(edev, fltr);
+		return -ENOMEM;
+	}
+
+	INIT_HLIST_NODE(&fltr->node);
+	hlist_add_head(&fltr->node,
+		       QEDE_ARFS_BUCKET_HEAD(edev, bucket_idx));
+	edev->arfs->filter_count++;
+
+	if (edev->arfs->filter_count == 1 && !edev->arfs->enable) {
+		edev->ops->configure_arfs_searcher(edev->cdev, true);
+		edev->arfs->enable = true;
+	}
+
+	return 0;
+}
+
+static void
+qede_dequeue_fltr_and_config_searcher(struct qede_dev *edev,
+				      struct qede_arfs_fltr_node *fltr)
+{
+	hlist_del(&fltr->node);
+	dma_unmap_single(&edev->pdev->dev, fltr->mapping,
+			 fltr->buf_len, DMA_TO_DEVICE);
+
+	qede_free_arfs_filter(edev, fltr);
+	edev->arfs->filter_count--;
+
+	if (!edev->arfs->filter_count && edev->arfs->enable) {
+		edev->arfs->enable = false;
+		edev->ops->configure_arfs_searcher(edev->cdev, false);
+	}
+}
+
 void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc)
 {
 	struct qede_arfs_fltr_node *fltr = filter;
 	struct qede_dev *edev = dev;
 
+	fltr->fw_rc = fw_rc;
+
 	if (fw_rc) {
 		DP_NOTICE(edev,
 			  "Failed arfs filter configuration fw_rc=%d, flow_id=%d, sw_id=%d, src_port=%d, dst_port=%d, rxq=%d\n",
@@ -185,18 +231,17 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr)
 
 			if ((!test_bit(QEDE_FLTR_VALID, &fltr->state) &&
 			     !fltr->used) || free_fltr) {
-				hlist_del(&fltr->node);
-				dma_unmap_single(&edev->pdev->dev,
-						 fltr->mapping,
-						 fltr->buf_len, DMA_TO_DEVICE);
-				qede_free_arfs_filter(edev, fltr);
-				edev->arfs->filter_count--;
+				qede_dequeue_fltr_and_config_searcher(edev,
+								      fltr);
 			} else {
-				if ((rps_may_expire_flow(edev->ndev,
-							 fltr->rxq_id,
-							 fltr->flow_id,
-							 fltr->sw_id) || del) &&
-							 !free_fltr)
+				bool flow_exp = false;
+#ifdef CONFIG_RFS_ACCEL
+				flow_exp = rps_may_expire_flow(edev->ndev,
+							       fltr->rxq_id,
+							       fltr->flow_id,
+							       fltr->sw_id);
+#endif
+				if ((flow_exp || del) && !free_fltr)
 					qede_configure_arfs_fltr(edev, fltr,
 								 fltr->rxq_id,
 								 false);
@@ -213,10 +258,12 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr)
 			edev->arfs->enable = false;
 			edev->ops->configure_arfs_searcher(edev->cdev, false);
 		}
+#ifdef CONFIG_RFS_ACCEL
 	} else {
 		set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags);
 		schedule_delayed_work(&edev->sp_task,
 				      QEDE_SP_TASK_POLL_DELAY);
+#endif
 	}
 
 	spin_unlock_bh(&edev->arfs->arfs_list_lock);
@@ -258,25 +305,26 @@ int qede_alloc_arfs(struct qede_dev *edev)
 	spin_lock_init(&edev->arfs->arfs_list_lock);
 
 	for (i = 0; i <= QEDE_RFS_FLW_MASK; i++)
-		INIT_HLIST_HEAD(&edev->arfs->arfs_hl_head[i]);
+		INIT_HLIST_HEAD(QEDE_ARFS_BUCKET_HEAD(edev, i));
 
-	edev->ndev->rx_cpu_rmap = alloc_irq_cpu_rmap(QEDE_RSS_COUNT(edev));
-	if (!edev->ndev->rx_cpu_rmap) {
+	edev->arfs->arfs_fltr_bmap = vzalloc(BITS_TO_LONGS(QEDE_RFS_MAX_FLTR) *
+					     sizeof(long));
+	if (!edev->arfs->arfs_fltr_bmap) {
 		vfree(edev->arfs);
 		edev->arfs = NULL;
 		return -ENOMEM;
 	}
 
-	edev->arfs->arfs_fltr_bmap = vzalloc(BITS_TO_LONGS(QEDE_RFS_MAX_FLTR) *
-					     sizeof(long));
-	if (!edev->arfs->arfs_fltr_bmap) {
-		free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
-		edev->ndev->rx_cpu_rmap = NULL;
+#ifdef CONFIG_RFS_ACCEL
+	edev->ndev->rx_cpu_rmap = alloc_irq_cpu_rmap(QEDE_RSS_COUNT(edev));
+	if (!edev->ndev->rx_cpu_rmap) {
+		vfree(edev->arfs->arfs_fltr_bmap);
+		edev->arfs->arfs_fltr_bmap = NULL;
 		vfree(edev->arfs);
 		edev->arfs = NULL;
 		return -ENOMEM;
 	}
-
+#endif
 	return 0;
 }
 
@@ -285,16 +333,19 @@ void qede_free_arfs(struct qede_dev *edev)
 	if (!edev->arfs)
 		return;
 
+#ifdef CONFIG_RFS_ACCEL
 	if (edev->ndev->rx_cpu_rmap)
 		free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
 
 	edev->ndev->rx_cpu_rmap = NULL;
+#endif
 	vfree(edev->arfs->arfs_fltr_bmap);
 	edev->arfs->arfs_fltr_bmap = NULL;
 	vfree(edev->arfs);
 	edev->arfs = NULL;
 }
 
+#ifdef CONFIG_RFS_ACCEL
 static bool qede_compare_ip_addr(struct qede_arfs_fltr_node *tpos,
 				 const struct sk_buff *skb)
 {
@@ -394,9 +445,8 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 
 	spin_lock_bh(&edev->arfs->arfs_list_lock);
 
-	n = qede_arfs_htbl_key_search(&edev->arfs->arfs_hl_head[tbl_idx],
+	n = qede_arfs_htbl_key_search(QEDE_ARFS_BUCKET_HEAD(edev, tbl_idx),
 				      skb, ports[0], ports[1], ip_proto);
-
 	if (n) {
 		/* Filter match */
 		n->next_rxq_id = rxq_index;
@@ -448,23 +498,9 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 	n->tuple.ip_proto = ip_proto;
 	memcpy(n->data + ETH_HLEN, skb->data, skb_headlen(skb));
 
-	n->mapping = dma_map_single(&edev->pdev->dev, n->data,
-				    n->buf_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&edev->pdev->dev, n->mapping)) {
-		DP_NOTICE(edev, "Failed to map DMA memory for arfs\n");
-		qede_free_arfs_filter(edev, n);
-		rc = -ENOMEM;
+	rc = qede_enqueue_fltr_and_config_searcher(edev, n, tbl_idx);
+	if (rc)
 		goto ret_unlock;
-	}
-
-	INIT_HLIST_NODE(&n->node);
-	hlist_add_head(&n->node, &edev->arfs->arfs_hl_head[tbl_idx]);
-	edev->arfs->filter_count++;
-
-	if (edev->arfs->filter_count == 1 && !edev->arfs->enable) {
-		edev->ops->configure_arfs_searcher(edev->cdev, true);
-		edev->arfs->enable = true;
-	}
 
 	qede_configure_arfs_fltr(edev, n, n->rxq_id, true);
 
@@ -472,6 +508,7 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 
 	set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags);
 	schedule_delayed_work(&edev->sp_task, 0);
+
 	return n->sw_id;
 
 ret_unlock:
@@ -1263,3 +1300,371 @@ void qede_config_rx_mode(struct net_device *ndev)
 out:
 	kfree(uc_macs);
 }
+
+static struct qede_arfs_fltr_node *
+qede_get_arfs_fltr_by_loc(struct hlist_head *head, u32 location)
+{
+	struct qede_arfs_fltr_node *fltr;
+
+	hlist_for_each_entry(fltr, head, node)
+		if (location == fltr->sw_id)
+			return fltr;
+
+	return NULL;
+}
+
+static bool
+qede_compare_user_flow_ips(struct qede_arfs_fltr_node *tpos,
+			   struct ethtool_rx_flow_spec *fsp,
+			   __be16 proto)
+{
+	if (proto == htons(ETH_P_IP)) {
+		struct ethtool_tcpip4_spec *ip;
+
+		ip = &fsp->h_u.tcp_ip4_spec;
+
+		if (tpos->tuple.src_ipv4 == ip->ip4src &&
+		    tpos->tuple.dst_ipv4 == ip->ip4dst)
+			return true;
+		else
+			return false;
+	} else {
+		struct ethtool_tcpip6_spec *ip6;
+		struct in6_addr *src;
+
+		ip6 = &fsp->h_u.tcp_ip6_spec;
+		src = &tpos->tuple.src_ipv6;
+
+		if (!memcmp(src, &ip6->ip6src, sizeof(struct in6_addr)) &&
+		    !memcmp(&tpos->tuple.dst_ipv6, &ip6->ip6dst,
+			    sizeof(struct in6_addr)))
+			return true;
+		else
+			return false;
+	}
+	return false;
+}
+
+int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info,
+			  u32 *rule_locs)
+{
+	struct qede_arfs_fltr_node *fltr;
+	struct hlist_head *head;
+	int cnt = 0, rc = 0;
+
+	info->data = QEDE_RFS_MAX_FLTR;
+
+	__qede_lock(edev);
+
+	if (!edev->arfs) {
+		rc = -EPERM;
+		goto unlock;
+	}
+
+	head = QEDE_ARFS_BUCKET_HEAD(edev, 0);
+
+	hlist_for_each_entry(fltr, head, node) {
+		if (cnt == info->rule_cnt) {
+			rc = -EMSGSIZE;
+			goto unlock;
+		}
+
+		rule_locs[cnt] = fltr->sw_id;
+		cnt++;
+	}
+
+	info->rule_cnt = cnt;
+
+unlock:
+	__qede_unlock(edev);
+	return rc;
+}
+
+int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct qede_arfs_fltr_node *fltr = NULL;
+	int rc = 0;
+
+	cmd->data = QEDE_RFS_MAX_FLTR;
+
+	__qede_lock(edev);
+
+	if (!edev->arfs) {
+		rc = -EPERM;
+		goto unlock;
+	}
+
+	fltr = qede_get_arfs_fltr_by_loc(QEDE_ARFS_BUCKET_HEAD(edev, 0),
+					 fsp->location);
+	if (!fltr) {
+		DP_NOTICE(edev, "Rule not found - location=0x%x\n",
+			  fsp->location);
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	if (fltr->tuple.eth_proto == htons(ETH_P_IP)) {
+		if (fltr->tuple.ip_proto == IPPROTO_TCP)
+			fsp->flow_type = TCP_V4_FLOW;
+		else
+			fsp->flow_type = UDP_V4_FLOW;
+
+		fsp->h_u.tcp_ip4_spec.psrc = fltr->tuple.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = fltr->tuple.dst_port;
+		fsp->h_u.tcp_ip4_spec.ip4src = fltr->tuple.src_ipv4;
+		fsp->h_u.tcp_ip4_spec.ip4dst = fltr->tuple.dst_ipv4;
+	} else {
+		if (fltr->tuple.ip_proto == IPPROTO_TCP)
+			fsp->flow_type = TCP_V6_FLOW;
+		else
+			fsp->flow_type = UDP_V6_FLOW;
+		fsp->h_u.tcp_ip6_spec.psrc = fltr->tuple.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = fltr->tuple.dst_port;
+		memcpy(&fsp->h_u.tcp_ip6_spec.ip6src,
+		       &fltr->tuple.src_ipv6, sizeof(struct in6_addr));
+		memcpy(&fsp->h_u.tcp_ip6_spec.ip6dst,
+		       &fltr->tuple.dst_ipv6, sizeof(struct in6_addr));
+	}
+
+	fsp->ring_cookie = fltr->rxq_id;
+
+unlock:
+	__qede_unlock(edev);
+	return rc;
+}
+
+static int
+qede_validate_and_check_flow_exist(struct qede_dev *edev,
+				   struct ethtool_rx_flow_spec *fsp,
+				   int *min_hlen)
+{
+	__be16 src_port = 0x0, dst_port = 0x0;
+	struct qede_arfs_fltr_node *fltr;
+	struct hlist_node *temp;
+	struct hlist_head *head;
+	__be16 eth_proto;
+	u8 ip_proto;
+
+	if (fsp->location >= QEDE_RFS_MAX_FLTR ||
+	    fsp->ring_cookie >= QEDE_RSS_COUNT(edev))
+		return -EINVAL;
+
+	if (fsp->flow_type == TCP_V4_FLOW) {
+		*min_hlen += sizeof(struct iphdr) +
+				sizeof(struct tcphdr);
+		eth_proto = htons(ETH_P_IP);
+		ip_proto = IPPROTO_TCP;
+	} else if (fsp->flow_type == UDP_V4_FLOW) {
+		*min_hlen += sizeof(struct iphdr) +
+				sizeof(struct udphdr);
+		eth_proto = htons(ETH_P_IP);
+		ip_proto = IPPROTO_UDP;
+	} else if (fsp->flow_type == TCP_V6_FLOW) {
+		*min_hlen += sizeof(struct ipv6hdr) +
+				sizeof(struct tcphdr);
+		eth_proto = htons(ETH_P_IPV6);
+		ip_proto = IPPROTO_TCP;
+	} else if (fsp->flow_type == UDP_V6_FLOW) {
+		*min_hlen += sizeof(struct ipv6hdr) +
+				sizeof(struct udphdr);
+		eth_proto = htons(ETH_P_IPV6);
+		ip_proto = IPPROTO_UDP;
+	} else {
+		DP_NOTICE(edev, "Unsupported flow type = 0x%x\n",
+			  fsp->flow_type);
+		return -EPROTONOSUPPORT;
+	}
+
+	if (eth_proto == htons(ETH_P_IP)) {
+		src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+	} else {
+		src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+	}
+
+	head = QEDE_ARFS_BUCKET_HEAD(edev, 0);
+	hlist_for_each_entry_safe(fltr, temp, head, node) {
+		if ((fltr->tuple.ip_proto == ip_proto &&
+		     fltr->tuple.eth_proto == eth_proto &&
+		     qede_compare_user_flow_ips(fltr, fsp, eth_proto) &&
+		     fltr->tuple.src_port == src_port &&
+		     fltr->tuple.dst_port == dst_port) ||
+		    fltr->sw_id == fsp->location)
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int
+qede_poll_arfs_filter_config(struct qede_dev *edev,
+			     struct qede_arfs_fltr_node *fltr)
+{
+	int count = QEDE_ARFS_POLL_COUNT;
+
+	while (fltr->used && count) {
+		msleep(20);
+		count--;
+	}
+
+	if (count == 0 || fltr->fw_rc) {
+		qede_dequeue_fltr_and_config_searcher(edev, fltr);
+		return -EIO;
+	}
+
+	return fltr->fw_rc;
+}
+
+int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
+{
+	struct ethtool_rx_flow_spec *fsp = &info->fs;
+	struct qede_arfs_fltr_node *n;
+	int min_hlen = ETH_HLEN, rc;
+	struct ethhdr *eth;
+	struct iphdr *ip;
+	__be16 *ports;
+
+	__qede_lock(edev);
+
+	if (!edev->arfs) {
+		rc = -EPERM;
+		goto unlock;
+	}
+
+	rc = qede_validate_and_check_flow_exist(edev, fsp, &min_hlen);
+	if (rc)
+		goto unlock;
+
+	n = kzalloc(sizeof(*n), GFP_KERNEL);
+	if (!n) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	n->data = kzalloc(min_hlen, GFP_KERNEL);
+	if (!n->data) {
+		kfree(n);
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	n->sw_id = fsp->location;
+	set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap);
+	n->buf_len = min_hlen;
+	n->rxq_id = fsp->ring_cookie;
+	n->next_rxq_id = n->rxq_id;
+	eth = (struct ethhdr *)n->data;
+
+	if (info->fs.flow_type == TCP_V4_FLOW ||
+	    info->fs.flow_type == UDP_V4_FLOW) {
+		ports = (__be16 *)(n->data + ETH_HLEN +
+					sizeof(struct iphdr));
+		eth->h_proto = htons(ETH_P_IP);
+		n->tuple.eth_proto = htons(ETH_P_IP);
+		n->tuple.src_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4src;
+		n->tuple.dst_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4dst;
+		n->tuple.src_port = info->fs.h_u.tcp_ip4_spec.psrc;
+		n->tuple.dst_port = info->fs.h_u.tcp_ip4_spec.pdst;
+		ports[0] = n->tuple.src_port;
+		ports[1] = n->tuple.dst_port;
+		ip = (struct iphdr *)(n->data + ETH_HLEN);
+		ip->saddr = info->fs.h_u.tcp_ip4_spec.ip4src;
+		ip->daddr = info->fs.h_u.tcp_ip4_spec.ip4dst;
+		ip->version = 0x4;
+		ip->ihl = 0x5;
+
+		if (info->fs.flow_type == TCP_V4_FLOW) {
+			n->tuple.ip_proto = IPPROTO_TCP;
+			ip->protocol = IPPROTO_TCP;
+		} else {
+			n->tuple.ip_proto = IPPROTO_UDP;
+			ip->protocol = IPPROTO_UDP;
+		}
+		ip->tot_len = cpu_to_be16(min_hlen - ETH_HLEN);
+	} else {
+		struct ipv6hdr *ip6;
+
+		ip6 = (struct ipv6hdr *)(n->data + ETH_HLEN);
+		ports = (__be16 *)(n->data + ETH_HLEN +
+					sizeof(struct ipv6hdr));
+		eth->h_proto = htons(ETH_P_IPV6);
+		n->tuple.eth_proto = htons(ETH_P_IPV6);
+		memcpy(&n->tuple.src_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&n->tuple.dst_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		n->tuple.src_port = info->fs.h_u.tcp_ip6_spec.psrc;
+		n->tuple.dst_port = info->fs.h_u.tcp_ip6_spec.pdst;
+		ports[0] = n->tuple.src_port;
+		ports[1] = n->tuple.dst_port;
+		memcpy(&ip6->saddr, &n->tuple.src_ipv6,
+		       sizeof(struct in6_addr));
+		memcpy(&ip6->daddr, &n->tuple.dst_ipv6,
+		       sizeof(struct in6_addr));
+		ip6->version = 0x6;
+
+		if (info->fs.flow_type == TCP_V6_FLOW) {
+			n->tuple.ip_proto = IPPROTO_TCP;
+			ip6->nexthdr = NEXTHDR_TCP;
+			ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr));
+		} else {
+			n->tuple.ip_proto = IPPROTO_UDP;
+			ip6->nexthdr = NEXTHDR_UDP;
+			ip6->payload_len = cpu_to_be16(sizeof(struct udphdr));
+		}
+	}
+
+	rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0);
+	if (rc)
+		goto unlock;
+
+	qede_configure_arfs_fltr(edev, n, n->rxq_id, true);
+	rc = qede_poll_arfs_filter_config(edev, n);
+unlock:
+	__qede_unlock(edev);
+	return rc;
+}
+
+int qede_del_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
+{
+	struct ethtool_rx_flow_spec *fsp = &info->fs;
+	struct qede_arfs_fltr_node *fltr = NULL;
+	int rc = -EPERM;
+
+	__qede_lock(edev);
+	if (!edev->arfs)
+		goto unlock;
+
+	fltr = qede_get_arfs_fltr_by_loc(QEDE_ARFS_BUCKET_HEAD(edev, 0),
+					 fsp->location);
+	if (!fltr)
+		goto unlock;
+
+	qede_configure_arfs_fltr(edev, fltr, fltr->rxq_id, false);
+
+	rc = qede_poll_arfs_filter_config(edev, fltr);
+	if (rc == 0)
+		qede_dequeue_fltr_and_config_searcher(edev, fltr);
+
+unlock:
+	__qede_unlock(edev);
+	return rc;
+}
+
+int qede_get_arfs_filter_count(struct qede_dev *edev)
+{
+	int count = 0;
+
+	__qede_lock(edev);
+
+	if (!edev->arfs)
+		goto unlock;
+
+	count = edev->arfs->filter_count;
+
+unlock:
+	__qede_unlock(edev);
+	return count;
+}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 06ca13dd9ddb..e5ee9f274a71 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -873,9 +873,7 @@ static void qede_update_pf_params(struct qed_dev *cdev)
 	 */
 	pf_params.eth_pf_params.num_vf_cons = 48;
 
-#ifdef CONFIG_RFS_ACCEL
 	pf_params.eth_pf_params.num_arfs_filters = QEDE_RFS_MAX_FLTR;
-#endif
 	qed_ops->common->update_pf_params(cdev, &pf_params);
 }
 
@@ -1984,12 +1982,12 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	qede_vlan_mark_nonconfigured(edev);
 	edev->ops->fastpath_stop(edev->cdev);
-#ifdef CONFIG_RFS_ACCEL
+
 	if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) {
 		qede_poll_for_freeing_arfs_filters(edev);
 		qede_free_arfs(edev);
 	}
-#endif
+
 	/* Release the interrupts */
 	qede_sync_free_irqs(edev);
 	edev->ops->common->set_fp_int(edev->cdev, 0);
@@ -2041,13 +2039,12 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 	if (rc)
 		goto err2;
 
-#ifdef CONFIG_RFS_ACCEL
 	if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) {
 		rc = qede_alloc_arfs(edev);
 		if (rc)
 			DP_NOTICE(edev, "aRFS memory allocation failed\n");
 	}
-#endif
+
 	qede_napi_add_enable(edev);
 	DP_INFO(edev, "Napi added and enabled\n");
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index 0844b7c75767..afa10a163da1 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
@@ -1285,7 +1285,7 @@ flash_temp:
 int qlcnic_dump_fw(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
-	static const struct qlcnic_dump_operations *fw_dump_ops;
+	const struct qlcnic_dump_operations *fw_dump_ops;
 	struct qlcnic_83xx_dump_template_hdr *hdr_83xx;
 	u32 entry_offset, dump, no_entries, buf_offset = 0;
 	int i, k, ops_cnt, ops_index, dump_size = 0;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 73027a6c06c7..82fcb83ea3c8 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -1248,7 +1248,7 @@ static const struct bin_attribute bin_attr_pm_config = {
 	.write = qlcnic_sysfs_write_pm_config,
 };
 
-static struct bin_attribute bin_attr_flash = {
+static const struct bin_attribute bin_attr_flash = {
 	.attr = {.name = "flash", .mode = (S_IRUGO | S_IWUSR)},
 	.size = 0,
 	.read = qlcnic_83xx_sysfs_flash_read_handler,
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 0525bd696d5d..96a27b00c90e 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -991,6 +991,7 @@ struct ravb_private {
 	struct net_device *ndev;
 	struct platform_device *pdev;
 	void __iomem *addr;
+	struct clk *clk;
 	struct mdiobb_ctrl mdiobb;
 	u32 num_rx_ring[NUM_RX_QUEUE];
 	u32 num_tx_ring[NUM_TX_QUEUE];
@@ -1033,6 +1034,7 @@ struct ravb_private {
 
 	unsigned no_avb_link:1;
 	unsigned avb_link_active_low:1;
+	unsigned wol_enabled:1;
 };
 
 static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 5931e859876c..fdf30bfa403b 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -680,6 +680,9 @@ static void ravb_emac_interrupt_unlocked(struct net_device *ndev)
 
 	ecsr = ravb_read(ndev, ECSR);
 	ravb_write(ndev, ecsr, ECSR);	/* clear interrupt */
+
+	if (ecsr & ECSR_MPD)
+		pm_wakeup_event(&priv->pdev->dev, 0);
 	if (ecsr & ECSR_ICD)
 		ndev->stats.tx_carrier_errors++;
 	if (ecsr & ECSR_LCHNG) {
@@ -1330,6 +1333,33 @@ static int ravb_get_ts_info(struct net_device *ndev,
 	return 0;
 }
 
+static void ravb_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (priv->clk) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0;
+	}
+}
+
+static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+
+	if (!priv->clk || wol->wolopts & ~WAKE_MAGIC)
+		return -EOPNOTSUPP;
+
+	priv->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
+
+	device_set_wakeup_enable(&priv->pdev->dev, priv->wol_enabled);
+
+	return 0;
+}
+
 static const struct ethtool_ops ravb_ethtool_ops = {
 	.nway_reset		= ravb_nway_reset,
 	.get_msglevel		= ravb_get_msglevel,
@@ -1343,6 +1373,8 @@ static const struct ethtool_ops ravb_ethtool_ops = {
 	.get_ts_info		= ravb_get_ts_info,
 	.get_link_ksettings	= ravb_get_link_ksettings,
 	.set_link_ksettings	= ravb_set_link_ksettings,
+	.get_wol		= ravb_get_wol,
+	.set_wol		= ravb_set_wol,
 };
 
 static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
@@ -2041,6 +2073,11 @@ static int ravb_probe(struct platform_device *pdev)
 
 	priv->chip_id = chip_id;
 
+	/* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		priv->clk = NULL;
+
 	/* Set function */
 	ndev->netdev_ops = &ravb_netdev_ops;
 	ndev->ethtool_ops = &ravb_ethtool_ops;
@@ -2107,6 +2144,9 @@ static int ravb_probe(struct platform_device *pdev)
 	if (error)
 		goto out_napi_del;
 
+	if (priv->clk)
+		device_set_wakeup_capable(&pdev->dev, 1);
+
 	/* Print device information */
 	netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n",
 		    (u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
@@ -2160,15 +2200,66 @@ static int ravb_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int ravb_wol_setup(struct net_device *ndev)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+
+	/* Disable interrupts by clearing the interrupt masks. */
+	ravb_write(ndev, 0, RIC0);
+	ravb_write(ndev, 0, RIC2);
+	ravb_write(ndev, 0, TIC);
+
+	/* Only allow ECI interrupts */
+	synchronize_irq(priv->emac_irq);
+	napi_disable(&priv->napi[RAVB_NC]);
+	napi_disable(&priv->napi[RAVB_BE]);
+	ravb_write(ndev, ECSIPR_MPDIP, ECSIPR);
+
+	/* Enable MagicPacket */
+	ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
+
+	/* Increased clock usage so device won't be suspended */
+	clk_enable(priv->clk);
+
+	return enable_irq_wake(priv->emac_irq);
+}
+
+static int ravb_wol_restore(struct net_device *ndev)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+	int ret;
+
+	napi_enable(&priv->napi[RAVB_NC]);
+	napi_enable(&priv->napi[RAVB_BE]);
+
+	/* Disable MagicPacket */
+	ravb_modify(ndev, ECMR, ECMR_MPDE, 0);
+
+	ret = ravb_close(ndev);
+	if (ret < 0)
+		return ret;
+
+	/* Restore clock usage count */
+	clk_disable(priv->clk);
+
+	return disable_irq_wake(priv->emac_irq);
+}
+
 static int __maybe_unused ravb_suspend(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
-	int ret = 0;
+	struct ravb_private *priv = netdev_priv(ndev);
+	int ret;
 
-	if (netif_running(ndev)) {
-		netif_device_detach(ndev);
+	if (!netif_running(ndev))
+		return 0;
+
+	netif_device_detach(ndev);
+
+	if (priv->wol_enabled)
+		ret = ravb_wol_setup(ndev);
+	else
 		ret = ravb_close(ndev);
-	}
 
 	return ret;
 }
@@ -2179,6 +2270,33 @@ static int __maybe_unused ravb_resume(struct device *dev)
 	struct ravb_private *priv = netdev_priv(ndev);
 	int ret = 0;
 
+	if (priv->wol_enabled) {
+		/* Reduce the usecount of the clock to zero and then
+		 * restore it to its original value. This is done to force
+		 * the clock to be re-enabled which is a workaround
+		 * for renesas-cpg-mssr driver which do not enable clocks
+		 * when resuming from PSCI suspend/resume.
+		 *
+		 * Without this workaround the driver fails to communicate
+		 * with the hardware if WoL was enabled when the system
+		 * entered PSCI suspend. This is due to that if WoL is enabled
+		 * we explicitly keep the clock from being turned off when
+		 * suspending, but in PSCI sleep power is cut so the clock
+		 * is disabled anyhow, the clock driver is not aware of this
+		 * so the clock is not turned back on when resuming.
+		 *
+		 * TODO: once the renesas-cpg-mssr suspend/resume is working
+		 *       this clock dance should be removed.
+		 */
+		clk_disable(priv->clk);
+		clk_disable(priv->clk);
+		clk_enable(priv->clk);
+		clk_enable(priv->clk);
+
+		/* Set reset mode to rearm the WoL logic */
+		ravb_write(ndev, CCC_OPC_RESET, CCC);
+	}
+
 	/* All register have been reset to default values.
 	 * Restore all registers which where setup at probe time and
 	 * reopen device if it was running before system suspended.
@@ -2202,6 +2320,11 @@ static int __maybe_unused ravb_resume(struct device *dev)
 	ravb_write(ndev, priv->desc_bat_dma, DBAT);
 
 	if (netif_running(ndev)) {
+		if (priv->wol_enabled) {
+			ret = ravb_wol_restore(ndev);
+			if (ret)
+				return ret;
+		}
 		ret = ravb_open(ndev);
 		if (ret < 0)
 			return ret;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index b1e5c07099fa..fc8f8bdf6579 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -34,6 +34,7 @@
 #include <net/netevent.h>
 #include <net/arp.h>
 #include <net/fib_rules.h>
+#include <net/fib_notifier.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <generated/utsrelease.h>
 
@@ -2191,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
 {
 	struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
 	struct rocker_fib_event_work *fib_work;
+	struct fib_notifier_info *info = ptr;
+
+	if (info->family != AF_INET)
+		return NOTIFY_DONE;
 
 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
 	if (WARN_ON(!fib_work))
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 600e30e8f0be..da4e26b53a52 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -2761,7 +2761,7 @@ static int ofdpa_fib4_add(struct rocker *rocker,
 				  fen_info->tb_id, 0);
 	if (err)
 		return err;
-	fib_info_offload_inc(fen_info->fi);
+	fen_info->fi->fib_nh->nh_flags |= RTNH_F_OFFLOAD;
 	return 0;
 }
 
@@ -2776,7 +2776,7 @@ static int ofdpa_fib4_del(struct rocker *rocker,
 	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
 	if (!ofdpa_port)
 		return 0;
-	fib_info_offload_dec(fen_info->fi);
+	fen_info->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
 	return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
 				   fen_info->dst_len, fen_info->fi,
 				   fen_info->tb_id, OFDPA_OP_FLAG_REMOVE);
@@ -2803,7 +2803,7 @@ static void ofdpa_fib4_abort(struct rocker *rocker)
 						       rocker);
 		if (!ofdpa_port)
 			continue;
-		fib_info_offload_dec(flow_entry->fi);
+		flow_entry->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
 		ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE,
 				   flow_entry);
 	}
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index fcea9371ab7f..d407adf59610 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -32,8 +32,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 				struct net_device *net_dev);
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
-int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
-		 __be16 proto, struct tc_to_netdev *tc);
+int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+		 void *type_data);
 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
 extern unsigned int efx_piobuf_size;
 extern bool efx_separate_tx_channels;
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
index e5a7a40cc8b6..4f3bb30661ea 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.h
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -32,8 +32,8 @@ netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
 				struct net_device *net_dev);
 netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb);
 void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index);
-int ef4_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
-		 __be16 proto, struct tc_to_netdev *tc);
+int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+		 void *type_data);
 unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx);
 extern bool ef4_separate_tx_channels;
 
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
index f1520a404ac6..6a75f4140a4b 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -425,24 +425,25 @@ void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue)
 				     efx->n_tx_channels : 0));
 }
 
-int ef4_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
-		 __be16 proto, struct tc_to_netdev *ntc)
+int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+		 void *type_data)
 {
 	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct tc_mqprio_qopt *mqprio = type_data;
 	struct ef4_channel *channel;
 	struct ef4_tx_queue *tx_queue;
 	unsigned tc, num_tc;
 	int rc;
 
-	if (ntc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	num_tc = ntc->mqprio->num_tc;
+	num_tc = mqprio->num_tc;
 
 	if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0 || num_tc > EF4_MAX_TX_TC)
 		return -EINVAL;
 
-	ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
 	if (num_tc == net_dev->num_tc)
 		return 0;
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index c905971c5f3a..d3f96a8f743b 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -746,59 +746,171 @@ static const char *efx_mcdi_phy_test_name(struct efx_nic *efx,
 	return NULL;
 }
 
-#define SFP_PAGE_SIZE	128
-#define SFP_NUM_PAGES	2
-static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
-					  struct ethtool_eeprom *ee, u8 *data)
+#define SFP_PAGE_SIZE		128
+#define SFF_DIAG_TYPE_OFFSET	92
+#define SFF_DIAG_ADDR_CHANGE	BIT(2)
+#define SFF_8079_NUM_PAGES	2
+#define SFF_8472_NUM_PAGES	4
+#define SFF_8436_NUM_PAGES	5
+#define SFF_DMT_LEVEL_OFFSET	94
+
+/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom
+ * @efx:	NIC context
+ * @page:	EEPROM page number
+ * @data:	Destination data pointer
+ * @offset:	Offset in page to copy from in to data
+ * @space:	Space available in data
+ *
+ * Return:
+ *   >=0 - amount of data copied
+ *   <0  - error
+ */
+static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx,
+					       unsigned int page,
+					       u8 *data, ssize_t offset,
+					       ssize_t space)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX);
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN);
 	size_t outlen;
-	int rc;
 	unsigned int payload_len;
-	unsigned int space_remaining = ee->len;
-	unsigned int page;
-	unsigned int page_off;
 	unsigned int to_copy;
-	u8 *user_data = data;
+	int rc;
 
-	BUILD_BUG_ON(SFP_PAGE_SIZE * SFP_NUM_PAGES != ETH_MODULE_SFF_8079_LEN);
+	if (offset > SFP_PAGE_SIZE)
+		return -EINVAL;
 
-	page_off = ee->offset % SFP_PAGE_SIZE;
-	page = ee->offset / SFP_PAGE_SIZE;
+	to_copy = min(space, SFP_PAGE_SIZE - offset);
 
-	while (space_remaining && (page < SFP_NUM_PAGES)) {
-		MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+	MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO,
+				inbuf, sizeof(inbuf),
+				outbuf, sizeof(outbuf),
+				&outlen);
 
-		rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_MEDIA_INFO,
-				  inbuf, sizeof(inbuf),
-				  outbuf, sizeof(outbuf),
-				  &outlen);
-		if (rc)
-			return rc;
+	if (rc)
+		return rc;
+
+	if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
+			SFP_PAGE_SIZE))
+		return -EIO;
+
+	payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN);
+	if (payload_len != SFP_PAGE_SIZE)
+		return -EIO;
 
-		if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
-			      SFP_PAGE_SIZE))
-			return -EIO;
+	memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset,
+	       to_copy);
 
-		payload_len = MCDI_DWORD(outbuf,
-					 GET_PHY_MEDIA_INFO_OUT_DATALEN);
-		if (payload_len != SFP_PAGE_SIZE)
-			return -EIO;
+	return to_copy;
+}
 
-		/* Copy as much as we can into data */
-		payload_len -= page_off;
-		to_copy = (space_remaining < payload_len) ?
-			space_remaining : payload_len;
+static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx,
+					       unsigned int page,
+					       u8 byte)
+{
+	int rc;
+	u8 data;
 
-		memcpy(user_data,
-		       MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + page_off,
-		       to_copy);
+	rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1);
+	if (rc == 1)
+		return data;
+
+	return rc;
+}
+
+static int efx_mcdi_phy_diag_type(struct efx_nic *efx)
+{
+	/* Page zero of the EEPROM includes the diagnostic type at byte 92. */
+	return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+						   SFF_DIAG_TYPE_OFFSET);
+}
 
-		space_remaining -= to_copy;
-		user_data += to_copy;
-		page_off = 0;
-		page++;
+static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx)
+{
+	/* Page zero of the EEPROM includes the DMT level at byte 94. */
+	return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+						   SFF_DMT_LEVEL_OFFSET);
+}
+
+static u32 efx_mcdi_phy_module_type(struct efx_nic *efx)
+{
+	struct efx_mcdi_phy_data *phy_data = efx->phy_data;
+
+	if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS)
+		return phy_data->media;
+
+	/* A QSFP+ NIC may actually have an SFP+ module attached.
+	 * The ID is page 0, byte 0.
+	 */
+	switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) {
+	case 0x3:
+		return MC_CMD_MEDIA_SFP_PLUS;
+	case 0xc:
+	case 0xd:
+		return MC_CMD_MEDIA_QSFP_PLUS;
+	default:
+		return 0;
+	}
+}
+
+static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
+					  struct ethtool_eeprom *ee, u8 *data)
+{
+	int rc;
+	ssize_t space_remaining = ee->len;
+	unsigned int page_off;
+	bool ignore_missing;
+	int num_pages;
+	int page;
+
+	switch (efx_mcdi_phy_module_type(efx)) {
+	case MC_CMD_MEDIA_SFP_PLUS:
+		num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ?
+				SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES;
+		page = 0;
+		ignore_missing = false;
+		break;
+	case MC_CMD_MEDIA_QSFP_PLUS:
+		num_pages = SFF_8436_NUM_PAGES;
+		page = -1; /* We obtain the lower page by asking for -1. */
+		ignore_missing = true; /* Ignore missing pages after page 0. */
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	page_off = ee->offset % SFP_PAGE_SIZE;
+	page += ee->offset / SFP_PAGE_SIZE;
+
+	while (space_remaining && (page < num_pages)) {
+		rc = efx_mcdi_phy_get_module_eeprom_page(efx, page,
+							 data, page_off,
+							 space_remaining);
+
+		if (rc > 0) {
+			space_remaining -= rc;
+			data += rc;
+			page_off = 0;
+			page++;
+		} else if (rc == 0) {
+			space_remaining = 0;
+		} else if (ignore_missing && (page > 0)) {
+			int intended_size = SFP_PAGE_SIZE - page_off;
+
+			space_remaining -= intended_size;
+			if (space_remaining < 0) {
+				space_remaining = 0;
+			} else {
+				memset(data, 0, intended_size);
+				data += intended_size;
+				page_off = 0;
+				page++;
+				rc = 0;
+			}
+		} else {
+			return rc;
+		}
 	}
 
 	return 0;
@@ -807,16 +919,42 @@ static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
 static int efx_mcdi_phy_get_module_info(struct efx_nic *efx,
 					struct ethtool_modinfo *modinfo)
 {
-	struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+	int sff_8472_level;
+	int diag_type;
 
-	switch (phy_cfg->media) {
+	switch (efx_mcdi_phy_module_type(efx)) {
 	case MC_CMD_MEDIA_SFP_PLUS:
-		modinfo->type = ETH_MODULE_SFF_8079;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
-		return 0;
+		sff_8472_level = efx_mcdi_phy_sff_8472_level(efx);
+
+		/* If we can't read the diagnostics level we have none. */
+		if (sff_8472_level < 0)
+			return -EOPNOTSUPP;
+
+		/* Check if this module requires the (unsupported) address
+		 * change operation.
+		 */
+		diag_type = efx_mcdi_phy_diag_type(efx);
+
+		if ((sff_8472_level == 0) ||
+		    (diag_type & SFF_DIAG_ADDR_CHANGE)) {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		}
+		break;
+
+	case MC_CMD_MEDIA_QSFP_PLUS:
+		modinfo->type = ETH_MODULE_SFF_8436;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		break;
+
 	default:
 		return -EOPNOTSUPP;
 	}
+
+	return 0;
 }
 
 static const struct efx_phy_operations efx_mcdi_phy_ops = {
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 02d41eb4a8e9..32bf1fecf864 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -653,24 +653,25 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
 				     efx->n_tx_channels : 0));
 }
 
-int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
-		 __be16 proto, struct tc_to_netdev *ntc)
+int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+		 void *type_data)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
+	struct tc_mqprio_qopt *mqprio = type_data;
 	struct efx_channel *channel;
 	struct efx_tx_queue *tx_queue;
 	unsigned tc, num_tc;
 	int rc;
 
-	if (ntc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	num_tc = ntc->mqprio->num_tc;
+	num_tc = mqprio->num_tc;
 
 	if (num_tc > EFX_MAX_TX_TC)
 		return -EINVAL;
 
-	ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
 	if (num_tc == net_dev->num_tc)
 		return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 85c0e41f8021..97035766c291 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -45,6 +45,15 @@ config DWMAC_GENERIC
 	  platform specific code to function or is using platform
 	  data for setup.
 
+config DWMAC_ANARION
+	tristate "Adaptrum Anarion GMAC support"
+	default ARC
+	depends on OF && (ARC || COMPILE_TEST)
+	help
+	  Support for Adaptrum Anarion GMAC Ethernet controller.
+
+	  This selects the Anarion SoC glue layer support for the stmmac driver.
+
 config DWMAC_IPQ806X
 	tristate "QCA IPQ806x DWMAC support"
 	default ARCH_QCOM
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index fd4937a7fcab..238307fadcdb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -7,6 +7,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
 
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
+obj-$(CONFIG_DWMAC_ANARION)	+= dwmac-anarion.o
 obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
 obj-$(CONFIG_DWMAC_MESON)	+= dwmac-meson.o dwmac-meson8b.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
new file mode 100644
index 000000000000..85ce80c600c7
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -0,0 +1,152 @@
+/*
+ * Adaptrum Anarion DWMAC glue layer
+ *
+ * Copyright (C) 2017, Adaptrum, Inc.
+ * (Written by Alexandru Gagniuc <alex.g at adaptrum.com> for Adaptrum, Inc.)
+ * Licensed under the GPLv2 or (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+#define GMAC_RESET_CONTROL_REG		0
+#define GMAC_SW_CONFIG_REG		4
+#define  GMAC_CONFIG_INTF_SEL_MASK	(0x7 << 0)
+#define  GMAC_CONFIG_INTF_RGMII		(0x1 << 0)
+
+struct anarion_gmac {
+	uintptr_t ctl_block;
+	uint32_t phy_intf_sel;
+};
+
+static uint32_t gmac_read_reg(struct anarion_gmac *gmac, uint8_t reg)
+{
+	return readl((void *)(gmac->ctl_block + reg));
+};
+
+static void gmac_write_reg(struct anarion_gmac *gmac, uint8_t reg, uint32_t val)
+{
+	writel(val, (void *)(gmac->ctl_block + reg));
+}
+
+static int anarion_gmac_init(struct platform_device *pdev, void *priv)
+{
+	uint32_t sw_config;
+	struct anarion_gmac *gmac = priv;
+
+	/* Reset logic, configure interface mode, then release reset. SIMPLE! */
+	gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 1);
+
+	sw_config = gmac_read_reg(gmac, GMAC_SW_CONFIG_REG);
+	sw_config &= ~GMAC_CONFIG_INTF_SEL_MASK;
+	sw_config |= (gmac->phy_intf_sel & GMAC_CONFIG_INTF_SEL_MASK);
+	gmac_write_reg(gmac, GMAC_SW_CONFIG_REG, sw_config);
+
+	gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 0);
+
+	return 0;
+}
+
+static void anarion_gmac_exit(struct platform_device *pdev, void *priv)
+{
+	struct anarion_gmac *gmac = priv;
+
+	gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 1);
+}
+
+static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
+{
+	int phy_mode;
+	struct resource *res;
+	void __iomem *ctl_block;
+	struct anarion_gmac *gmac;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	ctl_block = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ctl_block)) {
+		dev_err(&pdev->dev, "Cannot get reset region (%ld)!\n",
+			PTR_ERR(ctl_block));
+		return ctl_block;
+	}
+
+	gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL);
+	if (!gmac)
+		return ERR_PTR(-ENOMEM);
+
+	gmac->ctl_block = (uintptr_t)ctl_block;
+
+	phy_mode = of_get_phy_mode(pdev->dev.of_node);
+	switch (phy_mode) {
+	case PHY_INTERFACE_MODE_RGMII:		/* Fall through */
+	case PHY_INTERFACE_MODE_RGMII_ID	/* Fall through */:
+	case PHY_INTERFACE_MODE_RGMII_RXID:	/* Fall through */
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		gmac->phy_intf_sel = GMAC_CONFIG_INTF_RGMII;
+		break;
+	default:
+		dev_err(&pdev->dev, "Unsupported phy-mode (%d)\n",
+			phy_mode);
+		return ERR_PTR(-ENOTSUPP);
+	}
+
+	return gmac;
+}
+
+static int anarion_dwmac_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct anarion_gmac *gmac;
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	gmac = anarion_config_dt(pdev);
+	if (IS_ERR(gmac))
+		return PTR_ERR(gmac);
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
+
+	plat_dat->init = anarion_gmac_init;
+	plat_dat->exit = anarion_gmac_exit;
+	anarion_gmac_init(pdev, gmac);
+	plat_dat->bsp_priv = gmac;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret) {
+		stmmac_remove_config_dt(pdev, plat_dat);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id anarion_dwmac_match[] = {
+	{ .compatible = "adaptrum,anarion-gmac" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, anarion_dwmac_match);
+
+static struct platform_driver anarion_dwmac_driver = {
+	.probe  = anarion_dwmac_probe,
+	.remove = stmmac_pltfr_remove,
+	.driver = {
+		.name           = "anarion-dwmac",
+		.pm		= &stmmac_pltfr_pm_ops,
+		.of_match_table = anarion_dwmac_match,
+	},
+};
+module_platform_driver(anarion_dwmac_driver);
+
+MODULE_DESCRIPTION("Adaptrum Anarion DWMAC specific glue layer");
+MODULE_AUTHOR("Alexandru Gagniuc <mr.nuke.me@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 8603e397097e..5b56c24b6ed2 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -248,7 +248,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[],
 	dev->ethtool_ops = &vsw_ethtool_ops;
 	dev->watchdog_timeo = VSW_TX_TIMEOUT;
 
-	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG;
+	dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
 	dev->features = dev->hw_features;
 
 	/* MTU range: 68 - 65535 */
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 4bb04aaf9650..6a4e8e1bbd90 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9221,8 +9221,7 @@ static int niu_get_of_props(struct niu *np)
 
 	phy_type = of_get_property(dp, "phy-type", &prop_len);
 	if (!phy_type) {
-		netdev_err(dev, "%s: OF node lacks phy-type property\n",
-			   dp->full_name);
+		netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
 		return -EINVAL;
 	}
 
@@ -9232,26 +9231,25 @@ static int niu_get_of_props(struct niu *np)
 	strcpy(np->vpd.phy_type, phy_type);
 
 	if (niu_phy_type_prop_decode(np, np->vpd.phy_type)) {
-		netdev_err(dev, "%s: Illegal phy string [%s]\n",
-			   dp->full_name, np->vpd.phy_type);
+		netdev_err(dev, "%pOF: Illegal phy string [%s]\n",
+			   dp, np->vpd.phy_type);
 		return -EINVAL;
 	}
 
 	mac_addr = of_get_property(dp, "local-mac-address", &prop_len);
 	if (!mac_addr) {
-		netdev_err(dev, "%s: OF node lacks local-mac-address property\n",
-			   dp->full_name);
+		netdev_err(dev, "%pOF: OF node lacks local-mac-address property\n",
+			   dp);
 		return -EINVAL;
 	}
 	if (prop_len != dev->addr_len) {
-		netdev_err(dev, "%s: OF MAC address prop len (%d) is wrong\n",
-			   dp->full_name, prop_len);
+		netdev_err(dev, "%pOF: OF MAC address prop len (%d) is wrong\n",
+			   dp, prop_len);
 	}
 	memcpy(dev->dev_addr, mac_addr, dev->addr_len);
 	if (!is_valid_ether_addr(&dev->dev_addr[0])) {
-		netdev_err(dev, "%s: OF MAC address is invalid\n",
-			   dp->full_name);
-		netdev_err(dev, "%s: [ %pM ]\n", dp->full_name, dev->dev_addr);
+		netdev_err(dev, "%pOF: OF MAC address is invalid\n", dp);
+		netdev_err(dev, "%pOF: [ %pM ]\n", dp, dev->dev_addr);
 		return -EINVAL;
 	}
 
@@ -10027,8 +10025,8 @@ static int niu_of_probe(struct platform_device *op)
 
 	reg = of_get_property(op->dev.of_node, "reg", NULL);
 	if (!reg) {
-		dev_err(&op->dev, "%s: No 'reg' property, aborting\n",
-			op->dev.of_node->full_name);
+		dev_err(&op->dev, "%pOF: No 'reg' property, aborting\n",
+			op->dev.of_node);
 		return -ENODEV;
 	}
 
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 75b167e3fe98..0b95105f7060 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac,
 	dev->watchdog_timeo = VNET_TX_TIMEOUT;
 
 	dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
-			   NETIF_F_IP_CSUM | NETIF_F_SG;
+			   NETIF_F_HW_CSUM | NETIF_F_SG;
 	dev->features = dev->hw_features;
 
 	/* MTU range: 68 - 65535 */
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index 9e86833249d4..ecf456c7b6d1 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -303,7 +303,7 @@ static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
 	return skb;
 }
 
-static inline void vnet_fullcsum(struct sk_buff *skb)
+static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	int offset = skb_transport_offset(skb);
@@ -335,6 +335,40 @@ static inline void vnet_fullcsum(struct sk_buff *skb)
 	}
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	int offset = skb_transport_offset(skb);
+
+	if (skb->protocol != htons(ETH_P_IPV6))
+		return;
+	if (ip6h->nexthdr != IPPROTO_TCP &&
+	    ip6h->nexthdr != IPPROTO_UDP)
+		return;
+	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_level = 1;
+	skb->csum = 0;
+	if (ip6h->nexthdr == IPPROTO_TCP) {
+		struct tcphdr *ptcp = tcp_hdr(skb);
+
+		ptcp->check = 0;
+		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					      skb->len - offset, IPPROTO_TCP,
+					      skb->csum);
+	} else if (ip6h->nexthdr == IPPROTO_UDP) {
+		struct udphdr *pudp = udp_hdr(skb);
+
+		pudp->check = 0;
+		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					      skb->len - offset, IPPROTO_UDP,
+					      skb->csum);
+	}
+}
+#endif
+
 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
 {
 	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
@@ -394,9 +428,14 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
 				struct iphdr *iph = ip_hdr(skb);
 				int ihl = iph->ihl * 4;
 
-				skb_reset_transport_header(skb);
 				skb_set_transport_header(skb, ihl);
-				vnet_fullcsum(skb);
+				vnet_fullcsum_ipv4(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+			} else if (skb->protocol == htons(ETH_P_IPV6)) {
+				skb_set_transport_header(skb,
+							 sizeof(struct ipv6hdr));
+				vnet_fullcsum_ipv6(skb);
+#endif
 			}
 		}
 		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
@@ -1115,24 +1154,47 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			start = skb_checksum_start_offset(skb);
 		if (start) {
-			struct iphdr *iph = ip_hdr(nskb);
 			int offset = start + nskb->csum_offset;
 
+			/* copy the headers, no csum here */
 			if (skb_copy_bits(skb, 0, nskb->data, start)) {
 				dev_kfree_skb(nskb);
 				dev_kfree_skb(skb);
 				return NULL;
 			}
+
+			/* copy the rest, with csum calculation */
 			*(__sum16 *)(skb->data + offset) = 0;
 			csum = skb_copy_and_csum_bits(skb, start,
 						      nskb->data + start,
 						      skb->len - start, 0);
-			if (iph->protocol == IPPROTO_TCP ||
-			    iph->protocol == IPPROTO_UDP) {
-				csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - start,
-							 iph->protocol, csum);
+
+			/* add in the header checksums */
+			if (skb->protocol == htons(ETH_P_IP)) {
+				struct iphdr *iph = ip_hdr(nskb);
+
+				if (iph->protocol == IPPROTO_TCP ||
+				    iph->protocol == IPPROTO_UDP) {
+					csum = csum_tcpudp_magic(iph->saddr,
+								 iph->daddr,
+								 skb->len - start,
+								 iph->protocol,
+								 csum);
+				}
+			} else if (skb->protocol == htons(ETH_P_IPV6)) {
+				struct ipv6hdr *ip6h = ipv6_hdr(nskb);
+
+				if (ip6h->nexthdr == IPPROTO_TCP ||
+				    ip6h->nexthdr == IPPROTO_UDP) {
+					csum = csum_ipv6_magic(&ip6h->saddr,
+							       &ip6h->daddr,
+							       skb->len - start,
+							       ip6h->nexthdr,
+							       csum);
+				}
 			}
+
+			/* save the final result */
 			*(__sum16 *)(nskb->data + offset) = csum;
 
 			nskb->ip_summed = CHECKSUM_NONE;
@@ -1318,8 +1380,14 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(!skb))
 		goto out_dropped;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		vnet_fullcsum(skb);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb->protocol == htons(ETH_P_IP))
+			vnet_fullcsum_ipv4(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			vnet_fullcsum_ipv6(skb);
+#endif
+	}
 
 	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 	i = skb_get_queue_mapping(skb);
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
index 3b91257683bc..e1b55b8fb8e0 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
@@ -17,6 +17,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/tcp.h>
+#include <linux/interrupt.h>
 
 #include "dwc-xlgmac.h"
 #include "dwc-xlgmac-reg.h"
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index badd0a8caeb9..c8776dbf1a55 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1321,8 +1321,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 		phy = of_phy_connect(priv->ndev, slave->data->phy_node,
 				 &cpsw_adjust_link, 0, slave->data->phy_if);
 		if (!phy) {
-			dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
-				slave->data->phy_node->full_name,
+			dev_err(priv->dev, "phy \"%pOF\" not found on slave %d\n",
+				slave->data->phy_node,
 				slave->slave_num);
 			return;
 		}
@@ -2670,8 +2670,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		parp = of_get_property(slave_node, "phy_id", &lenp);
 		if (slave_data->phy_node) {
 			dev_dbg(&pdev->dev,
-				"slave[%d] using phy-handle=\"%s\"\n",
-				i, slave_data->phy_node->full_name);
+				"slave[%d] using phy-handle=\"%pOF\"\n",
+				i, slave_data->phy_node);
 		} else if (of_phy_is_fixed_link(slave_node)) {
 			/* In the case of a fixed PHY, the DT node associated
 			 * to the PHY is the Ethernet MAC DT node.
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 64d5527feb2a..4bb561856af5 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1480,8 +1480,8 @@ static int emac_dev_open(struct net_device *ndev)
 		phydev = of_phy_connect(ndev, priv->phy_node,
 					&emac_adjust_link, 0, 0);
 		if (!phydev) {
-			dev_err(emac_dev, "could not connect to phy %s\n",
-				priv->phy_node->full_name);
+			dev_err(emac_dev, "could not connect to phy %pOF\n",
+				priv->phy_node);
 			ret = -ENODEV;
 			goto err;
 		}
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 9d52c3a78621..eb96a6913235 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1877,20 +1877,21 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
-static int netcp_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
-			  __be16 proto, struct tc_to_netdev *tc)
+static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			  void *type_data)
 {
+	struct tc_mqprio_qopt *mqprio = type_data;
 	u8 num_tc;
 	int i;
 
 	/* setup tc must be called under rtnl lock */
 	ASSERT_RTNL();
 
-	if (tc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
+	if (type != TC_SETUP_MQPRIO)
+		return -EOPNOTSUPP;
 
-	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-	num_tc = tc->mqprio->num_tc;
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	num_tc = mqprio->num_tc;
 
 	/* Sanity-check the number of traffic classes requested */
 	if ((dev->real_num_tx_queues <= 1) ||
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index d73da8afe08e..60abc9250f56 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1089,7 +1089,7 @@ static int temac_of_probe(struct platform_device *op)
 
 	lp->phy_node = of_parse_phandle(op->dev.of_node, "phy-handle", 0);
 	if (lp->phy_node)
-		dev_dbg(lp->dev, "using PHY node %s (%p)\n", np->full_name, np);
+		dev_dbg(lp->dev, "using PHY node %pOF (%p)\n", np, np);
 
 	/* Add the device attributes */
 	rc = sysfs_create_group(&lp->dev->kobj, &temac_attr_group);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index af27f7d1cbf3..5ef626331f85 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -389,7 +389,7 @@ struct axidma_bd {
  * @dma_err_tasklet: Tasklet structure to process Axi DMA errors
  * @tx_irq:	Axidma TX IRQ number
  * @rx_irq:	Axidma RX IRQ number
- * @phy_type:	Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X
+ * @phy_mode:	Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X
  * @options:	AxiEthernet option word
  * @last_link:	Phy link state in which the PHY was negotiated earlier
  * @features:	Stores the extended features supported by the axienet hw
@@ -432,7 +432,7 @@ struct axienet_local {
 
 	int tx_irq;
 	int rx_irq;
-	u32 phy_type;
+	phy_interface_t phy_mode;
 
 	u32 options;			/* Current options word */
 	u32 last_link;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 33c595f4691d..e74e1e897864 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -531,11 +531,11 @@ static void axienet_adjust_link(struct net_device *ndev)
 	link_state = phy->speed | (phy->duplex << 1) | phy->link;
 	if (lp->last_link != link_state) {
 		if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) {
-			if (lp->phy_type == XAE_PHY_TYPE_1000BASE_X)
+			if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX)
 				setspeed = 0;
 		} else {
 			if ((phy->speed == SPEED_1000) &&
-			    (lp->phy_type == XAE_PHY_TYPE_MII))
+			    (lp->phy_mode == PHY_INTERFACE_MODE_MII))
 				setspeed = 0;
 		}
 
@@ -935,15 +935,8 @@ static int axienet_open(struct net_device *ndev)
 		return ret;
 
 	if (lp->phy_node) {
-		if (lp->phy_type == XAE_PHY_TYPE_GMII) {
-			phydev = of_phy_connect(lp->ndev, lp->phy_node,
-						axienet_adjust_link, 0,
-						PHY_INTERFACE_MODE_GMII);
-		} else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) {
-			phydev = of_phy_connect(lp->ndev, lp->phy_node,
-						axienet_adjust_link, 0,
-						PHY_INTERFACE_MODE_RGMII_ID);
-		}
+		phydev = of_phy_connect(lp->ndev, lp->phy_node,
+					axienet_adjust_link, 0, lp->phy_mode);
 
 		if (!phydev)
 			dev_err(lp->dev, "of_phy_connect() failed\n");
@@ -1539,7 +1532,38 @@ static int axienet_probe(struct platform_device *pdev)
 	 * the device-tree and accordingly set flags.
 	 */
 	of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem);
-	of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type);
+
+	/* Start with the proprietary, and broken phy_type */
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &value);
+	if (!ret) {
+		netdev_warn(ndev, "Please upgrade your device tree binary blob to use phy-mode");
+		switch (value) {
+		case XAE_PHY_TYPE_MII:
+			lp->phy_mode = PHY_INTERFACE_MODE_MII;
+			break;
+		case XAE_PHY_TYPE_GMII:
+			lp->phy_mode = PHY_INTERFACE_MODE_GMII;
+			break;
+		case XAE_PHY_TYPE_RGMII_2_0:
+			lp->phy_mode = PHY_INTERFACE_MODE_RGMII_ID;
+			break;
+		case XAE_PHY_TYPE_SGMII:
+			lp->phy_mode = PHY_INTERFACE_MODE_SGMII;
+			break;
+		case XAE_PHY_TYPE_1000BASE_X:
+			lp->phy_mode = PHY_INTERFACE_MODE_1000BASEX;
+			break;
+		default:
+			ret = -EINVAL;
+			goto free_netdev;
+		}
+	} else {
+		lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+		if (lp->phy_mode < 0) {
+			ret = -EINVAL;
+			goto free_netdev;
+		}
+	}
 
 	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
 	np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index de8156c6b292..745d57ae95d7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -715,6 +715,7 @@ free_dst:
 
 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 				       struct net_device *dev,
+				       struct geneve_sock *gs4,
 				       struct flowi4 *fl4,
 				       const struct ip_tunnel_info *info)
 {
@@ -724,7 +725,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 	struct rtable *rt = NULL;
 	__u8 tos;
 
-	if (!rcu_dereference(geneve->sock4))
+	if (!gs4)
 		return ERR_PTR(-EIO);
 
 	memset(fl4, 0, sizeof(*fl4));
@@ -764,6 +765,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_IPV6)
 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 					   struct net_device *dev,
+					   struct geneve_sock *gs6,
 					   struct flowi6 *fl6,
 					   const struct ip_tunnel_info *info)
 {
@@ -771,10 +773,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 	struct geneve_dev *geneve = netdev_priv(dev);
 	struct dst_entry *dst = NULL;
 	struct dst_cache *dst_cache;
-	struct geneve_sock *gs6;
 	__u8 prio;
 
-	gs6 = rcu_dereference(geneve->sock6);
 	if (!gs6)
 		return ERR_PTR(-EIO);
 
@@ -827,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	__be16 df;
 	int err;
 
-	rt = geneve_get_v4_rt(skb, dev, &fl4, info);
+	rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
@@ -866,7 +866,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	__be16 sport;
 	int err;
 
-	dst = geneve_get_v6_dst(skb, dev, &fl6, info);
+	dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
@@ -951,8 +951,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 	if (ip_tunnel_info_af(info) == AF_INET) {
 		struct rtable *rt;
 		struct flowi4 fl4;
+		struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
 
-		rt = geneve_get_v4_rt(skb, dev, &fl4, info);
+		rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
 		if (IS_ERR(rt))
 			return PTR_ERR(rt);
 
@@ -962,8 +963,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 	} else if (ip_tunnel_info_af(info) == AF_INET6) {
 		struct dst_entry *dst;
 		struct flowi6 fl6;
+		struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
 
-		dst = geneve_get_v6_dst(skb, dev, &fl6, info);
+		dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
 		if (IS_ERR(dst))
 			return PTR_ERR(dst);
 
@@ -1014,16 +1016,22 @@ static struct device_type geneve_type = {
  * supply the listening GENEVE udp ports. Callers are expected
  * to implement the ndo_udp_tunnel_add.
  */
-static void geneve_push_rx_ports(struct net_device *dev)
+static void geneve_offload_rx_ports(struct net_device *dev, bool push)
 {
 	struct net *net = dev_net(dev);
 	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(gs, &gn->sock_list, list)
-		udp_tunnel_push_rx_port(dev, gs->sock,
-					UDP_TUNNEL_TYPE_GENEVE);
+	list_for_each_entry_rcu(gs, &gn->sock_list, list) {
+		if (push) {
+			udp_tunnel_push_rx_port(dev, gs->sock,
+						UDP_TUNNEL_TYPE_GENEVE);
+		} else {
+			udp_tunnel_drop_rx_port(dev, gs->sock,
+						UDP_TUNNEL_TYPE_GENEVE);
+		}
+	}
 	rcu_read_unlock();
 }
 
@@ -1140,6 +1148,15 @@ static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
 		return true;
 }
 
+static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
+				  struct ip_tunnel_info *b)
+{
+	if (ip_tunnel_info_af(a) == AF_INET)
+		return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
+	else
+		return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
+}
+
 static int geneve_configure(struct net *net, struct net_device *dev,
 			    const struct ip_tunnel_info *info,
 			    bool metadata, bool ipv6_rx_csum)
@@ -1197,24 +1214,22 @@ static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
 	info->key.tp_dst = htons(dst_port);
 }
 
-static int geneve_newlink(struct net *net, struct net_device *dev,
-			  struct nlattr *tb[], struct nlattr *data[],
-			  struct netlink_ext_ack *extack)
+static int geneve_nl2info(struct net_device *dev, struct nlattr *tb[],
+			  struct nlattr *data[], struct ip_tunnel_info *info,
+			  bool *metadata, bool *use_udp6_rx_checksums,
+			  bool changelink)
 {
-	bool use_udp6_rx_checksums = false;
-	struct ip_tunnel_info info;
-	bool metadata = false;
-
-	init_tnl_info(&info, GENEVE_UDP_PORT);
-
 	if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6])
 		return -EINVAL;
 
 	if (data[IFLA_GENEVE_REMOTE]) {
-		info.key.u.ipv4.dst =
+		if (changelink && (ip_tunnel_info_af(info) == AF_INET6))
+			return -EOPNOTSUPP;
+
+		info->key.u.ipv4.dst =
 			nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
 
-		if (IN_MULTICAST(ntohl(info.key.u.ipv4.dst))) {
+		if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
 			netdev_dbg(dev, "multicast remote is unsupported\n");
 			return -EINVAL;
 		}
@@ -1222,21 +1237,24 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
 
 	if (data[IFLA_GENEVE_REMOTE6]) {
  #if IS_ENABLED(CONFIG_IPV6)
-		info.mode = IP_TUNNEL_INFO_IPV6;
-		info.key.u.ipv6.dst =
+		if (changelink && (ip_tunnel_info_af(info) == AF_INET))
+			return -EOPNOTSUPP;
+
+		info->mode = IP_TUNNEL_INFO_IPV6;
+		info->key.u.ipv6.dst =
 			nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
 
-		if (ipv6_addr_type(&info.key.u.ipv6.dst) &
+		if (ipv6_addr_type(&info->key.u.ipv6.dst) &
 		    IPV6_ADDR_LINKLOCAL) {
 			netdev_dbg(dev, "link-local remote is unsupported\n");
 			return -EINVAL;
 		}
-		if (ipv6_addr_is_multicast(&info.key.u.ipv6.dst)) {
+		if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
 			netdev_dbg(dev, "multicast remote is unsupported\n");
 			return -EINVAL;
 		}
-		info.key.tun_flags |= TUNNEL_CSUM;
-		use_udp6_rx_checksums = true;
+		info->key.tun_flags |= TUNNEL_CSUM;
+		*use_udp6_rx_checksums = true;
 #else
 		return -EPFNOSUPPORT;
 #endif
@@ -1245,48 +1263,169 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
 	if (data[IFLA_GENEVE_ID]) {
 		__u32 vni;
 		__u8 tvni[3];
+		__be64 tunid;
 
 		vni = nla_get_u32(data[IFLA_GENEVE_ID]);
 		tvni[0] = (vni & 0x00ff0000) >> 16;
 		tvni[1] = (vni & 0x0000ff00) >> 8;
 		tvni[2] =  vni & 0x000000ff;
 
-		info.key.tun_id = vni_to_tunnel_id(tvni);
+		tunid = vni_to_tunnel_id(tvni);
+		if (changelink && (tunid != info->key.tun_id))
+			return -EOPNOTSUPP;
+		info->key.tun_id = tunid;
 	}
+
 	if (data[IFLA_GENEVE_TTL])
-		info.key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
+		info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
 
 	if (data[IFLA_GENEVE_TOS])
-		info.key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
+		info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
 
 	if (data[IFLA_GENEVE_LABEL]) {
-		info.key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
+		info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
 				  IPV6_FLOWLABEL_MASK;
-		if (info.key.label && (!(info.mode & IP_TUNNEL_INFO_IPV6)))
+		if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6)))
 			return -EINVAL;
 	}
 
-	if (data[IFLA_GENEVE_PORT])
-		info.key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
+	if (data[IFLA_GENEVE_PORT]) {
+		if (changelink)
+			return -EOPNOTSUPP;
+		info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
+	}
 
-	if (data[IFLA_GENEVE_COLLECT_METADATA])
-		metadata = true;
+	if (data[IFLA_GENEVE_COLLECT_METADATA]) {
+		if (changelink)
+			return -EOPNOTSUPP;
+		*metadata = true;
+	}
 
-	if (data[IFLA_GENEVE_UDP_CSUM] &&
-	    nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
-		info.key.tun_flags |= TUNNEL_CSUM;
+	if (data[IFLA_GENEVE_UDP_CSUM]) {
+		if (changelink)
+			return -EOPNOTSUPP;
+		if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
+			info->key.tun_flags |= TUNNEL_CSUM;
+	}
+
+	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
+		if (changelink)
+			return -EOPNOTSUPP;
+		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
+			info->key.tun_flags &= ~TUNNEL_CSUM;
+	}
+
+	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
+		if (changelink)
+			return -EOPNOTSUPP;
+		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
+			*use_udp6_rx_checksums = false;
+	}
+
+	return 0;
+}
 
-	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
-	    nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
-		info.key.tun_flags &= ~TUNNEL_CSUM;
+static int geneve_newlink(struct net *net, struct net_device *dev,
+			  struct nlattr *tb[], struct nlattr *data[],
+			  struct netlink_ext_ack *extack)
+{
+	bool use_udp6_rx_checksums = false;
+	struct ip_tunnel_info info;
+	bool metadata = false;
+	int err;
 
-	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] &&
-	    nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
-		use_udp6_rx_checksums = false;
+	init_tnl_info(&info, GENEVE_UDP_PORT);
+	err = geneve_nl2info(dev, tb, data, &info, &metadata,
+			     &use_udp6_rx_checksums, false);
+	if (err)
+		return err;
 
 	return geneve_configure(net, dev, &info, metadata, use_udp6_rx_checksums);
 }
 
+/* Quiesces the geneve device data path for both TX and RX.
+ *
+ * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
+ * So, if we set that socket to NULL under RCU and wait for synchronize_net()
+ * to complete for the existing set of in-flight packets to be transmitted,
+ * then we would have quiesced the transmit data path. All the future packets
+ * will get dropped until we unquiesce the data path.
+ *
+ * On receive geneve dereference the geneve_sock stashed in the socket. So,
+ * if we set that to NULL under RCU and wait for synchronize_net() to
+ * complete, then we would have quiesced the receive data path.
+ */
+static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
+			   struct geneve_sock **gs6)
+{
+	*gs4 = rtnl_dereference(geneve->sock4);
+	rcu_assign_pointer(geneve->sock4, NULL);
+	if (*gs4)
+		rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
+#if IS_ENABLED(CONFIG_IPV6)
+	*gs6 = rtnl_dereference(geneve->sock6);
+	rcu_assign_pointer(geneve->sock6, NULL);
+	if (*gs6)
+		rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
+#else
+	*gs6 = NULL;
+#endif
+	synchronize_net();
+}
+
+/* Resumes the geneve device data path for both TX and RX. */
+static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
+			     struct geneve_sock __maybe_unused *gs6)
+{
+	rcu_assign_pointer(geneve->sock4, gs4);
+	if (gs4)
+		rcu_assign_sk_user_data(gs4->sock->sk, gs4);
+#if IS_ENABLED(CONFIG_IPV6)
+	rcu_assign_pointer(geneve->sock6, gs6);
+	if (gs6)
+		rcu_assign_sk_user_data(gs6->sock->sk, gs6);
+#endif
+	synchronize_net();
+}
+
+static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
+			     struct nlattr *data[],
+			     struct netlink_ext_ack *extack)
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	struct geneve_sock *gs4, *gs6;
+	struct ip_tunnel_info info;
+	bool metadata;
+	bool use_udp6_rx_checksums;
+	int err;
+
+	/* If the geneve device is configured for metadata (or externally
+	 * controlled, for example, OVS), then nothing can be changed.
+	 */
+	if (geneve->collect_md)
+		return -EOPNOTSUPP;
+
+	/* Start with the existing info. */
+	memcpy(&info, &geneve->info, sizeof(info));
+	metadata = geneve->collect_md;
+	use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
+	err = geneve_nl2info(dev, tb, data, &info, &metadata,
+			     &use_udp6_rx_checksums, true);
+	if (err)
+		return err;
+
+	if (!geneve_dst_addr_equal(&geneve->info, &info))
+		dst_cache_reset(&info.dst_cache);
+
+	geneve_quiesce(geneve, &gs4, &gs6);
+	geneve->info = info;
+	geneve->collect_md = metadata;
+	geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
+	geneve_unquiesce(geneve, gs4, gs6);
+
+	return 0;
+}
+
 static void geneve_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct geneve_dev *geneve = netdev_priv(dev);
@@ -1375,6 +1514,7 @@ static struct rtnl_link_ops geneve_link_ops __read_mostly = {
 	.setup		= geneve_setup,
 	.validate	= geneve_validate,
 	.newlink	= geneve_newlink,
+	.changelink	= geneve_changelink,
 	.dellink	= geneve_dellink,
 	.get_size	= geneve_get_size,
 	.fill_info	= geneve_fill_info,
@@ -1426,8 +1566,14 @@ static int geneve_netdevice_event(struct notifier_block *unused,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-	if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
-		geneve_push_rx_ports(dev);
+	if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
+	    event == NETDEV_UDP_TUNNEL_DROP_INFO) {
+		geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
+	} else if (event == NETDEV_UNREGISTER) {
+		geneve_offload_rx_ports(dev, false);
+	} else if (event == NETDEV_REGISTER) {
+		geneve_offload_rx_ports(dev, true);
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index dec6b76bc0fb..cde41200f40a 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -581,7 +581,7 @@ static int __init setup_adapter(int card_base, int type, int n)
 		priv->param.dma = -1;
 		INIT_WORK(&priv->rx_work, rx_bh);
 		dev->ml_priv = priv;
-		sprintf(dev->name, "dmascc%i", 2 * n + i);
+		snprintf(dev->name, sizeof(dev->name), "dmascc%i", 2 * n + i);
 		dev->base_addr = card_base;
 		dev->irq = irq;
 		dev->netdev_ops = &scc_netdev_ops;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d6c25580f8dd..c701b059c5ac 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -147,7 +147,6 @@ struct hv_netvsc_packet {
 struct netvsc_device_info {
 	unsigned char mac_adr[ETH_ALEN];
 	int  ring_size;
-	u32  max_num_vrss_chns;
 	u32  num_chn;
 };
 
@@ -183,13 +182,16 @@ struct rndis_device {
 /* Interface */
 struct rndis_message;
 struct netvsc_device;
-int netvsc_device_add(struct hv_device *device,
-		      const struct netvsc_device_info *info);
+struct net_device_context;
+
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+					const struct netvsc_device_info *info);
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
 void netvsc_device_remove(struct hv_device *device);
-int netvsc_send(struct hv_device *device,
+int netvsc_send(struct net_device_context *ndc,
 		struct hv_netvsc_packet *packet,
 		struct rndis_message *rndis_msg,
-		struct hv_page_buffer **page_buffer,
+		struct hv_page_buffer *page_buffer,
 		struct sk_buff *skb);
 void netvsc_linkstatus_callback(struct hv_device *device_obj,
 				struct rndis_message *resp);
@@ -200,10 +202,11 @@ int netvsc_recv_callback(struct net_device *net,
 			 const struct ndis_pkt_8021q_info *vlan);
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
+bool rndis_filter_opened(const struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
-int rndis_filter_device_add(struct hv_device *dev,
-			    struct netvsc_device_info *info);
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+					      struct netvsc_device_info *info);
 void rndis_filter_update(struct netvsc_device *nvdev);
 void rndis_filter_device_remove(struct hv_device *dev,
 				struct netvsc_device *nvdev);
@@ -215,7 +218,8 @@ int rndis_filter_receive(struct net_device *ndev,
 			 struct vmbus_channel *channel,
 			 void *data, u32 buflen);
 
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
+int rndis_filter_set_device_mac(struct netvsc_device *ndev,
+				const char *mac);
 
 void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
 
@@ -654,13 +658,10 @@ struct recv_comp_data {
 	u32 status;
 };
 
-/* Netvsc Receive Slots Max */
-#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
-
 struct multi_recv_comp {
-	void *buf; /* queued receive completions */
-	u32 first; /* first data entry */
-	u32 next; /* next entry for writing */
+	struct recv_comp_data *slots;
+	u32 first;	/* first data entry */
+	u32 next;	/* next entry for writing */
 };
 
 struct netvsc_stats {
@@ -679,6 +680,15 @@ struct netvsc_ethtool_stats {
 	unsigned long tx_busy;
 };
 
+struct netvsc_vf_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	struct u64_stats_sync   syncp;
+	u32	tx_dropped;
+};
+
 struct netvsc_reconfig {
 	struct list_head list;
 	u32 event;
@@ -712,6 +722,9 @@ struct net_device_context {
 
 	/* State to manage the associated VF interface. */
 	struct net_device __rcu *vf_netdev;
+	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
+	struct work_struct vf_takeover;
+	struct work_struct vf_notify;
 
 	/* 1: allocated, serial number is valid. 0: not allocated */
 	u32 vf_alloc;
@@ -724,6 +737,7 @@ struct net_device_context {
 /* Per channel data */
 struct netvsc_channel {
 	struct vmbus_channel *channel;
+	struct netvsc_device *net_device;
 	const struct vmpacket_descriptor *desc;
 	struct napi_struct napi;
 	struct multi_send_data msd;
@@ -746,7 +760,7 @@ struct netvsc_device {
 	u32 recv_buf_size;
 	u32 recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
-	struct nvsp_1_receive_buffer_section *recv_section;
+	u32 recv_completion_cnt;
 
 	/* Send buffer allocated by us */
 	void *send_buf;
@@ -774,8 +788,6 @@ struct netvsc_device {
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-	atomic_t num_outstanding_recvs;
-
 	atomic_t open_cnt;
 
 	struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
@@ -783,18 +795,6 @@ struct netvsc_device {
 	struct rcu_head rcu;
 };
 
-static inline struct netvsc_device *
-net_device_to_netvsc_device(struct net_device *ndev)
-{
-	return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
-}
-
-static inline struct netvsc_device *
-hv_device_to_netvsc_device(struct hv_device *device)
-{
-	return net_device_to_netvsc_device(hv_get_drvdata(device));
-}
-
 /* NdisInitialize message */
 struct rndis_initialize_request {
 	u32 req_id;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0a9167dd72fb..9598220b3bcc 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -29,6 +29,9 @@
 #include <linux/netdevice.h>
 #include <linux/if_ether.h>
 #include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <linux/prefetch.h>
+
 #include <asm/sync_bitops.h>
 
 #include "hyperv_net.h"
@@ -41,7 +44,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 	struct hv_device *dev = net_device_ctx->device_ctx;
-	struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+	struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
 	struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
 
 	memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -69,9 +72,6 @@ static struct netvsc_device *alloc_net_device(void)
 	if (!net_device)
 		return NULL;
 
-	net_device->chan_table[0].mrc.buf
-		= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
@@ -89,7 +89,7 @@ static void free_netvsc_device(struct rcu_head *head)
 	int i;
 
 	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
-		vfree(nvdev->chan_table[i].mrc.buf);
+		vfree(nvdev->chan_table[i].mrc.slots);
 
 	kfree(nvdev);
 }
@@ -103,7 +103,8 @@ static void netvsc_destroy_buf(struct hv_device *device)
 {
 	struct nvsp_message *revoke_packet;
 	struct net_device *ndev = hv_get_drvdata(device);
-	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+	struct net_device_context *ndc = netdev_priv(ndev);
+	struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
 	int ret;
 
 	/*
@@ -167,12 +168,6 @@ static void netvsc_destroy_buf(struct hv_device *device)
 		net_device->recv_buf = NULL;
 	}
 
-	if (net_device->recv_section) {
-		net_device->recv_section_cnt = 0;
-		kfree(net_device->recv_section);
-		net_device->recv_section = NULL;
-	}
-
 	/* Deal with the send buffer we may have setup.
 	 * If we got a  send section size, it means we received a
 	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
@@ -235,11 +230,26 @@ static void netvsc_destroy_buf(struct hv_device *device)
 	kfree(net_device->send_section_map);
 }
 
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+{
+	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
+	int node = cpu_to_node(nvchan->channel->target_cpu);
+	size_t size;
+
+	size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
+	nvchan->mrc.slots = vzalloc_node(size, node);
+	if (!nvchan->mrc.slots)
+		nvchan->mrc.slots = vzalloc(size);
+
+	return nvchan->mrc.slots ? 0 : -ENOMEM;
+}
+
 static int netvsc_init_buf(struct hv_device *device,
 			   struct netvsc_device *net_device)
 {
 	int ret = 0;
 	struct nvsp_message *init_packet;
+	struct nvsp_1_message_send_receive_buffer_complete *resp;
 	struct net_device *ndev;
 	size_t map_words;
 	int node;
@@ -296,43 +306,41 @@ static int netvsc_init_buf(struct hv_device *device,
 	wait_for_completion(&net_device->channel_init_wait);
 
 	/* Check the response */
-	if (init_packet->msg.v1_msg.
-	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
-		netdev_err(ndev, "Unable to complete receive buffer "
-			   "initialization with NetVsp - status %d\n",
-			   init_packet->msg.v1_msg.
-			   send_recv_buf_complete.status);
+	resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
+	if (resp->status != NVSP_STAT_SUCCESS) {
+		netdev_err(ndev,
+			   "Unable to complete receive buffer initialization with NetVsp - status %d\n",
+			   resp->status);
 		ret = -EINVAL;
 		goto cleanup;
 	}
 
 	/* Parse the response */
+	netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
+		   resp->num_sections, resp->sections[0].sub_alloc_size,
+		   resp->sections[0].num_sub_allocs);
 
-	net_device->recv_section_cnt = init_packet->msg.
-		v1_msg.send_recv_buf_complete.num_sections;
-
-	net_device->recv_section = kmemdup(
-		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
-		net_device->recv_section_cnt *
-		sizeof(struct nvsp_1_receive_buffer_section),
-		GFP_KERNEL);
-	if (net_device->recv_section == NULL) {
-		ret = -EINVAL;
-		goto cleanup;
-	}
+	net_device->recv_section_cnt = resp->num_sections;
 
 	/*
 	 * For 1st release, there should only be 1 section that represents the
 	 * entire receive buffer
 	 */
 	if (net_device->recv_section_cnt != 1 ||
-	    net_device->recv_section->offset != 0) {
+	    resp->sections[0].offset != 0) {
 		ret = -EINVAL;
 		goto cleanup;
 	}
 
-	/* Now setup the send buffer.
-	 */
+	/* Setup receive completion ring */
+	net_device->recv_completion_cnt
+		= round_up(resp->sections[0].num_sub_allocs + 1,
+			   PAGE_SIZE / sizeof(u64));
+	ret = netvsc_alloc_recv_comp_ring(net_device, 0);
+	if (ret)
+		goto cleanup;
+
+	/* Now setup the send buffer. */
 	net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
 	if (!net_device->send_buf)
 		net_device->send_buf = vzalloc(net_device->send_buf_size);
@@ -549,7 +557,8 @@ void netvsc_device_remove(struct hv_device *device)
 {
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
-	struct netvsc_device *net_device = net_device_ctx->nvdev;
+	struct netvsc_device *net_device
+		= rtnl_dereference(net_device_ctx->nvdev);
 	int i;
 
 	netvsc_disconnect_vsp(device);
@@ -692,7 +701,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 				   u32 pend_size,
 				   struct hv_netvsc_packet *packet,
 				   struct rndis_message *rndis_msg,
-				   struct hv_page_buffer **pb,
+				   struct hv_page_buffer *pb,
 				   struct sk_buff *skb)
 {
 	char *start = net_device->send_buf;
@@ -713,9 +722,9 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 	}
 
 	for (i = 0; i < page_count; i++) {
-		char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
-		u32 offset = (*pb)[i].offset;
-		u32 len = (*pb)[i].len;
+		char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
+		u32 offset = pb[i].offset;
+		u32 len = pb[i].len;
 
 		memcpy(dest, (src + offset), len);
 		msg_size += len;
@@ -734,36 +743,32 @@ static inline int netvsc_send_pkt(
 	struct hv_device *device,
 	struct hv_netvsc_packet *packet,
 	struct netvsc_device *net_device,
-	struct hv_page_buffer **pb,
+	struct hv_page_buffer *pb,
 	struct sk_buff *skb)
 {
 	struct nvsp_message nvmsg;
-	struct netvsc_channel *nvchan
-		= &net_device->chan_table[packet->q_idx];
+	struct nvsp_1_message_send_rndis_packet * const rpkt =
+		&nvmsg.msg.v1_msg.send_rndis_pkt;
+	struct netvsc_channel * const nvchan =
+		&net_device->chan_table[packet->q_idx];
 	struct vmbus_channel *out_channel = nvchan->channel;
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
 	u64 req_id;
 	int ret;
-	struct hv_page_buffer *pgbuf;
 	u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
 
 	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
-	if (skb != NULL) {
-		/* 0 is RMC_DATA; */
-		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
-	} else {
-		/* 1 is RMC_CONTROL; */
-		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
-	}
+	if (skb)
+		rpkt->channel_type = 0;		/* 0 is RMC_DATA */
+	else
+		rpkt->channel_type = 1;		/* 1 is RMC_CONTROL */
 
-	nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
-		packet->send_buf_index;
+	rpkt->send_buf_section_index = packet->send_buf_index;
 	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
-		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+		rpkt->send_buf_section_size = 0;
 	else
-		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
-			packet->total_data_buflen;
+		rpkt->send_buf_section_size = packet->total_data_buflen;
 
 	req_id = (ulong)skb;
 
@@ -771,11 +776,11 @@ static inline int netvsc_send_pkt(
 		return -ENODEV;
 
 	if (packet->page_buf_cnt) {
-		pgbuf = packet->cp_partial ? (*pb) +
-			packet->rmsg_pgcnt : (*pb);
+		if (packet->cp_partial)
+			pb += packet->rmsg_pgcnt;
+
 		ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
-						      pgbuf,
-						      packet->page_buf_cnt,
+						      pb, packet->page_buf_cnt,
 						      &nvmsg,
 						      sizeof(struct nvsp_message),
 						      req_id,
@@ -800,8 +805,10 @@ static inline int netvsc_send_pkt(
 			ret = -ENOSPC;
 		}
 	} else {
-		netdev_err(ndev, "Unable to send packet %p ret %d\n",
-			   packet, ret);
+		netdev_err(ndev,
+			   "Unable to send packet pages %u len %u, ret %d\n",
+			   packet->page_buf_cnt, packet->total_data_buflen,
+			   ret);
 	}
 
 	return ret;
@@ -819,13 +826,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
 	msdp->count = 0;
 }
 
-int netvsc_send(struct hv_device *device,
+/* RCU already held by caller */
+int netvsc_send(struct net_device_context *ndev_ctx,
 		struct hv_netvsc_packet *packet,
 		struct rndis_message *rndis_msg,
-		struct hv_page_buffer **pb,
+		struct hv_page_buffer *pb,
 		struct sk_buff *skb)
 {
-	struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
+	struct netvsc_device *net_device
+		= rcu_dereference_bh(ndev_ctx->nvdev);
+	struct hv_device *device = ndev_ctx->device_ctx;
 	int ret = 0;
 	struct netvsc_channel *nvchan;
 	u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -837,7 +847,7 @@ int netvsc_send(struct hv_device *device,
 	bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
 	/* If device is rescinded, return error and packet will get dropped. */
-	if (unlikely(net_device->destroy))
+	if (unlikely(!net_device || net_device->destroy))
 		return -ENODEV;
 
 	/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
@@ -942,130 +952,94 @@ send_now:
 	return ret;
 }
 
-static int netvsc_send_recv_completion(struct vmbus_channel *channel,
-				       u64 transaction_id, u32 status)
+/* Send pending recv completions */
+static int send_recv_completions(struct netvsc_channel *nvchan)
 {
-	struct nvsp_message recvcompMessage;
+	struct netvsc_device *nvdev = nvchan->net_device;
+	struct multi_recv_comp *mrc = &nvchan->mrc;
+	struct recv_comp_msg {
+		struct nvsp_message_header hdr;
+		u32 status;
+	}  __packed;
+	struct recv_comp_msg msg = {
+		.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
+	};
 	int ret;
 
-	recvcompMessage.hdr.msg_type =
-				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
+	while (mrc->first != mrc->next) {
+		const struct recv_comp_data *rcd
+			= mrc->slots + mrc->first;
 
-	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
+		msg.status = rcd->status;
+		ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
+				       rcd->tid, VM_PKT_COMP, 0);
+		if (unlikely(ret))
+			return ret;
 
-	/* Send the completion */
-	ret = vmbus_sendpacket(channel, &recvcompMessage,
-			       sizeof(struct nvsp_message_header) + sizeof(u32),
-			       transaction_id, VM_PKT_COMP, 0);
-
-	return ret;
-}
-
-static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
-					u32 *filled, u32 *avail)
-{
-	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	u32 first = mrc->first;
-	u32 next = mrc->next;
-
-	*filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
-		  next - first;
-
-	*avail = NETVSC_RECVSLOT_MAX - *filled - 1;
-}
-
-/* Read the first filled slot, no change to index */
-static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
-							 *nvdev, u16 q_idx)
-{
-	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	u32 filled, avail;
-
-	if (unlikely(!mrc->buf))
-		return NULL;
+		if (++mrc->first == nvdev->recv_completion_cnt)
+			mrc->first = 0;
+	}
 
-	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
-	if (!filled)
-		return NULL;
+	/* receive completion ring has been emptied */
+	if (unlikely(nvdev->destroy))
+		wake_up(&nvdev->wait_drain);
 
-	return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
+	return 0;
 }
 
-/* Put the first filled slot back to available pool */
-static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+/* Count how many receive completions are outstanding */
+static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
+				 const struct multi_recv_comp *mrc,
+				 u32 *filled, u32 *avail)
 {
-	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	int num_recv;
-
-	mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
+	u32 count = nvdev->recv_completion_cnt;
 
-	num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+	if (mrc->next >= mrc->first)
+		*filled = mrc->next - mrc->first;
+	else
+		*filled = (count - mrc->first) + mrc->next;
 
-	if (nvdev->destroy && num_recv == 0)
-		wake_up(&nvdev->wait_drain);
+	*avail = count - *filled - 1;
 }
 
-/* Check and send pending recv completions */
-static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
-				 struct vmbus_channel *channel, u16 q_idx)
+/* Add receive complete to ring to send to host. */
+static void enq_receive_complete(struct net_device *ndev,
+				 struct netvsc_device *nvdev, u16 q_idx,
+				 u64 tid, u32 status)
 {
+	struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
+	struct multi_recv_comp *mrc = &nvchan->mrc;
 	struct recv_comp_data *rcd;
-	int ret;
+	u32 filled, avail;
 
-	while (true) {
-		rcd = read_recv_comp_slot(nvdev, q_idx);
-		if (!rcd)
-			break;
+	recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
 
-		ret = netvsc_send_recv_completion(channel, rcd->tid,
-						  rcd->status);
-		if (ret)
-			break;
-
-		put_recv_comp_slot(nvdev, q_idx);
+	if (unlikely(filled > NAPI_POLL_WEIGHT)) {
+		send_recv_completions(nvchan);
+		recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
 	}
-}
 
-#define NETVSC_RCD_WATERMARK 80
-
-/* Get next available slot */
-static inline struct recv_comp_data *get_recv_comp_slot(
-	struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
-{
-	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	u32 filled, avail, next;
-	struct recv_comp_data *rcd;
-
-	if (unlikely(!nvdev->recv_section))
-		return NULL;
-
-	if (unlikely(!mrc->buf))
-		return NULL;
-
-	if (atomic_read(&nvdev->num_outstanding_recvs) >
-	    nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
-		netvsc_chk_recv_comp(nvdev, channel, q_idx);
-
-	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
-	if (!avail)
-		return NULL;
-
-	next = mrc->next;
-	rcd = mrc->buf + next * sizeof(struct recv_comp_data);
-	mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
+	if (unlikely(!avail)) {
+		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+			   q_idx, tid);
+		return;
+	}
 
-	atomic_inc(&nvdev->num_outstanding_recvs);
+	rcd = mrc->slots + mrc->next;
+	rcd->tid = tid;
+	rcd->status = status;
 
-	return rcd;
+	if (++mrc->next == nvdev->recv_completion_cnt)
+		mrc->next = 0;
 }
 
 static int netvsc_receive(struct net_device *ndev,
-		   struct netvsc_device *net_device,
-		   struct net_device_context *net_device_ctx,
-		   struct hv_device *device,
-		   struct vmbus_channel *channel,
-		   const struct vmpacket_descriptor *desc,
-		   struct nvsp_message *nvsp)
+			  struct netvsc_device *net_device,
+			  struct net_device_context *net_device_ctx,
+			  struct hv_device *device,
+			  struct vmbus_channel *channel,
+			  const struct vmpacket_descriptor *desc,
+			  struct nvsp_message *nvsp)
 {
 	const struct vmtransfer_page_packet_header *vmxferpage_packet
 		= container_of(desc, const struct vmtransfer_page_packet_header, d);
@@ -1074,7 +1048,6 @@ static int netvsc_receive(struct net_device *ndev,
 	u32 status = NVSP_STAT_SUCCESS;
 	int i;
 	int count = 0;
-	int ret;
 
 	/* Make sure this is a valid nvsp packet */
 	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@ -1105,25 +1078,9 @@ static int netvsc_receive(struct net_device *ndev,
 					      channel, data, buflen);
 	}
 
-	if (net_device->chan_table[q_idx].mrc.buf) {
-		struct recv_comp_data *rcd;
+	enq_receive_complete(ndev, net_device, q_idx,
+			     vmxferpage_packet->d.trans_id, status);
 
-		rcd = get_recv_comp_slot(net_device, channel, q_idx);
-		if (rcd) {
-			rcd->tid = vmxferpage_packet->d.trans_id;
-			rcd->status = status;
-		} else {
-			netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
-				   q_idx, vmxferpage_packet->d.trans_id);
-		}
-	} else {
-		ret = netvsc_send_recv_completion(channel,
-						  vmxferpage_packet->d.trans_id,
-						  status);
-		if (ret)
-			netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
-				   q_idx, vmxferpage_packet->d.trans_id, ret);
-	}
 	return count;
 }
 
@@ -1219,11 +1176,10 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 {
 	struct netvsc_channel *nvchan
 		= container_of(napi, struct netvsc_channel, napi);
+	struct netvsc_device *net_device = nvchan->net_device;
 	struct vmbus_channel *channel = nvchan->channel;
 	struct hv_device *device = netvsc_channel_to_device(channel);
-	u16 q_idx = channel->offermsg.offer.sub_channel_index;
 	struct net_device *ndev = hv_get_drvdata(device);
-	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
 	int work_done = 0;
 
 	/* If starting a new interval */
@@ -1236,17 +1192,23 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
 	}
 
-	/* If receive ring was exhausted
-	 * and not doing busy poll
+	/* if ring is empty, signal host */
+	if (!nvchan->desc)
+		hv_pkt_iter_close(channel);
+
+	/* If send of pending receive completions suceeded
+	 *   and did not exhaust NAPI budget this time
+	 *   and not doing busy poll
 	 * then re-enable host interrupts
-	 *  and reschedule if ring is not empty.
+	 *     and reschedule if ring is not empty.
 	 */
-	if (work_done < budget &&
+	if (send_recv_completions(nvchan) == 0 &&
+	    work_done < budget &&
 	    napi_complete_done(napi, work_done) &&
-	    hv_end_read(&channel->inbound) != 0)
+	    hv_end_read(&channel->inbound)) {
+		hv_begin_read(&channel->inbound);
 		napi_reschedule(napi);
-
-	netvsc_chk_recv_comp(net_device, channel, q_idx);
+	}
 
 	/* Driver may overshoot since multiple packets per descriptor */
 	return min(work_done, budget);
@@ -1258,10 +1220,15 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 void netvsc_channel_cb(void *context)
 {
 	struct netvsc_channel *nvchan = context;
+	struct vmbus_channel *channel = nvchan->channel;
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	/* preload first vmpacket descriptor */
+	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
 
 	if (napi_schedule_prep(&nvchan->napi)) {
 		/* disable interupts from host */
-		hv_begin_read(&nvchan->channel->inbound);
+		hv_begin_read(rbi);
 
 		__napi_schedule(&nvchan->napi);
 	}
@@ -1271,8 +1238,8 @@ void netvsc_channel_cb(void *context)
  * netvsc_device_add - Callback when the device belonging to this
  * driver is added
  */
-int netvsc_device_add(struct hv_device *device,
-		      const struct netvsc_device_info *device_info)
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+				const struct netvsc_device_info *device_info)
 {
 	int i, ret = 0;
 	int ring_size = device_info->ring_size;
@@ -1282,7 +1249,7 @@ int netvsc_device_add(struct hv_device *device,
 
 	net_device = alloc_net_device();
 	if (!net_device)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	net_device->ring_size = ring_size;
 
@@ -1302,6 +1269,7 @@ int netvsc_device_add(struct hv_device *device,
 		struct netvsc_channel *nvchan = &net_device->chan_table[i];
 
 		nvchan->channel = device->channel;
+		nvchan->net_device = net_device;
 	}
 
 	/* Enable NAPI handler before init callbacks */
@@ -1338,10 +1306,11 @@ int netvsc_device_add(struct hv_device *device,
 		goto close;
 	}
 
-	return ret;
+	return net_device;
 
 close:
-	netif_napi_del(&net_device->chan_table[0].napi);
+	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+	napi_disable(&net_device->chan_table[0].napi);
 
 	/* Now, we can close the channel safely */
 	vmbus_close(device->channel);
@@ -1349,6 +1318,5 @@ close:
 cleanup:
 	free_netvsc_device(&net_device->rcu);
 
-	return ret;
-
+	return ERR_PTR(ret);
 }
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 0d78727f1a14..e75c0f852a63 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -33,6 +33,9 @@
 #include <linux/if_vlan.h>
 #include <linux/in.h>
 #include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/netpoll.h>
+
 #include <net/arp.h>
 #include <net/route.h>
 #include <net/sock.h>
@@ -69,7 +72,8 @@ static void netvsc_set_multicast_list(struct net_device *net)
 static int netvsc_open(struct net_device *net)
 {
 	struct net_device_context *ndev_ctx = netdev_priv(net);
-	struct netvsc_device *nvdev = ndev_ctx->nvdev;
+	struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
 	struct rndis_device *rdev;
 	int ret = 0;
 
@@ -85,15 +89,29 @@ static int netvsc_open(struct net_device *net)
 	netif_tx_wake_all_queues(net);
 
 	rdev = nvdev->extension;
-	if (!rdev->link_state && !ndev_ctx->datapath)
+
+	if (!rdev->link_state)
 		netif_carrier_on(net);
 
-	return ret;
+	if (vf_netdev) {
+		/* Setting synthetic device up transparently sets
+		 * slave as up. If open fails, then slave will be
+		 * still be offline (and not used).
+		 */
+		ret = dev_open(vf_netdev);
+		if (ret)
+			netdev_warn(net,
+				    "unable to open slave: %s: %d\n",
+				    vf_netdev->name, ret);
+	}
+	return 0;
 }
 
 static int netvsc_close(struct net_device *net)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct net_device *vf_netdev
+		= rtnl_dereference(net_device_ctx->vf_netdev);
 	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 	int ret;
 	u32 aread, i, msec = 10, retry = 0, retry_max = 20;
@@ -139,6 +157,9 @@ static int netvsc_close(struct net_device *net)
 		ret = -ETIMEDOUT;
 	}
 
+	if (vf_netdev)
+		dev_close(vf_netdev);
+
 	return ret;
 }
 
@@ -222,13 +243,11 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
  *
  * TODO support XPS - but get_xps_queue not exported
  */
-static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			void *accel_priv, select_queue_fallback_t fallback)
+static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
 {
-	unsigned int num_tx_queues = ndev->real_num_tx_queues;
 	int q_idx = sk_tx_queue_get(skb->sk);
 
-	if (q_idx < 0 || skb->ooo_okay) {
+	if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
 		/* If forwarding a packet, we use the recorded queue when
 		 * available for better cache locality.
 		 */
@@ -238,12 +257,33 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 			q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
 	}
 
-	while (unlikely(q_idx >= num_tx_queues))
-		q_idx -= num_tx_queues;
-
 	return q_idx;
 }
 
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+			       void *accel_priv,
+			       select_queue_fallback_t fallback)
+{
+	struct net_device_context *ndc = netdev_priv(ndev);
+	struct net_device *vf_netdev;
+	u16 txq;
+
+	rcu_read_lock();
+	vf_netdev = rcu_dereference(ndc->vf_netdev);
+	if (vf_netdev) {
+		txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+		qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+	} else {
+		txq = netvsc_pick_tx(ndev, skb);
+	}
+	rcu_read_unlock();
+
+	while (unlikely(txq >= ndev->real_num_tx_queues))
+		txq -= ndev->real_num_tx_queues;
+
+	return txq;
+}
+
 static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
 			struct hv_page_buffer *pb)
 {
@@ -280,9 +320,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
 
 static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
 			   struct hv_netvsc_packet *packet,
-			   struct hv_page_buffer **page_buf)
+			   struct hv_page_buffer *pb)
 {
-	struct hv_page_buffer *pb = *page_buf;
 	u32 slots_used = 0;
 	char *data = skb->data;
 	int frags = skb_shinfo(skb)->nr_frags;
@@ -359,13 +398,40 @@ static u32 net_checksum_info(struct sk_buff *skb)
 
 		if (ip6->nexthdr == IPPROTO_TCP)
 			return TRANSPORT_INFO_IPV6_TCP;
-		else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
+		else if (ip6->nexthdr == IPPROTO_UDP)
 			return TRANSPORT_INFO_IPV6_UDP;
 	}
 
 	return TRANSPORT_INFO_NOT_IP;
 }
 
+/* Send skb on the slave VF device. */
+static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
+			  struct sk_buff *skb)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	unsigned int len = skb->len;
+	int rc;
+
+	skb->dev = vf_netdev;
+	skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+
+	rc = dev_queue_xmit(skb);
+	if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
+		struct netvsc_vf_pcpu_stats *pcpu_stats
+			= this_cpu_ptr(ndev_ctx->vf_stats);
+
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(ndev_ctx->vf_stats->tx_dropped);
+	}
+
+	return rc;
+}
+
 static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -374,11 +440,19 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	unsigned int num_data_pgs;
 	struct rndis_message *rndis_msg;
 	struct rndis_packet *rndis_pkt;
+	struct net_device *vf_netdev;
 	u32 rndis_msg_size;
 	struct rndis_per_packet_info *ppi;
 	u32 hash;
-	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
-	struct hv_page_buffer *pb = page_buf;
+	struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+
+	/* if VF is present and up then redirect packets
+	 * already called with rcu_read_lock_bh
+	 */
+	vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
+	if (vf_netdev && netif_running(vf_netdev) &&
+	    !netpoll_tx_running(net))
+		return netvsc_vf_xmit(net, vf_netdev, skb);
 
 	/* We will atmost need two pages to describe the rndis
 	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -524,12 +598,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	rndis_msg->msg_len += rndis_msg_size;
 	packet->total_data_buflen = rndis_msg->msg_len;
 	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
-					       skb, packet, &pb);
+					       skb, packet, pb);
 
 	/* timestamp packet in software */
 	skb_tx_timestamp(skb);
-	ret = netvsc_send(net_device_ctx->device_ctx, packet,
-			  rndis_msg, &pb, skb);
+
+	ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
 	if (likely(ret == 0))
 		return NETDEV_TX_OK;
 
@@ -658,29 +732,18 @@ int netvsc_recv_callback(struct net_device *net,
 	struct netvsc_device *net_device;
 	u16 q_idx = channel->offermsg.offer.sub_channel_index;
 	struct netvsc_channel *nvchan;
-	struct net_device *vf_netdev;
 	struct sk_buff *skb;
 	struct netvsc_stats *rx_stats;
 
 	if (net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	/*
-	 * If necessary, inject this packet into the VF interface.
-	 * On Hyper-V, multicast and brodcast packets are only delivered
-	 * to the synthetic interface (after subjecting these to
-	 * policy filters on the host). Deliver these via the VF
-	 * interface in the guest.
-	 */
 	rcu_read_lock();
 	net_device = rcu_dereference(net_device_ctx->nvdev);
 	if (unlikely(!net_device))
 		goto drop;
 
 	nvchan = &net_device->chan_table[q_idx];
-	vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
-	if (vf_netdev && (vf_netdev->flags & IFF_UP))
-		net = vf_netdev;
 
 	/* Allocate a skb - TODO direct I/O to pages? */
 	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
@@ -692,8 +755,7 @@ drop:
 		return NVSP_STAT_FAIL;
 	}
 
-	if (net != vf_netdev)
-		skb_record_rx_queue(skb, q_idx);
+	skb_record_rx_queue(skb, q_idx);
 
 	/*
 	 * Even if injecting the packet, record the statistics
@@ -736,39 +798,16 @@ static void netvsc_get_channels(struct net_device *net,
 	}
 }
 
-static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
-			     u32 num_chn)
-{
-	struct netvsc_device_info device_info;
-	int ret;
-
-	memset(&device_info, 0, sizeof(device_info));
-	device_info.num_chn = num_chn;
-	device_info.ring_size = ring_size;
-	device_info.max_num_vrss_chns = num_chn;
-
-	ret = rndis_filter_device_add(dev, &device_info);
-	if (ret)
-		return ret;
-
-	ret = netif_set_real_num_tx_queues(net, num_chn);
-	if (ret)
-		return ret;
-
-	ret = netif_set_real_num_rx_queues(net, num_chn);
-
-	return ret;
-}
-
 static int netvsc_set_channels(struct net_device *net,
 			       struct ethtool_channels *channels)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
 	struct hv_device *dev = net_device_ctx->device_ctx;
 	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
-	unsigned int count = channels->combined_count;
-	bool was_running;
-	int ret;
+	unsigned int orig, count = channels->combined_count;
+	struct netvsc_device_info device_info;
+	bool was_opened;
+	int ret = 0;
 
 	/* We do not support separate count for rx, tx, or other */
 	if (count == 0 ||
@@ -787,25 +826,32 @@ static int netvsc_set_channels(struct net_device *net,
 	if (count > nvdev->max_chn)
 		return -EINVAL;
 
-	was_running = netif_running(net);
-	if (was_running) {
-		ret = netvsc_close(net);
-		if (ret)
-			return ret;
-	}
+	orig = nvdev->num_chn;
+	was_opened = rndis_filter_opened(nvdev);
+	if (was_opened)
+		rndis_filter_close(nvdev);
 
 	rndis_filter_device_remove(dev, nvdev);
 
-	ret = netvsc_set_queues(net, dev, count);
-	if (ret == 0)
-		nvdev->num_chn = count;
-	else
-		netvsc_set_queues(net, dev, nvdev->num_chn);
+	memset(&device_info, 0, sizeof(device_info));
+	device_info.num_chn = count;
+	device_info.ring_size = ring_size;
+
+	nvdev = rndis_filter_device_add(dev, &device_info);
+	if (!IS_ERR(nvdev)) {
+		netif_set_real_num_tx_queues(net, nvdev->num_chn);
+		netif_set_real_num_rx_queues(net, nvdev->num_chn);
+	} else {
+		ret = PTR_ERR(nvdev);
+		device_info.num_chn = orig;
+		rndis_filter_device_add(dev, &device_info);
+	}
 
-	if (was_running)
-		ret = netvsc_open(net);
+	if (was_opened)
+		rndis_filter_open(nvdev);
 
 	/* We may have missed link change notifications */
+	net_device_ctx->last_reconfig = 0;
 	schedule_delayed_work(&net_device_ctx->dwork, 0);
 
 	return ret;
@@ -869,41 +915,53 @@ static int netvsc_set_link_ksettings(struct net_device *dev,
 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 {
 	struct net_device_context *ndevctx = netdev_priv(ndev);
+	struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
 	struct hv_device *hdev = ndevctx->device_ctx;
+	int orig_mtu = ndev->mtu;
 	struct netvsc_device_info device_info;
-	bool was_running;
+	bool was_opened;
 	int ret = 0;
 
 	if (!nvdev || nvdev->destroy)
 		return -ENODEV;
 
-	was_running = netif_running(ndev);
-	if (was_running) {
-		ret = netvsc_close(ndev);
+	/* Change MTU of underlying VF netdev first. */
+	if (vf_netdev) {
+		ret = dev_set_mtu(vf_netdev, mtu);
 		if (ret)
 			return ret;
 	}
 
+	netif_device_detach(ndev);
+	was_opened = rndis_filter_opened(nvdev);
+	if (was_opened)
+		rndis_filter_close(nvdev);
+
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
 	device_info.num_chn = nvdev->num_chn;
-	device_info.max_num_vrss_chns = nvdev->num_chn;
 
 	rndis_filter_device_remove(hdev, nvdev);
 
-	/* 'nvdev' has been freed in rndis_filter_device_remove() ->
-	 * netvsc_device_remove () -> free_netvsc_device().
-	 * We mustn't access it before it's re-created in
-	 * rndis_filter_device_add() -> netvsc_device_add().
-	 */
-
 	ndev->mtu = mtu;
 
-	rndis_filter_device_add(hdev, &device_info);
+	nvdev = rndis_filter_device_add(hdev, &device_info);
+	if (IS_ERR(nvdev)) {
+		ret = PTR_ERR(nvdev);
+
+		/* Attempt rollback to original MTU */
+		ndev->mtu = orig_mtu;
+		rndis_filter_device_add(hdev, &device_info);
+
+		if (vf_netdev)
+			dev_set_mtu(vf_netdev, orig_mtu);
+	}
+
+	if (was_opened)
+		rndis_filter_open(nvdev);
 
-	if (was_running)
-		ret = netvsc_open(ndev);
+	netif_device_attach(ndev);
 
 	/* We may have missed link change notifications */
 	schedule_delayed_work(&ndevctx->dwork, 0);
@@ -911,16 +969,56 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	return ret;
 }
 
+static void netvsc_get_vf_stats(struct net_device *net,
+				struct netvsc_vf_pcpu_stats *tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	int i;
+
+	memset(tot, 0, sizeof(*tot));
+
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats
+			= per_cpu_ptr(ndev_ctx->vf_stats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			rx_packets = stats->rx_packets;
+			tx_packets = stats->tx_packets;
+			rx_bytes = stats->rx_bytes;
+			tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes   += rx_bytes;
+		tot->tx_bytes   += tx_bytes;
+		tot->tx_dropped += stats->tx_dropped;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
 	struct net_device_context *ndev_ctx = netdev_priv(net);
 	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
-	int i;
+	struct netvsc_vf_pcpu_stats vf_tot;
+		int i;
 
 	if (!nvdev)
 		return;
 
+	netdev_stats_to_stats64(t, &net->stats);
+
+	netvsc_get_vf_stats(net, &vf_tot);
+	t->rx_packets += vf_tot.rx_packets;
+	t->tx_packets += vf_tot.tx_packets;
+	t->rx_bytes   += vf_tot.rx_bytes;
+	t->tx_bytes   += vf_tot.tx_bytes;
+	t->tx_dropped += vf_tot.tx_dropped;
+
 	for (i = 0; i < nvdev->num_chn; i++) {
 		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
 		const struct netvsc_stats *stats;
@@ -949,16 +1047,12 @@ static void netvsc_get_stats64(struct net_device *net,
 		t->rx_packets	+= packets;
 		t->multicast	+= multicast;
 	}
-
-	t->tx_dropped	= net->stats.tx_dropped;
-	t->tx_errors	= net->stats.tx_errors;
-
-	t->rx_dropped	= net->stats.rx_dropped;
-	t->rx_errors	= net->stats.rx_errors;
 }
 
 static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
 {
+	struct net_device_context *ndc = netdev_priv(ndev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	struct sockaddr *addr = p;
 	char save_adr[ETH_ALEN];
 	unsigned char save_aatype;
@@ -971,7 +1065,10 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
 	if (err != 0)
 		return err;
 
-	err = rndis_filter_set_device_mac(ndev, addr->sa_data);
+	if (!nvdev)
+		return -ENODEV;
+
+	err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
 	if (err != 0) {
 		/* roll back to saved MAC */
 		memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
@@ -990,9 +1087,16 @@ static const struct {
 	{ "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
 	{ "tx_too_big",	  offsetof(struct netvsc_ethtool_stats, tx_too_big) },
 	{ "tx_busy",	  offsetof(struct netvsc_ethtool_stats, tx_busy) },
+}, vf_stats[] = {
+	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
+	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
+	{ "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) },
+	{ "vf_tx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) },
+	{ "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) },
 };
 
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
+#define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
@@ -1007,7 +1111,9 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 
 	switch (string_set) {
 	case ETH_SS_STATS:
-		return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
+		return NETVSC_GLOBAL_STATS_LEN
+			+ NETVSC_VF_STATS_LEN
+			+ NETVSC_QUEUE_STATS_LEN(nvdev);
 	default:
 		return -EINVAL;
 	}
@@ -1017,9 +1123,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 				     struct ethtool_stats *stats, u64 *data)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
-	struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
+	struct netvsc_vf_pcpu_stats sum;
 	unsigned int start;
 	u64 packets, bytes;
 	int i, j;
@@ -1030,6 +1137,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
 		data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
 
+	netvsc_get_vf_stats(dev, &sum);
+	for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
+		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
+
 	for (j = 0; j < nvdev->num_chn; j++) {
 		qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1055,7 +1166,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
-	struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
 	int i;
 
@@ -1064,11 +1175,16 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
-			memcpy(p + i * ETH_GSTRING_LEN,
-			       netvsc_stats[i].name, ETH_GSTRING_LEN);
+		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
+			memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
+			memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
 
-		p += i * ETH_GSTRING_LEN;
 		for (i = 0; i < nvdev->num_chn; i++) {
 			sprintf(p, "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
@@ -1113,7 +1229,7 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 		 u32 *rules)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
-	struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 
 	if (!nvdev)
 		return -ENODEV;
@@ -1163,7 +1279,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 			   u8 *hfunc)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
-	struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+	struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
 	struct rndis_device *rndis_dev;
 	int i;
 
@@ -1308,8 +1424,7 @@ static void netvsc_link_change(struct work_struct *w)
 	case RNDIS_STATUS_MEDIA_CONNECT:
 		if (rdev->link_state) {
 			rdev->link_state = false;
-			if (!ndev_ctx->datapath)
-				netif_carrier_on(net);
+			netif_carrier_on(net);
 			netif_tx_wake_all_queues(net);
 		} else {
 			notify = true;
@@ -1386,7 +1501,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
 			continue;	/* not a netvsc device */
 
 		net_device_ctx = netdev_priv(dev);
-		if (net_device_ctx->nvdev == NULL)
+		if (!rtnl_dereference(net_device_ctx->nvdev))
 			continue;	/* device is removed */
 
 		if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
@@ -1396,6 +1511,108 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
 	return NULL;
 }
 
+/* Called when VF is injecting data into network stack.
+ * Change the associated network device from VF to netvsc.
+ * note: already called with rcu_read_lock
+ */
+static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct netvsc_vf_pcpu_stats *pcpu_stats
+		 = this_cpu_ptr(ndev_ctx->vf_stats);
+
+	skb->dev = ndev;
+
+	u64_stats_update_begin(&pcpu_stats->syncp);
+	pcpu_stats->rx_packets++;
+	pcpu_stats->rx_bytes += skb->len;
+	u64_stats_update_end(&pcpu_stats->syncp);
+
+	return RX_HANDLER_ANOTHER;
+}
+
+static int netvsc_vf_join(struct net_device *vf_netdev,
+			  struct net_device *ndev)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	int ret;
+
+	ret = netdev_rx_handler_register(vf_netdev,
+					 netvsc_vf_handle_frame, ndev);
+	if (ret != 0) {
+		netdev_err(vf_netdev,
+			   "can not register netvsc VF receive handler (err = %d)\n",
+			   ret);
+		goto rx_handler_failed;
+	}
+
+	ret = netdev_upper_dev_link(vf_netdev, ndev);
+	if (ret != 0) {
+		netdev_err(vf_netdev,
+			   "can not set master device %s (err = %d)\n",
+			   ndev->name, ret);
+		goto upper_link_failed;
+	}
+
+	/* set slave flag before open to prevent IPv6 addrconf */
+	vf_netdev->flags |= IFF_SLAVE;
+
+	schedule_work(&ndev_ctx->vf_takeover);
+
+	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+	return 0;
+
+upper_link_failed:
+	netdev_rx_handler_unregister(vf_netdev);
+rx_handler_failed:
+	return ret;
+}
+
+static void __netvsc_vf_setup(struct net_device *ndev,
+			      struct net_device *vf_netdev)
+{
+	int ret;
+
+	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+
+	/* Align MTU of VF with master */
+	ret = dev_set_mtu(vf_netdev, ndev->mtu);
+	if (ret)
+		netdev_warn(vf_netdev,
+			    "unable to change mtu to %u\n", ndev->mtu);
+
+	if (netif_running(ndev)) {
+		ret = dev_open(vf_netdev);
+		if (ret)
+			netdev_warn(vf_netdev,
+				    "unable to open: %d\n", ret);
+	}
+}
+
+/* Setup VF as slave of the synthetic device.
+ * Runs in workqueue to avoid recursion in netlink callbacks.
+ */
+static void netvsc_vf_setup(struct work_struct *w)
+{
+	struct net_device_context *ndev_ctx
+		= container_of(w, struct net_device_context, vf_takeover);
+	struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+	struct net_device *vf_netdev;
+
+	if (!rtnl_trylock()) {
+		schedule_work(w);
+		return;
+	}
+
+	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+	if (vf_netdev)
+		__netvsc_vf_setup(ndev, vf_netdev);
+
+	rtnl_unlock();
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
 	struct net_device *ndev;
@@ -1419,10 +1636,12 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
 		return NOTIFY_DONE;
 
+	if (netvsc_vf_join(vf_netdev, ndev) != 0)
+		return NOTIFY_DONE;
+
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
-	/*
-	 * Take a reference on the module.
-	 */
+
+	/* Prevent this module from being unloaded while VF is registered */
 	try_module_get(THIS_MODULE);
 
 	dev_hold(vf_netdev);
@@ -1430,61 +1649,63 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	return NOTIFY_OK;
 }
 
-static int netvsc_vf_up(struct net_device *vf_netdev)
+/* Change datapath */
+static void netvsc_vf_update(struct work_struct *w)
 {
-	struct net_device *ndev;
+	struct net_device_context *ndev_ctx
+		= container_of(w, struct net_device_context, vf_notify);
+	struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
 	struct netvsc_device *netvsc_dev;
-	struct net_device_context *net_device_ctx;
-
-	ndev = get_netvsc_byref(vf_netdev);
-	if (!ndev)
-		return NOTIFY_DONE;
-
-	net_device_ctx = netdev_priv(ndev);
-	netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
-
-	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-
-	/*
-	 * Open the device before switching data path.
-	 */
-	rndis_filter_open(netvsc_dev);
-
-	/*
-	 * notify the host to switch the data path.
-	 */
-	netvsc_switch_datapath(ndev, true);
-	netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
+	struct net_device *vf_netdev;
+	bool vf_is_up;
 
-	netif_carrier_off(ndev);
+	if (!rtnl_trylock()) {
+		schedule_work(w);
+		return;
+	}
 
-	/* Now notify peers through VF device. */
-	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
+	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+	if (!vf_netdev)
+		goto unlock;
+
+	netvsc_dev = rtnl_dereference(ndev_ctx->nvdev);
+	if (!netvsc_dev)
+		goto unlock;
+
+	vf_is_up = netif_running(vf_netdev);
+	if (vf_is_up != ndev_ctx->datapath) {
+		if (vf_is_up) {
+			netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
+			rndis_filter_open(netvsc_dev);
+			netvsc_switch_datapath(ndev, true);
+			netdev_info(ndev, "Data path switched to VF: %s\n",
+				    vf_netdev->name);
+		} else {
+			netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
+			netvsc_switch_datapath(ndev, false);
+			rndis_filter_close(netvsc_dev);
+			netdev_info(ndev, "Data path switched from VF: %s\n",
+				    vf_netdev->name);
+		}
 
-	return NOTIFY_OK;
+		/* Now notify peers through VF device. */
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
+	}
+unlock:
+	rtnl_unlock();
 }
 
-static int netvsc_vf_down(struct net_device *vf_netdev)
+static int netvsc_vf_notify(struct net_device *vf_netdev)
 {
-	struct net_device *ndev;
-	struct netvsc_device *netvsc_dev;
 	struct net_device_context *net_device_ctx;
+	struct net_device *ndev;
 
 	ndev = get_netvsc_byref(vf_netdev);
 	if (!ndev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
-	netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
-
-	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_switch_datapath(ndev, false);
-	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
-	rndis_filter_close(netvsc_dev);
-	netif_carrier_on(ndev);
-
-	/* Now notify peers through netvsc device. */
-	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
+	schedule_work(&net_device_ctx->vf_notify);
 
 	return NOTIFY_OK;
 }
@@ -1499,9 +1720,12 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
+	cancel_work_sync(&net_device_ctx->vf_takeover);
+	cancel_work_sync(&net_device_ctx->vf_notify);
 
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
+	netdev_upper_dev_unlink(vf_netdev, ndev);
 	RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
 	dev_put(vf_netdev);
 	module_put(THIS_MODULE);
@@ -1515,12 +1739,12 @@ static int netvsc_probe(struct hv_device *dev,
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device_info device_info;
 	struct netvsc_device *nvdev;
-	int ret;
+	int ret = -ENOMEM;
 
 	net = alloc_etherdev_mq(sizeof(struct net_device_context),
 				VRSS_CHANNEL_MAX);
 	if (!net)
-		return -ENOMEM;
+		goto no_net;
 
 	netif_carrier_off(net);
 
@@ -1539,6 +1763,13 @@ static int netvsc_probe(struct hv_device *dev,
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+	INIT_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+	INIT_WORK(&net_device_ctx->vf_notify, netvsc_vf_update);
+
+	net_device_ctx->vf_stats
+		= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
+	if (!net_device_ctx->vf_stats)
+		goto no_stats;
 
 	net->netdev_ops = &device_ops;
 	net->ethtool_ops = &ethtool_ops;
@@ -1551,13 +1782,14 @@ static int netvsc_probe(struct hv_device *dev,
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
 	device_info.num_chn = VRSS_CHANNEL_DEFAULT;
-	ret = rndis_filter_device_add(dev, &device_info);
-	if (ret != 0) {
+
+	nvdev = rndis_filter_device_add(dev, &device_info);
+	if (IS_ERR(nvdev)) {
+		ret = PTR_ERR(nvdev);
 		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
-		free_netdev(net);
-		hv_set_drvdata(dev, NULL);
-		return ret;
+		goto rndis_failed;
 	}
+
 	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
 
 	/* hw_features computed in rndis_filter_device_add */
@@ -1566,11 +1798,11 @@ static int netvsc_probe(struct hv_device *dev,
 		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 	net->vlan_features = net->features;
 
-	/* RCU not necessary here, device not registered */
-	nvdev = net_device_ctx->nvdev;
 	netif_set_real_num_tx_queues(net, nvdev->num_chn);
 	netif_set_real_num_rx_queues(net, nvdev->num_chn);
 
+	netdev_lockdep_set_classes(net);
+
 	/* MTU range: 68 - 1500 or 65521 */
 	net->min_mtu = NETVSC_MTU_MIN;
 	if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
@@ -1581,11 +1813,20 @@ static int netvsc_probe(struct hv_device *dev,
 	ret = register_netdev(net);
 	if (ret != 0) {
 		pr_err("Unable to register netdev.\n");
-		rndis_filter_device_remove(dev, nvdev);
-		free_netdev(net);
+		goto register_failed;
 	}
 
 	return ret;
+
+register_failed:
+	rndis_filter_device_remove(dev, nvdev);
+rndis_failed:
+	free_percpu(net_device_ctx->vf_stats);
+no_stats:
+	hv_set_drvdata(dev, NULL);
+	free_netdev(net);
+no_net:
+	return ret;
 }
 
 static int netvsc_remove(struct hv_device *dev)
@@ -1611,13 +1852,15 @@ static int netvsc_remove(struct hv_device *dev)
 	 * removed. Also blocks mtu and channel changes.
 	 */
 	rtnl_lock();
-	rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+	rndis_filter_device_remove(dev,
+				   rtnl_dereference(ndev_ctx->nvdev));
 	rtnl_unlock();
 
 	unregister_netdev(net);
 
 	hv_set_drvdata(dev, NULL);
 
+	free_percpu(ndev_ctx->vf_stats);
 	free_netdev(net);
 	return 0;
 }
@@ -1672,9 +1915,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
 	case NETDEV_UNREGISTER:
 		return netvsc_unregister_vf(event_dev);
 	case NETDEV_UP:
-		return netvsc_vf_up(event_dev);
 	case NETDEV_DOWN:
-		return netvsc_vf_down(event_dev);
+		return netvsc_vf_notify(event_dev);
 	default:
 		return NOTIFY_DONE;
 	}
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 85c00e1c52b6..44165fe328a4 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -28,6 +28,7 @@
 #include <linux/if_vlan.h>
 #include <linux/nls.h>
 #include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
 
 #include "hyperv_net.h"
 
@@ -213,11 +214,11 @@ static void dump_rndis_message(struct hv_device *hv_dev,
 static int rndis_filter_send_request(struct rndis_device *dev,
 				  struct rndis_request *req)
 {
-	int ret;
 	struct hv_netvsc_packet *packet;
 	struct hv_page_buffer page_buf[2];
 	struct hv_page_buffer *pb = page_buf;
 	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
+	int ret;
 
 	/* Setup the packet to send it */
 	packet = &req->pkt;
@@ -243,7 +244,10 @@ static int rndis_filter_send_request(struct rndis_device *dev,
 			pb[0].len;
 	}
 
-	ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL);
+	rcu_read_lock_bh();
+	ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
+	rcu_read_unlock_bh();
+
 	return ret;
 }
 
@@ -443,8 +447,9 @@ int rndis_filter_receive(struct net_device *ndev,
 	return 0;
 }
 
-static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
-				  void *result, u32 *result_size)
+static int rndis_filter_query_device(struct rndis_device *dev,
+				     struct netvsc_device *nvdev,
+				     u32 oid, void *result, u32 *result_size)
 {
 	struct rndis_request *request;
 	u32 inresult_size = *result_size;
@@ -471,8 +476,6 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
 	query->dev_vc_handle = 0;
 
 	if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
-		struct net_device_context *ndevctx = netdev_priv(dev->ndev);
-		struct netvsc_device *nvdev = ndevctx->nvdev;
 		struct ndis_offload *hwcaps;
 		u32 nvsp_version = nvdev->nvsp_version;
 		u8 ndis_rev;
@@ -541,14 +544,15 @@ cleanup:
 
 /* Get the hardware offload capabilities */
 static int
-rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
+rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device,
+		   struct ndis_offload *caps)
 {
 	u32 caps_len = sizeof(*caps);
 	int ret;
 
 	memset(caps, 0, sizeof(*caps));
 
-	ret = rndis_filter_query_device(dev,
+	ret = rndis_filter_query_device(dev, net_device,
 					OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
 					caps, &caps_len);
 	if (ret)
@@ -577,11 +581,12 @@ rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
 	return 0;
 }
 
-static int rndis_filter_query_device_mac(struct rndis_device *dev)
+static int rndis_filter_query_device_mac(struct rndis_device *dev,
+					 struct netvsc_device *net_device)
 {
 	u32 size = ETH_ALEN;
 
-	return rndis_filter_query_device(dev,
+	return rndis_filter_query_device(dev, net_device,
 				      RNDIS_OID_802_3_PERMANENT_ADDRESS,
 				      dev->hw_mac_adr, &size);
 }
@@ -589,9 +594,9 @@ static int rndis_filter_query_device_mac(struct rndis_device *dev)
 #define NWADR_STR "NetworkAddress"
 #define NWADR_STRLEN 14
 
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
+int rndis_filter_set_device_mac(struct netvsc_device *nvdev,
+				const char *mac)
 {
-	struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
 	struct rndis_device *rdev = nvdev->extension;
 	struct rndis_request *request;
 	struct rndis_set_request *set;
@@ -645,11 +650,8 @@ int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
 	wait_for_completion(&request->wait_event);
 
 	set_complete = &request->response_msg.msg.set_complete;
-	if (set_complete->status != RNDIS_STATUS_SUCCESS) {
-		netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
-			   set_complete->status);
-		ret = -EINVAL;
-	}
+	if (set_complete->status != RNDIS_STATUS_SUCCESS)
+		ret = -EIO;
 
 cleanup:
 	put_rndis_request(rdev, request);
@@ -658,9 +660,9 @@ cleanup:
 
 static int
 rndis_filter_set_offload_params(struct net_device *ndev,
+				struct netvsc_device *nvdev,
 				struct ndis_offload_params *req_offloads)
 {
-	struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
 	struct rndis_device *rdev = nvdev->extension;
 	struct rndis_request *request;
 	struct rndis_set_request *set;
@@ -782,27 +784,27 @@ cleanup:
 	return ret;
 }
 
-static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+static int rndis_filter_query_device_link_status(struct rndis_device *dev,
+						 struct netvsc_device *net_device)
 {
 	u32 size = sizeof(u32);
 	u32 link_status;
-	int ret;
-
-	ret = rndis_filter_query_device(dev,
-				      RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
-				      &link_status, &size);
 
-	return ret;
+	return rndis_filter_query_device(dev, net_device,
+					 RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+					 &link_status, &size);
 }
 
-static int rndis_filter_query_link_speed(struct rndis_device *dev)
+static int rndis_filter_query_link_speed(struct rndis_device *dev,
+					 struct netvsc_device *net_device)
 {
 	u32 size = sizeof(u32);
 	u32 link_speed;
 	struct net_device_context *ndc;
 	int ret;
 
-	ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+	ret = rndis_filter_query_device(dev, net_device,
+					RNDIS_OID_GEN_LINK_SPEED,
 					&link_speed, &size);
 
 	if (!ret) {
@@ -871,14 +873,14 @@ void rndis_filter_update(struct netvsc_device *nvdev)
 	schedule_work(&rdev->mcast_work);
 }
 
-static int rndis_filter_init_device(struct rndis_device *dev)
+static int rndis_filter_init_device(struct rndis_device *dev,
+				    struct netvsc_device *nvdev)
 {
 	struct rndis_request *request;
 	struct rndis_initialize_request *init;
 	struct rndis_initialize_complete *init_complete;
 	u32 status;
 	int ret;
-	struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev);
 
 	request = get_rndis_request(dev, RNDIS_MSG_INIT,
 			RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
@@ -926,12 +928,12 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
 {
 	int i;
 
-	if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
-		return false;
-
 	for (i = 0; i < nvdev->num_chn; i++) {
 		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
 
+		if (nvchan->mrc.first != nvchan->mrc.next)
+			return false;
+
 		if (atomic_read(&nvchan->queue_sends) > 0)
 			return false;
 	}
@@ -944,7 +946,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
 	struct rndis_request *request;
 	struct rndis_halt_request *halt;
 	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
-	struct netvsc_device *nvdev = net_device_ctx->nvdev;
+	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
 	/* Attempt to do a rndis device halt */
 	request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1015,20 +1017,20 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 {
 	struct net_device *ndev =
 		hv_get_drvdata(new_sc->primary_channel->device_obj);
-	struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct netvsc_device *nvscdev;
 	u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
 	struct netvsc_channel *nvchan;
 	int ret;
 
-	if (chn_index >= nvscdev->num_chn)
+	/* This is safe because this callback only happens when
+	 * new device is being setup and waiting on the channel_init_wait.
+	 */
+	nvscdev = rcu_dereference_raw(ndev_ctx->nvdev);
+	if (!nvscdev || chn_index >= nvscdev->num_chn)
 		return;
 
 	nvchan = nvscdev->chan_table + chn_index;
-	nvchan->mrc.buf
-		= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
-	if (!nvchan->mrc.buf)
-		return;
 
 	/* Because the device uses NAPI, all the interrupt batching and
 	 * control is done via Net softirq, not the channel handling
@@ -1052,8 +1054,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 		complete(&nvscdev->channel_init_wait);
 }
 
-int rndis_filter_device_add(struct hv_device *dev,
-			    struct netvsc_device_info *device_info)
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+				      struct netvsc_device_info *device_info)
 {
 	struct net_device *net = hv_get_drvdata(dev);
 	struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -1072,21 +1074,20 @@ int rndis_filter_device_add(struct hv_device *dev,
 
 	rndis_device = get_rndis_device();
 	if (!rndis_device)
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
 	/*
 	 * Let the inner driver handle this first to create the netvsc channel
 	 * NOTE! Once the channel is created, we may get a receive callback
 	 * (RndisFilterOnReceive()) before this call is completed
 	 */
-	ret = netvsc_device_add(dev, device_info);
-	if (ret != 0) {
+	net_device = netvsc_device_add(dev, device_info);
+	if (IS_ERR(net_device)) {
 		kfree(rndis_device);
-		return ret;
+		return net_device;
 	}
 
 	/* Initialize the rndis device */
-	net_device = net_device_ctx->nvdev;
 	net_device->max_chn = 1;
 	net_device->num_chn = 1;
 
@@ -1096,35 +1097,29 @@ int rndis_filter_device_add(struct hv_device *dev,
 	rndis_device->ndev = net;
 
 	/* Send the rndis initialization message */
-	ret = rndis_filter_init_device(rndis_device);
-	if (ret != 0) {
-		rndis_filter_device_remove(dev, net_device);
-		return ret;
-	}
+	ret = rndis_filter_init_device(rndis_device, net_device);
+	if (ret != 0)
+		goto err_dev_remv;
 
 	/* Get the MTU from the host */
 	size = sizeof(u32);
-	ret = rndis_filter_query_device(rndis_device,
+	ret = rndis_filter_query_device(rndis_device, net_device,
 					RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
 					&mtu, &size);
 	if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
 		net->mtu = mtu;
 
 	/* Get the mac address */
-	ret = rndis_filter_query_device_mac(rndis_device);
-	if (ret != 0) {
-		rndis_filter_device_remove(dev, net_device);
-		return ret;
-	}
+	ret = rndis_filter_query_device_mac(rndis_device, net_device);
+	if (ret != 0)
+		goto err_dev_remv;
 
 	memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
 
 	/* Find HW offload capabilities */
-	ret = rndis_query_hwcaps(rndis_device, &hwcaps);
-	if (ret != 0) {
-		rndis_filter_device_remove(dev, net_device);
-		return ret;
-	}
+	ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
+	if (ret != 0)
+		goto err_dev_remv;
 
 	/* A value of zero means "no change"; now turn on what we want. */
 	memset(&offloads, 0, sizeof(struct ndis_offload_params));
@@ -1179,24 +1174,24 @@ int rndis_filter_device_add(struct hv_device *dev,
 
 	netif_set_gso_max_size(net, gso_max_size);
 
-	ret = rndis_filter_set_offload_params(net, &offloads);
+	ret = rndis_filter_set_offload_params(net, net_device, &offloads);
 	if (ret)
 		goto err_dev_remv;
 
-	rndis_filter_query_device_link_status(rndis_device);
+	rndis_filter_query_device_link_status(rndis_device, net_device);
 
 	netdev_dbg(net, "Device MAC %pM link state %s\n",
 		   rndis_device->hw_mac_adr,
 		   rndis_device->link_state ? "down" : "up");
 
 	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
-		return 0;
+		return net_device;
 
-	rndis_filter_query_link_speed(rndis_device);
+	rndis_filter_query_link_speed(rndis_device, net_device);
 
 	/* vRSS setup */
 	memset(&rsscap, 0, rsscap_size);
-	ret = rndis_filter_query_device(rndis_device,
+	ret = rndis_filter_query_device(rndis_device, net_device,
 					OID_GEN_RECEIVE_SCALE_CAPABILITIES,
 					&rsscap, &rsscap_size);
 	if (ret || rsscap.num_recv_que < 2)
@@ -1223,7 +1218,16 @@ int rndis_filter_device_add(struct hv_device *dev,
 
 	num_rss_qs = net_device->num_chn - 1;
 	if (num_rss_qs == 0)
-		return 0;
+		return net_device;
+
+	for (i = 1; i < net_device->num_chn; i++) {
+		ret = netvsc_alloc_recv_comp_ring(net_device, i);
+		if (ret) {
+			while (--i != 0)
+				vfree(net_device->chan_table[i].mrc.slots);
+			goto out;
+		}
+	}
 
 	refcount_set(&net_device->sc_offered, num_rss_qs);
 	vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
@@ -1260,11 +1264,11 @@ out:
 		net_device->num_chn = 1;
 	}
 
-	return 0; /* return 0 because primary channel can be used alone */
+	return net_device;
 
 err_dev_remv:
 	rndis_filter_device_remove(dev, net_device);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 void rndis_filter_device_remove(struct hv_device *dev,
@@ -1302,3 +1306,8 @@ int rndis_filter_close(struct netvsc_device *nvdev)
 
 	return rndis_filter_close_device(nvdev->extension);
 }
+
+bool rndis_filter_opened(const struct netvsc_device *nvdev)
+{
+	return atomic_read(&nvdev->open_cnt) > 0;
+}
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index a626c539fb17..326243fae7e2 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -66,6 +66,7 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <linux/interrupt.h>
 
 #include <net/ieee802154_netdev.h>
 #include <net/mac802154.h>
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index 7d334963dc08..ee7084b2d52d 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -1330,7 +1330,8 @@ static int mrf24j40_probe(struct spi_device *spi)
 	if (spi->max_speed_hz > MAX_SPI_SPEED_HZ) {
 		dev_warn(&spi->dev, "spi clock above possible maximum: %d",
 			 MAX_SPI_SPEED_HZ);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_register_device;
 	}
 
 	ret = mrf24j40_hw_init(devrec);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index f37e3c1fd4e7..fdde20735416 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -169,7 +169,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
 
 #define IPVLAN_FEATURES \
 	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
-	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
+	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
 	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
 	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 22f133ea8d7b..5dea2063dbc8 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -24,7 +24,7 @@
 #include <linux/virtio_net.h>
 
 #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
-		      NETIF_F_TSO6 | NETIF_F_UFO)
+		      NETIF_F_TSO6)
 
 static dev_t ipvtap_major;
 static struct cdev ipvtap_cdev;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0f581ee74fe4..ca35c6ba7947 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -841,7 +841,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
 
 #define MACVLAN_FEATURES \
 	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
-	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \
+	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
 	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
 	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 91e7b19bbf86..c2d0ea2fb019 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -49,7 +49,7 @@ static struct class macvtap_class = {
 static struct cdev macvtap_cdev;
 
 #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
-		      NETIF_F_TSO6 | NETIF_F_UFO)
+		      NETIF_F_TSO6)
 
 static void macvtap_count_tx_dropped(struct tap_dev *tap)
 {
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 928fd892f167..bf73969a9d2b 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -106,6 +106,16 @@ config MDIO_HISI_FEMAC
 	  This module provides a driver for the MDIO busses found in the
 	  Hisilicon SoC that have an Fast Ethernet MAC.
 
+config MDIO_I2C
+	tristate
+	depends on I2C
+	help
+	  Support I2C based PHYs.  This provides a MDIO bus bridged
+	  to I2C to allow PHYs connected in I2C mode to be accessed
+	  using the existing infrastructure.
+
+	  This is library mode.
+
 config MDIO_MOXART
         tristate "MOXA ART MDIO interface support"
         depends on ARCH_MOXART
@@ -159,6 +169,16 @@ menuconfig PHYLIB
 	  devices.  This option provides infrastructure for
 	  managing PHY devices.
 
+config PHYLINK
+	tristate
+	depends on NETDEVICES
+	select PHYLIB
+	select SWPHY
+	help
+	  PHYlink models the link between the PHY and MAC, allowing fixed
+	  configuration links, PHYs, and Serdes links with MAC level
+	  autonegotiation modes.
+
 if PHYLIB
 
 config SWPHY
@@ -180,6 +200,11 @@ config LED_TRIGGER_PHY
 
 comment "MII PHY device drivers"
 
+config SFP
+	tristate "SFP cage support"
+	depends on I2C && PHYLINK
+	select MDIO_I2C
+
 config AMD_PHY
 	tristate "AMD PHYs"
 	---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 8e9b9f349384..7237255bad68 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -18,6 +18,7 @@ endif
 libphy-$(CONFIG_SWPHY)		+= swphy.o
 libphy-$(CONFIG_LED_TRIGGER_PHY)	+= phy_led_triggers.o
 
+obj-$(CONFIG_PHYLINK)		+= phylink.o
 obj-$(CONFIG_PHYLIB)		+= libphy.o
 
 obj-$(CONFIG_MDIO_BCM_IPROC)	+= mdio-bcm-iproc.o
@@ -30,12 +31,17 @@ obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
 obj-$(CONFIG_MDIO_CAVIUM)	+= mdio-cavium.o
 obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
 obj-$(CONFIG_MDIO_HISI_FEMAC)	+= mdio-hisi-femac.o
+obj-$(CONFIG_MDIO_I2C)		+= mdio-i2c.o
 obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
 obj-$(CONFIG_MDIO_OCTEON)	+= mdio-octeon.o
 obj-$(CONFIG_MDIO_SUN4I)	+= mdio-sun4i.o
 obj-$(CONFIG_MDIO_THUNDER)	+= mdio-thunder.o
 obj-$(CONFIG_MDIO_XGENE)	+= mdio-xgene.o
 
+obj-$(CONFIG_SFP)		+= sfp.o
+sfp-obj-$(CONFIG_SFP)		+= sfp-bus.o
+obj-y				+= $(sfp-obj-y) $(sfp-obj-m)
+
 obj-$(CONFIG_AMD_PHY)		+= amd.o
 obj-$(CONFIG_AQUANTIA_PHY)	+= aquantia.o
 obj-$(CONFIG_AT803X_PHY)	+= at803x.o
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5d314f143aea..15cbcdba618a 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -55,43 +55,35 @@
 #define MII_M1011_IMASK_INIT		0x6400
 #define MII_M1011_IMASK_CLEAR		0x0000
 
-#define MII_M1011_PHY_SCR		0x10
-#define MII_M1011_PHY_SCR_MDI		0x0000
-#define MII_M1011_PHY_SCR_MDI_X		0x0020
-#define MII_M1011_PHY_SCR_AUTO_CROSS	0x0060
-
-#define MII_M1145_PHY_EXT_SR		0x1b
-#define MII_M1145_PHY_EXT_CR		0x14
-#define MII_M1145_RGMII_RX_DELAY	0x0080
-#define MII_M1145_RGMII_TX_DELAY	0x0002
-#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK	0x4
-#define MII_M1145_HWCFG_MODE_MASK		0xf
-#define MII_M1145_HWCFG_FIBER_COPPER_AUTO	0x8000
-
-#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK	0x4
-#define MII_M1145_HWCFG_MODE_MASK		0xf
-#define MII_M1145_HWCFG_FIBER_COPPER_AUTO	0x8000
+#define MII_M1011_PHY_SCR			0x10
+#define MII_M1011_PHY_SCR_DOWNSHIFT_EN		BIT(11)
+#define MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT	12
+#define MII_M1011_PHY_SRC_DOWNSHIFT_MASK	0x7800
+#define MII_M1011_PHY_SCR_MDI			(0x0 << 5)
+#define MII_M1011_PHY_SCR_MDI_X			(0x1 << 5)
+#define MII_M1011_PHY_SCR_AUTO_CROSS		(0x3 << 5)
 
 #define MII_M1111_PHY_LED_CONTROL	0x18
 #define MII_M1111_PHY_LED_DIRECT	0x4100
 #define MII_M1111_PHY_LED_COMBINE	0x411c
 #define MII_M1111_PHY_EXT_CR		0x14
-#define MII_M1111_RX_DELAY		0x80
-#define MII_M1111_TX_DELAY		0x2
+#define MII_M1111_RGMII_RX_DELAY	BIT(7)
+#define MII_M1111_RGMII_TX_DELAY	BIT(1)
 #define MII_M1111_PHY_EXT_SR		0x1b
 
 #define MII_M1111_HWCFG_MODE_MASK		0xf
-#define MII_M1111_HWCFG_MODE_COPPER_RGMII	0xb
 #define MII_M1111_HWCFG_MODE_FIBER_RGMII	0x3
 #define MII_M1111_HWCFG_MODE_SGMII_NO_CLK	0x4
+#define MII_M1111_HWCFG_MODE_RTBI		0x7
 #define MII_M1111_HWCFG_MODE_COPPER_RTBI	0x9
-#define MII_M1111_HWCFG_FIBER_COPPER_AUTO	0x8000
-#define MII_M1111_HWCFG_FIBER_COPPER_RES	0x2000
+#define MII_M1111_HWCFG_MODE_COPPER_RGMII	0xb
+#define MII_M1111_HWCFG_FIBER_COPPER_RES	BIT(13)
+#define MII_M1111_HWCFG_FIBER_COPPER_AUTO	BIT(15)
 
 #define MII_88E1121_PHY_MSCR_REG	21
 #define MII_88E1121_PHY_MSCR_RX_DELAY	BIT(5)
 #define MII_88E1121_PHY_MSCR_TX_DELAY	BIT(4)
-#define MII_88E1121_PHY_MSCR_DELAY_MASK	(~(0x3 << 4))
+#define MII_88E1121_PHY_MSCR_DELAY_MASK	(~(BIT(5) | BIT(4)))
 
 #define MII_88E1121_MISC_TEST				0x1a
 #define MII_88E1510_MISC_TEST_TEMP_THRESHOLD_MASK	0x1f00
@@ -108,24 +100,24 @@
 #define MII_88E1318S_PHY_MSCR1_PAD_ODD	BIT(6)
 
 /* Copper Specific Interrupt Enable Register */
-#define MII_88E1318S_PHY_CSIER                              0x12
+#define MII_88E1318S_PHY_CSIER				0x12
 /* WOL Event Interrupt Enable */
-#define MII_88E1318S_PHY_CSIER_WOL_EIE                      BIT(7)
+#define MII_88E1318S_PHY_CSIER_WOL_EIE			BIT(7)
 
 /* LED Timer Control Register */
-#define MII_88E1318S_PHY_LED_TCR                            0x12
-#define MII_88E1318S_PHY_LED_TCR_FORCE_INT                  BIT(15)
-#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE                BIT(7)
-#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW             BIT(11)
+#define MII_88E1318S_PHY_LED_TCR			0x12
+#define MII_88E1318S_PHY_LED_TCR_FORCE_INT		BIT(15)
+#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE		BIT(7)
+#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW		BIT(11)
 
 /* Magic Packet MAC address registers */
-#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2                 0x17
-#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1                 0x18
-#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0                 0x19
+#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2		0x17
+#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1		0x18
+#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0		0x19
 
-#define MII_88E1318S_PHY_WOL_CTRL                           0x10
-#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS          BIT(12)
-#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14)
+#define MII_88E1318S_PHY_WOL_CTRL				0x10
+#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS		BIT(12)
+#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE	BIT(14)
 
 #define MII_88E1121_PHY_LED_CTRL	16
 #define MII_88E1121_PHY_LED_DEF		0x0030
@@ -138,8 +130,6 @@
 #define MII_M1011_PHY_STATUS_RESOLVED	0x0800
 #define MII_M1011_PHY_STATUS_LINK	0x0400
 
-#define MII_M1116R_CONTROL_REG_MAC	21
-
 #define MII_88E3016_PHY_SPEC_CTRL	0x10
 #define MII_88E3016_DISABLE_SCRAMBLER	0x0200
 #define MII_88E3016_AUTO_MDIX_CROSSOVER	0x0030
@@ -152,7 +142,7 @@
 #define LPA_FIBER_1000HALF	0x40
 #define LPA_FIBER_1000FULL	0x20
 
-#define LPA_PAUSE_FIBER	0x180
+#define LPA_PAUSE_FIBER		0x180
 #define LPA_PAUSE_ASYM_FIBER	0x100
 
 #define ADVERTISE_FIBER_1000HALF	0x40
@@ -274,6 +264,23 @@ static int marvell_set_polarity(struct phy_device *phydev, int polarity)
 	return 0;
 }
 
+static int marvell_set_downshift(struct phy_device *phydev, bool enable,
+				 u8 retries)
+{
+	int reg;
+
+	reg = phy_read(phydev, MII_M1011_PHY_SCR);
+	if (reg < 0)
+		return reg;
+
+	reg &= MII_M1011_PHY_SRC_DOWNSHIFT_MASK;
+	reg |= ((retries - 1) << MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT);
+	if (enable)
+		reg |= MII_M1011_PHY_SCR_DOWNSHIFT_EN;
+
+	return phy_write(phydev, MII_M1011_PHY_SCR, reg);
+}
+
 static int marvell_config_aneg(struct phy_device *phydev)
 {
 	int err;
@@ -292,17 +299,11 @@ static int marvell_config_aneg(struct phy_device *phydev)
 		return err;
 
 	if (phydev->autoneg != AUTONEG_ENABLE) {
-		int bmcr;
-
 		/* A write to speed/duplex bits (that is performed by
 		 * genphy_config_aneg() call above) must be followed by
 		 * a software reset. Otherwise, the write has no effect.
 		 */
-		bmcr = phy_read(phydev, MII_BMCR);
-		if (bmcr < 0)
-			return bmcr;
-
-		err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+		err = genphy_soft_reset(phydev);
 		if (err < 0)
 			return err;
 	}
@@ -318,8 +319,7 @@ static int m88e1101_config_aneg(struct phy_device *phydev)
 	 * that certain registers get written in order
 	 * to restart autonegotiation
 	 */
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
-
+	err = genphy_soft_reset(phydev);
 	if (err < 0)
 		return err;
 
@@ -354,7 +354,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev)
 	 * that certain registers get written in order
 	 * to restart autonegotiation
 	 */
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	err = genphy_soft_reset(phydev);
 
 	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
@@ -370,17 +370,11 @@ static int m88e1111_config_aneg(struct phy_device *phydev)
 		return err;
 
 	if (phydev->autoneg != AUTONEG_ENABLE) {
-		int bmcr;
-
 		/* A write to speed/duplex bits (that is performed by
 		 * genphy_config_aneg() call above) must be followed by
 		 * a software reset. Otherwise, the write has no effect.
 		 */
-		bmcr = phy_read(phydev, MII_BMCR);
-		if (bmcr < 0)
-			return bmcr;
-
-		err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+		err = genphy_soft_reset(phydev);
 		if (err < 0)
 			return err;
 	}
@@ -466,7 +460,7 @@ static int marvell_of_reg_init(struct phy_device *phydev)
 }
 #endif /* CONFIG_OF_MDIO */
 
-static int m88e1121_config_aneg(struct phy_device *phydev)
+static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev)
 {
 	int err, oldpage, mscr;
 
@@ -474,31 +468,45 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
 	if (oldpage < 0)
 		return oldpage;
 
-	if (phy_interface_is_rgmii(phydev)) {
-		mscr = phy_read(phydev, MII_88E1121_PHY_MSCR_REG) &
-			MII_88E1121_PHY_MSCR_DELAY_MASK;
-
-		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
-			mscr |= (MII_88E1121_PHY_MSCR_RX_DELAY |
-				 MII_88E1121_PHY_MSCR_TX_DELAY);
-		else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
-			mscr |= MII_88E1121_PHY_MSCR_RX_DELAY;
-		else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
-			mscr |= MII_88E1121_PHY_MSCR_TX_DELAY;
-
-		err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr);
-		if (err < 0)
-			return err;
+	mscr = phy_read(phydev, MII_88E1121_PHY_MSCR_REG);
+	if (mscr < 0) {
+		err = mscr;
+		goto out;
 	}
 
+	mscr &= MII_88E1121_PHY_MSCR_DELAY_MASK;
+
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+		mscr |= (MII_88E1121_PHY_MSCR_RX_DELAY |
+			 MII_88E1121_PHY_MSCR_TX_DELAY);
+	else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+		mscr |= MII_88E1121_PHY_MSCR_RX_DELAY;
+	else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+		mscr |= MII_88E1121_PHY_MSCR_TX_DELAY;
+
+	err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr);
+
+out:
 	marvell_set_page(phydev, oldpage);
 
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	return err;
+}
+
+static int m88e1121_config_aneg(struct phy_device *phydev)
+{
+	int err = 0;
+
+	if (phy_interface_is_rgmii(phydev)) {
+		err = m88e1121_config_aneg_rgmii_delays(phydev);
+		if (err)
+			return err;
+	}
+
+	err = genphy_soft_reset(phydev);
 	if (err < 0)
 		return err;
 
-	err = phy_write(phydev, MII_M1011_PHY_SCR,
-			MII_M1011_PHY_SCR_AUTO_CROSS);
+	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
 		return err;
 
@@ -596,7 +604,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev)
 
 	if (changed == 0) {
 		/* Advertisement hasn't changed, but maybe aneg was never on to
-		 * begin with?  Or maybe phy was isolated?
+		 * begin with?	Or maybe phy was isolated?
 		 */
 		int ctl = phy_read(phydev, MII_BMCR);
 
@@ -653,12 +661,9 @@ static int marvell_config_init(struct phy_device *phydev)
 
 static int m88e1116r_config_init(struct phy_device *phydev)
 {
-	int temp;
 	int err;
 
-	temp = phy_read(phydev, MII_BMCR);
-	temp |= BMCR_RESET;
-	err = phy_write(phydev, MII_BMCR, temp);
+	err = genphy_soft_reset(phydev);
 	if (err < 0)
 		return err;
 
@@ -668,35 +673,22 @@ static int m88e1116r_config_init(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	temp = phy_read(phydev, MII_M1011_PHY_SCR);
-	temp |= (7 << 12);	/* max number of gigabit attempts */
-	temp |= (1 << 11);	/* enable downshift */
-	temp |= MII_M1011_PHY_SCR_AUTO_CROSS;
-	err = phy_write(phydev, MII_M1011_PHY_SCR, temp);
+	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
 		return err;
 
-	err = marvell_set_page(phydev, MII_MARVELL_MSCR_PAGE);
-	if (err < 0)
-		return err;
-	temp = phy_read(phydev, MII_M1116R_CONTROL_REG_MAC);
-	temp |= (1 << 5);
-	temp |= (1 << 4);
-	err = phy_write(phydev, MII_M1116R_CONTROL_REG_MAC, temp);
+	err = marvell_set_downshift(phydev, true, 8);
 	if (err < 0)
 		return err;
-	err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
+
+	err = m88e1121_config_aneg_rgmii_delays(phydev);
 	if (err < 0)
 		return err;
 
-	temp = phy_read(phydev, MII_BMCR);
-	temp |= BMCR_RESET;
-	err = phy_write(phydev, MII_BMCR, temp);
+	err = genphy_soft_reset(phydev);
 	if (err < 0)
 		return err;
 
-	mdelay(500);
-
 	return marvell_config_init(phydev);
 }
 
@@ -719,9 +711,29 @@ static int m88e3016_config_init(struct phy_device *phydev)
 	return marvell_config_init(phydev);
 }
 
-static int m88e1111_config_init_rgmii(struct phy_device *phydev)
+static int m88e1111_config_init_hwcfg_mode(struct phy_device *phydev,
+					   u16 mode,
+					   int fibre_copper_auto)
+{
+	int temp;
+
+	temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
+	if (temp < 0)
+		return temp;
+
+	temp &= ~(MII_M1111_HWCFG_MODE_MASK |
+		  MII_M1111_HWCFG_FIBER_COPPER_AUTO |
+		  MII_M1111_HWCFG_FIBER_COPPER_RES);
+	temp |= mode;
+
+	if (fibre_copper_auto)
+		temp |= MII_M1111_HWCFG_FIBER_COPPER_AUTO;
+
+	return phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+}
+
+static int m88e1111_config_init_rgmii_delays(struct phy_device *phydev)
 {
-	int err;
 	int temp;
 
 	temp = phy_read(phydev, MII_M1111_PHY_EXT_CR);
@@ -729,16 +741,24 @@ static int m88e1111_config_init_rgmii(struct phy_device *phydev)
 		return temp;
 
 	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) {
-		temp |= (MII_M1111_RX_DELAY | MII_M1111_TX_DELAY);
+		temp |= (MII_M1111_RGMII_RX_DELAY | MII_M1111_RGMII_TX_DELAY);
 	} else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
-		temp &= ~MII_M1111_TX_DELAY;
-		temp |= MII_M1111_RX_DELAY;
+		temp &= ~MII_M1111_RGMII_TX_DELAY;
+		temp |= MII_M1111_RGMII_RX_DELAY;
 	} else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
-		temp &= ~MII_M1111_RX_DELAY;
-		temp |= MII_M1111_TX_DELAY;
+		temp &= ~MII_M1111_RGMII_RX_DELAY;
+		temp |= MII_M1111_RGMII_TX_DELAY;
 	}
 
-	err = phy_write(phydev, MII_M1111_PHY_EXT_CR, temp);
+	return phy_write(phydev, MII_M1111_PHY_EXT_CR, temp);
+}
+
+static int m88e1111_config_init_rgmii(struct phy_device *phydev)
+{
+	int temp;
+	int err;
+
+	err = m88e1111_config_init_rgmii_delays(phydev);
 	if (err < 0)
 		return err;
 
@@ -759,17 +779,11 @@ static int m88e1111_config_init_rgmii(struct phy_device *phydev)
 static int m88e1111_config_init_sgmii(struct phy_device *phydev)
 {
 	int err;
-	int temp;
-
-	temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
-	if (temp < 0)
-		return temp;
-
-	temp &= ~(MII_M1111_HWCFG_MODE_MASK);
-	temp |= MII_M1111_HWCFG_MODE_SGMII_NO_CLK;
-	temp |= MII_M1111_HWCFG_FIBER_COPPER_AUTO;
 
-	err = phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+	err = m88e1111_config_init_hwcfg_mode(
+		phydev,
+		MII_M1111_HWCFG_MODE_SGMII_NO_CLK,
+		MII_M1111_HWCFG_FIBER_COPPER_AUTO);
 	if (err < 0)
 		return err;
 
@@ -780,48 +794,27 @@ static int m88e1111_config_init_sgmii(struct phy_device *phydev)
 static int m88e1111_config_init_rtbi(struct phy_device *phydev)
 {
 	int err;
-	int temp;
 
-	temp = phy_read(phydev, MII_M1111_PHY_EXT_CR);
-	if (temp < 0)
-		return temp;
-
-	temp |= (MII_M1111_RX_DELAY | MII_M1111_TX_DELAY);
-	err = phy_write(phydev, MII_M1111_PHY_EXT_CR, temp);
-	if (err < 0)
+	err = m88e1111_config_init_rgmii_delays(phydev);
+	if (err)
 		return err;
 
-	temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
-	if (temp < 0)
-		return temp;
-
-	temp &= ~(MII_M1111_HWCFG_MODE_MASK |
-		  MII_M1111_HWCFG_FIBER_COPPER_RES);
-	temp |= 0x7 | MII_M1111_HWCFG_FIBER_COPPER_AUTO;
-
-	err = phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+	err = m88e1111_config_init_hwcfg_mode(
+		phydev,
+		MII_M1111_HWCFG_MODE_RTBI,
+		MII_M1111_HWCFG_FIBER_COPPER_AUTO);
 	if (err < 0)
 		return err;
 
 	/* soft reset */
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	err = genphy_soft_reset(phydev);
 	if (err < 0)
 		return err;
 
-	do
-		temp = phy_read(phydev, MII_BMCR);
-	while (temp & BMCR_RESET);
-
-	temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
-	if (temp < 0)
-		return temp;
-
-	temp &= ~(MII_M1111_HWCFG_MODE_MASK |
-		  MII_M1111_HWCFG_FIBER_COPPER_RES);
-	temp |= MII_M1111_HWCFG_MODE_COPPER_RTBI |
-		MII_M1111_HWCFG_FIBER_COPPER_AUTO;
-
-	return phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+	return m88e1111_config_init_hwcfg_mode(
+		phydev,
+		MII_M1111_HWCFG_MODE_RTBI,
+		MII_M1111_HWCFG_FIBER_COPPER_AUTO);
 }
 
 static int m88e1111_config_init(struct phy_device *phydev)
@@ -850,7 +843,7 @@ static int m88e1111_config_init(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	return phy_write(phydev, MII_BMCR, BMCR_RESET);
+	return genphy_soft_reset(phydev);
 }
 
 static int m88e1121_config_init(struct phy_device *phydev)
@@ -912,12 +905,11 @@ static int m88e1118_config_aneg(struct phy_device *phydev)
 {
 	int err;
 
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	err = genphy_soft_reset(phydev);
 	if (err < 0)
 		return err;
 
-	err = phy_write(phydev, MII_M1011_PHY_SCR,
-			MII_M1011_PHY_SCR_AUTO_CROSS);
+	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
 		return err;
 
@@ -961,7 +953,7 @@ static int m88e1118_config_init(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	return phy_write(phydev, MII_BMCR, BMCR_RESET);
+	return genphy_soft_reset(phydev);
 }
 
 static int m88e1149_config_init(struct phy_device *phydev)
@@ -987,20 +979,15 @@ static int m88e1149_config_init(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	return phy_write(phydev, MII_BMCR, BMCR_RESET);
+	return genphy_soft_reset(phydev);
 }
 
 static int m88e1145_config_init_rgmii(struct phy_device *phydev)
 {
+	int temp;
 	int err;
-	int temp = phy_read(phydev, MII_M1145_PHY_EXT_CR);
-
-	if (temp < 0)
-		return temp;
 
-	temp |= (MII_M1145_RGMII_RX_DELAY | MII_M1145_RGMII_TX_DELAY);
-
-	err = phy_write(phydev, MII_M1145_PHY_EXT_CR, temp);
+	err = m88e1111_config_init_rgmii_delays(phydev);
 	if (err < 0)
 		return err;
 
@@ -1032,16 +1019,9 @@ static int m88e1145_config_init_rgmii(struct phy_device *phydev)
 
 static int m88e1145_config_init_sgmii(struct phy_device *phydev)
 {
-	int temp = phy_read(phydev, MII_M1145_PHY_EXT_SR);
-
-	if (temp < 0)
-		return temp;
-
-	temp &= ~MII_M1145_HWCFG_MODE_MASK;
-	temp |= MII_M1145_HWCFG_MODE_SGMII_NO_CLK;
-	temp |= MII_M1145_HWCFG_FIBER_COPPER_AUTO;
-
-	return phy_write(phydev, MII_M1145_PHY_EXT_SR, temp);
+	return m88e1111_config_init_hwcfg_mode(
+		phydev, MII_M1111_HWCFG_MODE_SGMII_NO_CLK,
+		MII_M1111_HWCFG_FIBER_COPPER_AUTO);
 }
 
 static int m88e1145_config_init(struct phy_device *phydev)
@@ -1515,7 +1495,7 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data)
 }
 
 #ifndef UINT64_MAX
-#define UINT64_MAX              (u64)(~((u64)0))
+#define UINT64_MAX		(u64)(~((u64)0))
 #endif
 static u64 marvell_get_stat(struct phy_device *phydev, int i)
 {
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c
index 34395230ce70..89425ca48412 100644
--- a/drivers/net/phy/mdio-bcm-unimac.c
+++ b/drivers/net/phy/mdio-bcm-unimac.c
@@ -21,6 +21,8 @@
 #include <linux/of_platform.h>
 #include <linux/of_mdio.h>
 
+#include <linux/platform_data/mdio-bcm-unimac.h>
+
 #define MDIO_CMD		0x00
 #define  MDIO_START_BUSY	(1 << 29)
 #define  MDIO_READ_FAIL		(1 << 28)
@@ -41,6 +43,8 @@
 struct unimac_mdio_priv {
 	struct mii_bus		*mii_bus;
 	void __iomem		*base;
+	int (*wait_func)	(void *wait_func_data);
+	void			*wait_func_data;
 };
 
 static inline void unimac_mdio_start(struct unimac_mdio_priv *priv)
@@ -57,10 +61,28 @@ static inline unsigned int unimac_mdio_busy(struct unimac_mdio_priv *priv)
 	return __raw_readl(priv->base + MDIO_CMD) & MDIO_START_BUSY;
 }
 
+static int unimac_mdio_poll(void *wait_func_data)
+{
+	struct unimac_mdio_priv *priv = wait_func_data;
+	unsigned int timeout = 1000;
+
+	do {
+		if (!unimac_mdio_busy(priv))
+			return 0;
+
+		usleep_range(1000, 2000);
+	} while (timeout--);
+
+	if (!timeout)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
 static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
 {
 	struct unimac_mdio_priv *priv = bus->priv;
-	unsigned int timeout = 1000;
+	int ret;
 	u32 cmd;
 
 	/* Prepare the read operation */
@@ -70,15 +92,9 @@ static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
 	/* Start MDIO transaction */
 	unimac_mdio_start(priv);
 
-	do {
-		if (!unimac_mdio_busy(priv))
-			break;
-
-		usleep_range(1000, 2000);
-	} while (timeout--);
-
-	if (!timeout)
-		return -ETIMEDOUT;
+	ret = priv->wait_func(priv->wait_func_data);
+	if (ret)
+		return ret;
 
 	cmd = __raw_readl(priv->base + MDIO_CMD);
 
@@ -97,7 +113,6 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
 			     int reg, u16 val)
 {
 	struct unimac_mdio_priv *priv = bus->priv;
-	unsigned int timeout = 1000;
 	u32 cmd;
 
 	/* Prepare the write operation */
@@ -107,17 +122,7 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
 
 	unimac_mdio_start(priv);
 
-	do {
-		if (!unimac_mdio_busy(priv))
-			break;
-
-		usleep_range(1000, 2000);
-	} while (timeout--);
-
-	if (!timeout)
-		return -ETIMEDOUT;
-
-	return 0;
+	return priv->wait_func(priv->wait_func_data);
 }
 
 /* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
@@ -155,8 +160,10 @@ static int unimac_mdio_reset(struct mii_bus *bus)
 	}
 
 	for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
-		if (read_mask & 1 << addr)
+		if (read_mask & 1 << addr) {
+			dev_dbg(&bus->dev, "Workaround for PHY @ %d\n", addr);
 			mdiobus_read(bus, addr, MII_BMSR);
+		}
 	}
 
 	return 0;
@@ -164,6 +171,7 @@ static int unimac_mdio_reset(struct mii_bus *bus)
 
 static int unimac_mdio_probe(struct platform_device *pdev)
 {
+	struct unimac_mdio_pdata *pdata = pdev->dev.platform_data;
 	struct unimac_mdio_priv *priv;
 	struct device_node *np;
 	struct mii_bus *bus;
@@ -193,12 +201,21 @@ static int unimac_mdio_probe(struct platform_device *pdev)
 
 	bus = priv->mii_bus;
 	bus->priv = priv;
-	bus->name = "unimac MII bus";
+	if (pdata) {
+		bus->name = pdata->bus_name;
+		priv->wait_func = pdata->wait_func;
+		priv->wait_func_data = pdata->wait_func_data;
+		bus->phy_mask = ~pdata->phy_mask;
+	} else {
+		bus->name = "unimac MII bus";
+		priv->wait_func_data = priv;
+		priv->wait_func = unimac_mdio_poll;
+	}
 	bus->parent = &pdev->dev;
 	bus->read = unimac_mdio_read;
 	bus->write = unimac_mdio_write;
 	bus->reset = unimac_mdio_reset;
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id);
 
 	ret = of_mdiobus_register(bus, np);
 	if (ret) {
@@ -240,7 +257,7 @@ MODULE_DEVICE_TABLE(of, unimac_mdio_ids);
 
 static struct platform_driver unimac_mdio_driver = {
 	.driver = {
-		.name = "unimac-mdio",
+		.name = UNIMAC_MDIO_DRV_NAME,
 		.of_match_table = unimac_mdio_ids,
 	},
 	.probe	= unimac_mdio_probe,
@@ -251,4 +268,4 @@ module_platform_driver(unimac_mdio_driver);
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Broadcom UniMAC MDIO bus controller");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:unimac-mdio");
+MODULE_ALIAS("platform:" UNIMAC_MDIO_DRV_NAME);
diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/phy/mdio-i2c.c
new file mode 100644
index 000000000000..6d24fd13ca86
--- /dev/null
+++ b/drivers/net/phy/mdio-i2c.c
@@ -0,0 +1,109 @@
+/*
+ * MDIO I2C bridge
+ *
+ * Copyright (C) 2015-2016 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Network PHYs can appear on I2C buses when they are part of SFP module.
+ * This driver exposes these PHYs to the networking PHY code, allowing
+ * our PHY drivers access to these PHYs, and so allowing configuration
+ * of their settings.
+ */
+#include <linux/i2c.h>
+#include <linux/phy.h>
+
+#include "mdio-i2c.h"
+
+/*
+ * I2C bus addresses 0x50 and 0x51 are normally an EEPROM, which is
+ * specified to be present in SFP modules.  These correspond with PHY
+ * addresses 16 and 17.  Disallow access to these "phy" addresses.
+ */
+static bool i2c_mii_valid_phy_id(int phy_id)
+{
+	return phy_id != 0x10 && phy_id != 0x11;
+}
+
+static unsigned int i2c_mii_phy_addr(int phy_id)
+{
+	return phy_id + 0x40;
+}
+
+static int i2c_mii_read(struct mii_bus *bus, int phy_id, int reg)
+{
+	struct i2c_adapter *i2c = bus->priv;
+	struct i2c_msg msgs[2];
+	u8 data[2], dev_addr = reg;
+	int bus_addr, ret;
+
+	if (!i2c_mii_valid_phy_id(phy_id))
+		return 0xffff;
+
+	bus_addr = i2c_mii_phy_addr(phy_id);
+	msgs[0].addr = bus_addr;
+	msgs[0].flags = 0;
+	msgs[0].len = 1;
+	msgs[0].buf = &dev_addr;
+	msgs[1].addr = bus_addr;
+	msgs[1].flags = I2C_M_RD;
+	msgs[1].len = sizeof(data);
+	msgs[1].buf = data;
+
+	ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+	if (ret != ARRAY_SIZE(msgs))
+		return 0xffff;
+
+	return data[0] << 8 | data[1];
+}
+
+static int i2c_mii_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
+{
+	struct i2c_adapter *i2c = bus->priv;
+	struct i2c_msg msg;
+	int ret;
+	u8 data[3];
+
+	if (!i2c_mii_valid_phy_id(phy_id))
+		return 0;
+
+	data[0] = reg;
+	data[1] = val >> 8;
+	data[2] = val;
+
+	msg.addr = i2c_mii_phy_addr(phy_id);
+	msg.flags = 0;
+	msg.len = 3;
+	msg.buf = data;
+
+	ret = i2c_transfer(i2c, &msg, 1);
+
+	return ret < 0 ? ret : 0;
+}
+
+struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c)
+{
+	struct mii_bus *mii;
+
+	if (!i2c_check_functionality(i2c, I2C_FUNC_I2C))
+		return ERR_PTR(-EINVAL);
+
+	mii = mdiobus_alloc();
+	if (!mii)
+		return ERR_PTR(-ENOMEM);
+
+	snprintf(mii->id, MII_BUS_ID_SIZE, "i2c:%s", dev_name(parent));
+	mii->parent = parent;
+	mii->read = i2c_mii_read;
+	mii->write = i2c_mii_write;
+	mii->priv = i2c;
+
+	return mii;
+}
+EXPORT_SYMBOL_GPL(mdio_i2c_alloc);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("MDIO I2C bridge library");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/phy/mdio-i2c.h b/drivers/net/phy/mdio-i2c.h
new file mode 100644
index 000000000000..889ab57d7f3e
--- /dev/null
+++ b/drivers/net/phy/mdio-i2c.h
@@ -0,0 +1,19 @@
+/*
+ * MDIO I2C bridge
+ *
+ * Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef MDIO_I2C_H
+#define MDIO_I2C_H
+
+struct device;
+struct i2c_adapter;
+struct mii_bus;
+
+struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c);
+
+#endif
diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c
index 6a33646bdf05..c3825c7da038 100644
--- a/drivers/net/phy/mdio-mux-mmioreg.c
+++ b/drivers/net/phy/mdio-mux-mmioreg.c
@@ -105,7 +105,7 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
 	const __be32 *iprop;
 	int len, ret;
 
-	dev_dbg(&pdev->dev, "probing node %s\n", np->full_name);
+	dev_dbg(&pdev->dev, "probing node %pOF\n", np);
 
 	s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
 	if (!s)
@@ -113,8 +113,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
 
 	ret = of_address_to_resource(np, 0, &res);
 	if (ret) {
-		dev_err(&pdev->dev, "could not obtain memory map for node %s\n",
-			np->full_name);
+		dev_err(&pdev->dev, "could not obtain memory map for node %pOF\n",
+			np);
 		return ret;
 	}
 	s->phys = res.start;
@@ -145,15 +145,15 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
 	for_each_available_child_of_node(np, np2) {
 		iprop = of_get_property(np2, "reg", &len);
 		if (!iprop || len != sizeof(uint32_t)) {
-			dev_err(&pdev->dev, "mdio-mux child node %s is "
-				"missing a 'reg' property\n", np2->full_name);
+			dev_err(&pdev->dev, "mdio-mux child node %pOF is "
+				"missing a 'reg' property\n", np2);
 			of_node_put(np2);
 			return -ENODEV;
 		}
 		if (be32_to_cpup(iprop) & ~s->mask) {
-			dev_err(&pdev->dev, "mdio-mux child node %s has "
+			dev_err(&pdev->dev, "mdio-mux child node %pOF has "
 				"a 'reg' value with unmasked bits\n",
-				np2->full_name);
+				np2);
 			of_node_put(np2);
 			return -ENODEV;
 		}
@@ -162,8 +162,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
 	ret = mdio_mux_init(&pdev->dev, mdio_mux_mmioreg_switch_fn,
 			    &s->mux_handle, s, NULL);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to register mdio-mux bus %s\n",
-			np->full_name);
+		dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n",
+			np);
 		return ret;
 	}
 
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index c608e1dfaf09..942ceaf3fd3f 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -138,16 +138,16 @@ int mdio_mux_init(struct device *dev,
 		r = of_property_read_u32(child_bus_node, "reg", &v);
 		if (r) {
 			dev_err(dev,
-				"Error: Failed to find reg for child %s\n",
-				of_node_full_name(child_bus_node));
+				"Error: Failed to find reg for child %pOF\n",
+				child_bus_node);
 			continue;
 		}
 
 		cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL);
 		if (cb == NULL) {
 			dev_err(dev,
-				"Error: Failed to allocate memory for child %s\n",
-				of_node_full_name(child_bus_node));
+				"Error: Failed to allocate memory for child %pOF\n",
+				child_bus_node);
 			ret_val = -ENOMEM;
 			continue;
 		}
@@ -157,8 +157,8 @@ int mdio_mux_init(struct device *dev,
 		cb->mii_bus = mdiobus_alloc();
 		if (!cb->mii_bus) {
 			dev_err(dev,
-				"Error: Failed to allocate MDIO bus for child %s\n",
-				of_node_full_name(child_bus_node));
+				"Error: Failed to allocate MDIO bus for child %pOF\n",
+				child_bus_node);
 			ret_val = -ENOMEM;
 			devm_kfree(dev, cb);
 			continue;
@@ -174,8 +174,8 @@ int mdio_mux_init(struct device *dev,
 		r = of_mdiobus_register(cb->mii_bus, child_bus_node);
 		if (r) {
 			dev_err(dev,
-				"Error: Failed to register MDIO bus for child %s\n",
-				of_node_full_name(child_bus_node));
+				"Error: Failed to register MDIO bus for child %pOF\n",
+				child_bus_node);
 			mdiobus_free(cb->mii_bus);
 			devm_kfree(dev, cb);
 		} else {
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 2df7b62c1a36..b6f9fa670168 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -399,8 +399,7 @@ error:
 	}
 
 	/* Put PHYs in RESET to save power */
-	if (bus->reset_gpiod)
-		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+	gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
 	device_del(&bus->dev);
 	return err;
@@ -425,8 +424,7 @@ void mdiobus_unregister(struct mii_bus *bus)
 	}
 
 	/* Put PHYs in RESET to save power */
-	if (bus->reset_gpiod)
-		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+	gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
 	device_del(&bus->dev);
 }
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 6739b738bbaf..21f75ae244b3 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -9,6 +9,186 @@
 #include <linux/export.h>
 #include <linux/phy.h>
 
+const char *phy_speed_to_str(int speed)
+{
+	switch (speed) {
+	case SPEED_10:
+		return "10Mbps";
+	case SPEED_100:
+		return "100Mbps";
+	case SPEED_1000:
+		return "1Gbps";
+	case SPEED_2500:
+		return "2.5Gbps";
+	case SPEED_5000:
+		return "5Gbps";
+	case SPEED_10000:
+		return "10Gbps";
+	case SPEED_14000:
+		return "14Gbps";
+	case SPEED_20000:
+		return "20Gbps";
+	case SPEED_25000:
+		return "25Gbps";
+	case SPEED_40000:
+		return "40Gbps";
+	case SPEED_50000:
+		return "50Gbps";
+	case SPEED_56000:
+		return "56Gbps";
+	case SPEED_100000:
+		return "100Gbps";
+	case SPEED_UNKNOWN:
+		return "Unknown";
+	default:
+		return "Unsupported (update phy-core.c)";
+	}
+}
+EXPORT_SYMBOL_GPL(phy_speed_to_str);
+
+const char *phy_duplex_to_str(unsigned int duplex)
+{
+	if (duplex == DUPLEX_HALF)
+		return "Half";
+	if (duplex == DUPLEX_FULL)
+		return "Full";
+	if (duplex == DUPLEX_UNKNOWN)
+		return "Unknown";
+	return "Unsupported (update phy-core.c)";
+}
+EXPORT_SYMBOL_GPL(phy_duplex_to_str);
+
+/* A mapping of all SUPPORTED settings to speed/duplex.  This table
+ * must be grouped by speed and sorted in descending match priority
+ * - iow, descending speed. */
+static const struct phy_setting settings[] = {
+	{
+		.speed = SPEED_10000,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+	},
+	{
+		.speed = SPEED_10000,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+	},
+	{
+		.speed = SPEED_10000,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+	},
+	{
+		.speed = SPEED_2500,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
+	},
+	{
+		.speed = SPEED_1000,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+	},
+	{
+		.speed = SPEED_1000,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+	},
+	{
+		.speed = SPEED_1000,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+	},
+	{
+		.speed = SPEED_1000,
+		.duplex = DUPLEX_HALF,
+		.bit = ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+	},
+	{
+		.speed = SPEED_100,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+	},
+	{
+		.speed = SPEED_100,
+		.duplex = DUPLEX_HALF,
+		.bit = ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+	},
+	{
+		.speed = SPEED_10,
+		.duplex = DUPLEX_FULL,
+		.bit = ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+	},
+	{
+		.speed = SPEED_10,
+		.duplex = DUPLEX_HALF,
+		.bit = ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+	},
+};
+
+/**
+ * phy_lookup_setting - lookup a PHY setting
+ * @speed: speed to match
+ * @duplex: duplex to match
+ * @mask: allowed link modes
+ * @maxbit: bit size of link modes
+ * @exact: an exact match is required
+ *
+ * Search the settings array for a setting that matches the speed and
+ * duplex, and which is supported.
+ *
+ * If @exact is unset, either an exact match or %NULL for no match will
+ * be returned.
+ *
+ * If @exact is set, an exact match, the fastest supported setting at
+ * or below the specified speed, the slowest supported setting, or if
+ * they all fail, %NULL will be returned.
+ */
+const struct phy_setting *
+phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
+		   size_t maxbit, bool exact)
+{
+	const struct phy_setting *p, *match = NULL, *last = NULL;
+	int i;
+
+	for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
+		if (p->bit < maxbit && test_bit(p->bit, mask)) {
+			last = p;
+			if (p->speed == speed && p->duplex == duplex) {
+				/* Exact match for speed and duplex */
+				match = p;
+				break;
+			} else if (!exact) {
+				if (!match && p->speed <= speed)
+					/* Candidate */
+					match = p;
+
+				if (p->speed < speed)
+					break;
+			}
+		}
+	}
+
+	if (!match && !exact)
+		match = last;
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(phy_lookup_setting);
+
+size_t phy_speeds(unsigned int *speeds, size_t size,
+		  unsigned long *mask, size_t maxbit)
+{
+	size_t count;
+	int i;
+
+	for (i = 0, count = 0; i < ARRAY_SIZE(settings) && count < size; i++)
+		if (settings[i].bit < maxbit &&
+		    test_bit(settings[i].bit, mask) &&
+		    (count == 0 || speeds[count - 1] != settings[i].speed))
+			speeds[count++] = settings[i].speed;
+
+	return count;
+}
+
 static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
 			     u16 regnum)
 {
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 5068c582d502..dae13f028c84 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -30,7 +30,6 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/phy_led_triggers.h>
-#include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/mdio.h>
 #include <linux/io.h>
@@ -39,42 +38,6 @@
 
 #include <asm/irq.h>
 
-static const char *phy_speed_to_str(int speed)
-{
-	switch (speed) {
-	case SPEED_10:
-		return "10Mbps";
-	case SPEED_100:
-		return "100Mbps";
-	case SPEED_1000:
-		return "1Gbps";
-	case SPEED_2500:
-		return "2.5Gbps";
-	case SPEED_5000:
-		return "5Gbps";
-	case SPEED_10000:
-		return "10Gbps";
-	case SPEED_14000:
-		return "14Gbps";
-	case SPEED_20000:
-		return "20Gbps";
-	case SPEED_25000:
-		return "25Gbps";
-	case SPEED_40000:
-		return "40Gbps";
-	case SPEED_50000:
-		return "50Gbps";
-	case SPEED_56000:
-		return "56Gbps";
-	case SPEED_100000:
-		return "100Gbps";
-	case SPEED_UNKNOWN:
-		return "Unknown";
-	default:
-		return "Unsupported (update phy.c)";
-	}
-}
-
 #define PHY_STATE_STR(_state)			\
 	case PHY_##_state:			\
 		return __stringify(_state);	\
@@ -110,7 +73,7 @@ void phy_print_status(struct phy_device *phydev)
 		netdev_info(phydev->attached_dev,
 			"Link is Up - %s/%s - flow control %s\n",
 			phy_speed_to_str(phydev->speed),
-			DUPLEX_FULL == phydev->duplex ? "Full" : "Half",
+			phy_duplex_to_str(phydev->duplex),
 			phydev->pause ? "rx/tx" : "off");
 	} else	{
 		netdev_info(phydev->attached_dev, "Link is Down\n");
@@ -194,123 +157,6 @@ int phy_aneg_done(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_aneg_done);
 
-/* A structure for mapping a particular speed and duplex
- * combination to a particular SUPPORTED and ADVERTISED value
- */
-struct phy_setting {
-	int speed;
-	int duplex;
-	u32 setting;
-};
-
-/* A mapping of all SUPPORTED settings to speed/duplex.  This table
- * must be grouped by speed and sorted in descending match priority
- * - iow, descending speed. */
-static const struct phy_setting settings[] = {
-	{
-		.speed = SPEED_10000,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_10000baseKR_Full,
-	},
-	{
-		.speed = SPEED_10000,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_10000baseKX4_Full,
-	},
-	{
-		.speed = SPEED_10000,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_10000baseT_Full,
-	},
-	{
-		.speed = SPEED_2500,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_2500baseX_Full,
-	},
-	{
-		.speed = SPEED_1000,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_1000baseKX_Full,
-	},
-	{
-		.speed = SPEED_1000,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_1000baseT_Full,
-	},
-	{
-		.speed = SPEED_1000,
-		.duplex = DUPLEX_HALF,
-		.setting = SUPPORTED_1000baseT_Half,
-	},
-	{
-		.speed = SPEED_100,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_100baseT_Full,
-	},
-	{
-		.speed = SPEED_100,
-		.duplex = DUPLEX_HALF,
-		.setting = SUPPORTED_100baseT_Half,
-	},
-	{
-		.speed = SPEED_10,
-		.duplex = DUPLEX_FULL,
-		.setting = SUPPORTED_10baseT_Full,
-	},
-	{
-		.speed = SPEED_10,
-		.duplex = DUPLEX_HALF,
-		.setting = SUPPORTED_10baseT_Half,
-	},
-};
-
-/**
- * phy_lookup_setting - lookup a PHY setting
- * @speed: speed to match
- * @duplex: duplex to match
- * @features: allowed link modes
- * @exact: an exact match is required
- *
- * Search the settings array for a setting that matches the speed and
- * duplex, and which is supported.
- *
- * If @exact is unset, either an exact match or %NULL for no match will
- * be returned.
- *
- * If @exact is set, an exact match, the fastest supported setting at
- * or below the specified speed, the slowest supported setting, or if
- * they all fail, %NULL will be returned.
- */
-static const struct phy_setting *
-phy_lookup_setting(int speed, int duplex, u32 features, bool exact)
-{
-	const struct phy_setting *p, *match = NULL, *last = NULL;
-	int i;
-
-	for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
-		if (p->setting & features) {
-			last = p;
-			if (p->speed == speed && p->duplex == duplex) {
-				/* Exact match for speed and duplex */
-				match = p;
-				break;
-			} else if (!exact) {
-				if (!match && p->speed <= speed)
-					/* Candidate */
-					match = p;
-
-				if (p->speed < speed)
-					break;
-			}
-		}
-	}
-
-	if (!match && !exact)
-		match = last;
-
-	return match;
-}
-
 /**
  * phy_find_valid - find a PHY setting that matches the requested parameters
  * @speed: desired speed
@@ -327,7 +173,9 @@ phy_lookup_setting(int speed, int duplex, u32 features, bool exact)
 static const struct phy_setting *
 phy_find_valid(int speed, int duplex, u32 supported)
 {
-	return phy_lookup_setting(speed, duplex, supported, false);
+	unsigned long mask = supported;
+
+	return phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, false);
 }
 
 /**
@@ -344,16 +192,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy,
 				  unsigned int *speeds,
 				  unsigned int size)
 {
-	unsigned int count = 0;
-	unsigned int idx = 0;
+	unsigned long supported = phy->supported;
 
-	for (idx = 0; idx < ARRAY_SIZE(settings) && count < size; idx++)
-		/* Assumes settings are grouped by speed */
-		if ((settings[idx].setting & phy->supported) &&
-		    (count == 0 || speeds[count - 1] != settings[idx].speed))
-			speeds[count++] = settings[idx].speed;
-
-	return count;
+	return phy_speeds(speeds, size, &supported, BITS_PER_LONG);
 }
 
 /**
@@ -367,7 +208,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy,
  */
 static inline bool phy_check_valid(int speed, int duplex, u32 features)
 {
-	return !!phy_lookup_setting(speed, duplex, features, true);
+	unsigned long mask = features;
+
+	return !!phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, true);
 }
 
 /**
@@ -705,14 +548,15 @@ EXPORT_SYMBOL(phy_start_aneg);
  *
  * Description: The PHY infrastructure can run a state machine
  *   which tracks whether the PHY is starting up, negotiating,
- *   etc.  This function starts the timer which tracks the state
- *   of the PHY.  If you want to maintain your own state machine,
+ *   etc.  This function starts the delayed workqueue which tracks
+ *   the state of the PHY. If you want to maintain your own state machine,
  *   do not call this function.
  */
 void phy_start_machine(struct phy_device *phydev)
 {
 	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
 }
+EXPORT_SYMBOL_GPL(phy_start_machine);
 
 /**
  * phy_trigger_machine - trigger the state machine to run
@@ -737,9 +581,9 @@ void phy_trigger_machine(struct phy_device *phydev, bool sync)
  * phy_stop_machine - stop the PHY state machine tracking
  * @phydev: target phy_device struct
  *
- * Description: Stops the state machine timer, sets the state to UP
- *   (unless it wasn't up yet). This function must be called BEFORE
- *   phy_detach.
+ * Description: Stops the state machine delayed workqueue, sets the
+ *   state to UP (unless it wasn't up yet). This function must be
+ *   called BEFORE phy_detach.
  */
 void phy_stop_machine(struct phy_device *phydev)
 {
@@ -1022,9 +866,15 @@ void phy_start(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_start);
 
-static void phy_adjust_link(struct phy_device *phydev)
+static void phy_link_up(struct phy_device *phydev)
 {
-	phydev->adjust_link(phydev->attached_dev);
+	phydev->phy_link_change(phydev, true, true);
+	phy_led_trigger_change_speed(phydev);
+}
+
+static void phy_link_down(struct phy_device *phydev, bool do_carrier)
+{
+	phydev->phy_link_change(phydev, false, do_carrier);
 	phy_led_trigger_change_speed(phydev);
 }
 
@@ -1069,8 +919,7 @@ void phy_state_machine(struct work_struct *work)
 		/* If the link is down, give up on negotiation for now */
 		if (!phydev->link) {
 			phydev->state = PHY_NOLINK;
-			netif_carrier_off(phydev->attached_dev);
-			phy_adjust_link(phydev);
+			phy_link_down(phydev, true);
 			break;
 		}
 
@@ -1082,9 +931,7 @@ void phy_state_machine(struct work_struct *work)
 		/* If AN is done, we're running */
 		if (err > 0) {
 			phydev->state = PHY_RUNNING;
-			netif_carrier_on(phydev->attached_dev);
-			phy_adjust_link(phydev);
-
+			phy_link_up(phydev);
 		} else if (0 == phydev->link_timeout--)
 			needs_aneg = true;
 		break;
@@ -1109,8 +956,7 @@ void phy_state_machine(struct work_struct *work)
 				}
 			}
 			phydev->state = PHY_RUNNING;
-			netif_carrier_on(phydev->attached_dev);
-			phy_adjust_link(phydev);
+			phy_link_up(phydev);
 		}
 		break;
 	case PHY_FORCING:
@@ -1120,13 +966,12 @@ void phy_state_machine(struct work_struct *work)
 
 		if (phydev->link) {
 			phydev->state = PHY_RUNNING;
-			netif_carrier_on(phydev->attached_dev);
+			phy_link_up(phydev);
 		} else {
 			if (0 == phydev->link_timeout--)
 				needs_aneg = true;
+			phy_link_down(phydev, false);
 		}
-
-		phy_adjust_link(phydev);
 		break;
 	case PHY_RUNNING:
 		/* Only register a CHANGE if we are polling and link changed
@@ -1158,14 +1003,12 @@ void phy_state_machine(struct work_struct *work)
 
 		if (phydev->link) {
 			phydev->state = PHY_RUNNING;
-			netif_carrier_on(phydev->attached_dev);
+			phy_link_up(phydev);
 		} else {
 			phydev->state = PHY_NOLINK;
-			netif_carrier_off(phydev->attached_dev);
+			phy_link_down(phydev, true);
 		}
 
-		phy_adjust_link(phydev);
-
 		if (phy_interrupt_is_valid(phydev))
 			err = phy_config_interrupt(phydev,
 						   PHY_INTERRUPT_ENABLED);
@@ -1173,8 +1016,7 @@ void phy_state_machine(struct work_struct *work)
 	case PHY_HALTED:
 		if (phydev->link) {
 			phydev->link = 0;
-			netif_carrier_off(phydev->attached_dev);
-			phy_adjust_link(phydev);
+			phy_link_down(phydev, true);
 			do_suspend = true;
 		}
 		break;
@@ -1194,11 +1036,11 @@ void phy_state_machine(struct work_struct *work)
 
 				if (phydev->link) {
 					phydev->state = PHY_RUNNING;
-					netif_carrier_on(phydev->attached_dev);
+					phy_link_up(phydev);
 				} else	{
 					phydev->state = PHY_NOLINK;
+					phy_link_down(phydev, false);
 				}
-				phy_adjust_link(phydev);
 			} else {
 				phydev->state = PHY_AN;
 				phydev->link_timeout = PHY_AN_TIMEOUT;
@@ -1210,11 +1052,11 @@ void phy_state_machine(struct work_struct *work)
 
 			if (phydev->link) {
 				phydev->state = PHY_RUNNING;
-				netif_carrier_on(phydev->attached_dev);
+				phy_link_up(phydev);
 			} else	{
 				phydev->state = PHY_NOLINK;
+				phy_link_down(phydev, false);
 			}
-			phy_adjust_link(phydev);
 		}
 		break;
 	}
@@ -1229,9 +1071,10 @@ void phy_state_machine(struct work_struct *work)
 	if (err < 0)
 		phy_error(phydev);
 
-	phydev_dbg(phydev, "PHY state change %s -> %s\n",
-		   phy_state_to_str(old_state),
-		   phy_state_to_str(phydev->state));
+	if (old_state != phydev->state)
+		phydev_dbg(phydev, "PHY state change %s -> %s\n",
+			   phy_state_to_str(old_state),
+			   phy_state_to_str(phydev->state));
 
 	/* Only re-schedule a PHY state machine change if we are polling the
 	 * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 1790f7fec125..9493fb369682 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -688,6 +688,19 @@ struct phy_device *phy_find_first(struct mii_bus *bus)
 }
 EXPORT_SYMBOL(phy_find_first);
 
+static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier)
+{
+	struct net_device *netdev = phydev->attached_dev;
+
+	if (do_carrier) {
+		if (up)
+			netif_carrier_on(netdev);
+		else
+			netif_carrier_off(netdev);
+	}
+	phydev->adjust_link(netdev);
+}
+
 /**
  * phy_prepare_link - prepares the PHY layer to monitor link status
  * @phydev: target phy_device struct
@@ -951,6 +964,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		goto error;
 	}
 
+	phydev->phy_link_change = phy_link_change;
 	phydev->attached_dev = dev;
 	dev->phydev = phydev;
 
@@ -1070,6 +1084,7 @@ void phy_detach(struct phy_device *phydev)
 	phydev->attached_dev->phydev = NULL;
 	phydev->attached_dev = NULL;
 	phy_suspend(phydev);
+	phydev->phylink = NULL;
 
 	phy_led_triggers_unregister(phydev);
 
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
new file mode 100644
index 000000000000..32917bdd1432
--- /dev/null
+++ b/drivers/net/phy/phylink.c
@@ -0,0 +1,1462 @@
+/*
+ * phylink models the MAC to optional PHY connection, supporting
+ * technologies such as SFP cages where the PHY is hot-pluggable.
+ *
+ * Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/ethtool.h>
+#include <linux/export.h>
+#include <linux/gpio/consumer.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <linux/phylink.h>
+#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "sfp.h"
+#include "swphy.h"
+
+#define SUPPORTED_INTERFACES \
+	(SUPPORTED_TP | SUPPORTED_MII | SUPPORTED_FIBRE | \
+	 SUPPORTED_BNC | SUPPORTED_AUI | SUPPORTED_Backplane)
+#define ADVERTISED_INTERFACES \
+	(ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_FIBRE | \
+	 ADVERTISED_BNC | ADVERTISED_AUI | ADVERTISED_Backplane)
+
+enum {
+	PHYLINK_DISABLE_STOPPED,
+	PHYLINK_DISABLE_LINK,
+};
+
+struct phylink {
+	struct net_device *netdev;
+	const struct phylink_mac_ops *ops;
+
+	unsigned long phylink_disable_state; /* bitmask of disables */
+	struct phy_device *phydev;
+	phy_interface_t link_interface;	/* PHY_INTERFACE_xxx */
+	u8 link_an_mode;		/* MLO_AN_xxx */
+	u8 link_port;			/* The current non-phy ethtool port */
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+
+	/* The link configuration settings */
+	struct phylink_link_state link_config;
+	struct gpio_desc *link_gpio;
+
+	struct mutex state_mutex;
+	struct phylink_link_state phy_state;
+	struct work_struct resolve;
+
+	bool mac_link_dropped;
+
+	struct sfp_bus *sfp_bus;
+};
+
+static inline void linkmode_zero(unsigned long *dst)
+{
+	bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
+{
+	bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_and(unsigned long *dst, const unsigned long *a,
+				const unsigned long *b)
+{
+	bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_or(unsigned long *dst, const unsigned long *a,
+				const unsigned long *b)
+{
+	bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline bool linkmode_empty(const unsigned long *src)
+{
+	return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+void phylink_set_port_modes(unsigned long *mask)
+{
+	phylink_set(mask, TP);
+	phylink_set(mask, AUI);
+	phylink_set(mask, MII);
+	phylink_set(mask, FIBRE);
+	phylink_set(mask, BNC);
+	phylink_set(mask, Backplane);
+}
+EXPORT_SYMBOL_GPL(phylink_set_port_modes);
+
+static int phylink_is_empty_linkmode(const unsigned long *linkmode)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = { 0, };
+
+	phylink_set_port_modes(tmp);
+	phylink_set(tmp, Autoneg);
+	phylink_set(tmp, Pause);
+	phylink_set(tmp, Asym_Pause);
+
+	bitmap_andnot(tmp, linkmode, tmp, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	return linkmode_empty(tmp);
+}
+
+static const char *phylink_an_mode_str(unsigned int mode)
+{
+	static const char *modestr[] = {
+		[MLO_AN_PHY] = "phy",
+		[MLO_AN_FIXED] = "fixed",
+		[MLO_AN_SGMII] = "SGMII",
+		[MLO_AN_8023Z] = "802.3z",
+	};
+
+	return mode < ARRAY_SIZE(modestr) ? modestr[mode] : "unknown";
+}
+
+static int phylink_validate(struct phylink *pl, unsigned long *supported,
+			    struct phylink_link_state *state)
+{
+	pl->ops->validate(pl->netdev, supported, state);
+
+	return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
+}
+
+static int phylink_parse_fixedlink(struct phylink *pl, struct device_node *np)
+{
+	struct device_node *fixed_node;
+	const struct phy_setting *s;
+	struct gpio_desc *desc;
+	const __be32 *fixed_prop;
+	u32 speed;
+	int ret, len;
+
+	fixed_node = of_get_child_by_name(np, "fixed-link");
+	if (fixed_node) {
+		ret = of_property_read_u32(fixed_node, "speed", &speed);
+
+		pl->link_config.speed = speed;
+		pl->link_config.duplex = DUPLEX_HALF;
+
+		if (of_property_read_bool(fixed_node, "full-duplex"))
+			pl->link_config.duplex = DUPLEX_FULL;
+
+		/* We treat the "pause" and "asym-pause" terminology as
+		 * defining the link partner's ability. */
+		if (of_property_read_bool(fixed_node, "pause"))
+			pl->link_config.pause |= MLO_PAUSE_SYM;
+		if (of_property_read_bool(fixed_node, "asym-pause"))
+			pl->link_config.pause |= MLO_PAUSE_ASYM;
+
+		if (ret == 0) {
+			desc = fwnode_get_named_gpiod(&fixed_node->fwnode,
+						      "link-gpios", 0,
+						      GPIOD_IN, "?");
+
+			if (!IS_ERR(desc))
+				pl->link_gpio = desc;
+			else if (desc == ERR_PTR(-EPROBE_DEFER))
+				ret = -EPROBE_DEFER;
+		}
+		of_node_put(fixed_node);
+
+		if (ret)
+			return ret;
+	} else {
+		fixed_prop = of_get_property(np, "fixed-link", &len);
+		if (!fixed_prop) {
+			netdev_err(pl->netdev, "broken fixed-link?\n");
+			return -EINVAL;
+		}
+		if (len == 5 * sizeof(*fixed_prop)) {
+			pl->link_config.duplex = be32_to_cpu(fixed_prop[1]) ?
+						DUPLEX_FULL : DUPLEX_HALF;
+			pl->link_config.speed = be32_to_cpu(fixed_prop[2]);
+			if (be32_to_cpu(fixed_prop[3]))
+				pl->link_config.pause |= MLO_PAUSE_SYM;
+			if (be32_to_cpu(fixed_prop[4]))
+				pl->link_config.pause |= MLO_PAUSE_ASYM;
+		}
+	}
+
+	if (pl->link_config.speed > SPEED_1000 &&
+	    pl->link_config.duplex != DUPLEX_FULL)
+		netdev_warn(pl->netdev, "fixed link specifies half duplex for %dMbps link?\n",
+			    pl->link_config.speed);
+
+	bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	linkmode_copy(pl->link_config.advertising, pl->supported);
+	phylink_validate(pl, pl->supported, &pl->link_config);
+
+	s = phy_lookup_setting(pl->link_config.speed, pl->link_config.duplex,
+			       pl->supported,
+			       __ETHTOOL_LINK_MODE_MASK_NBITS, true);
+	linkmode_zero(pl->supported);
+	phylink_set(pl->supported, MII);
+	if (s) {
+		__set_bit(s->bit, pl->supported);
+	} else {
+		netdev_warn(pl->netdev, "fixed link %s duplex %dMbps not recognised\n",
+			    pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
+			    pl->link_config.speed);
+	}
+
+	linkmode_and(pl->link_config.advertising, pl->link_config.advertising,
+		     pl->supported);
+
+	pl->link_config.link = 1;
+	pl->link_config.an_complete = 1;
+
+	return 0;
+}
+
+static int phylink_parse_mode(struct phylink *pl, struct device_node *np)
+{
+	struct device_node *dn;
+	const char *managed;
+
+	dn = of_get_child_by_name(np, "fixed-link");
+	if (dn || of_find_property(np, "fixed-link", NULL))
+		pl->link_an_mode = MLO_AN_FIXED;
+	of_node_put(dn);
+
+	if (of_property_read_string(np, "managed", &managed) == 0 &&
+	    strcmp(managed, "in-band-status") == 0) {
+		if (pl->link_an_mode == MLO_AN_FIXED) {
+			netdev_err(pl->netdev,
+				   "can't use both fixed-link and in-band-status\n");
+			return -EINVAL;
+		}
+
+		linkmode_zero(pl->supported);
+		phylink_set(pl->supported, MII);
+		phylink_set(pl->supported, Autoneg);
+		phylink_set(pl->supported, Asym_Pause);
+		phylink_set(pl->supported, Pause);
+		pl->link_config.an_enabled = true;
+
+		switch (pl->link_config.interface) {
+		case PHY_INTERFACE_MODE_SGMII:
+			phylink_set(pl->supported, 10baseT_Half);
+			phylink_set(pl->supported, 10baseT_Full);
+			phylink_set(pl->supported, 100baseT_Half);
+			phylink_set(pl->supported, 100baseT_Full);
+			phylink_set(pl->supported, 1000baseT_Half);
+			phylink_set(pl->supported, 1000baseT_Full);
+			pl->link_an_mode = MLO_AN_SGMII;
+			break;
+
+		case PHY_INTERFACE_MODE_1000BASEX:
+			phylink_set(pl->supported, 1000baseX_Full);
+			pl->link_an_mode = MLO_AN_8023Z;
+			break;
+
+		case PHY_INTERFACE_MODE_2500BASEX:
+			phylink_set(pl->supported, 2500baseX_Full);
+			pl->link_an_mode = MLO_AN_8023Z;
+			break;
+
+		case PHY_INTERFACE_MODE_10GKR:
+			phylink_set(pl->supported, 10baseT_Half);
+			phylink_set(pl->supported, 10baseT_Full);
+			phylink_set(pl->supported, 100baseT_Half);
+			phylink_set(pl->supported, 100baseT_Full);
+			phylink_set(pl->supported, 1000baseT_Half);
+			phylink_set(pl->supported, 1000baseT_Full);
+			phylink_set(pl->supported, 1000baseX_Full);
+			phylink_set(pl->supported, 10000baseKR_Full);
+			phylink_set(pl->supported, 10000baseCR_Full);
+			phylink_set(pl->supported, 10000baseSR_Full);
+			phylink_set(pl->supported, 10000baseLR_Full);
+			phylink_set(pl->supported, 10000baseLRM_Full);
+			phylink_set(pl->supported, 10000baseER_Full);
+			pl->link_an_mode = MLO_AN_SGMII;
+			break;
+
+		default:
+			netdev_err(pl->netdev,
+				   "incorrect link mode %s for in-band status\n",
+				   phy_modes(pl->link_config.interface));
+			return -EINVAL;
+		}
+
+		linkmode_copy(pl->link_config.advertising, pl->supported);
+
+		if (phylink_validate(pl, pl->supported, &pl->link_config)) {
+			netdev_err(pl->netdev,
+				   "failed to validate link configuration for in-band status\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static void phylink_mac_config(struct phylink *pl,
+			       const struct phylink_link_state *state)
+{
+	netdev_dbg(pl->netdev,
+		   "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
+		   __func__, phylink_an_mode_str(pl->link_an_mode),
+		   phy_modes(state->interface),
+		   phy_speed_to_str(state->speed),
+		   phy_duplex_to_str(state->duplex),
+		   __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
+		   state->pause, state->link, state->an_enabled);
+
+	pl->ops->mac_config(pl->netdev, pl->link_an_mode, state);
+}
+
+static void phylink_mac_an_restart(struct phylink *pl)
+{
+	if (pl->link_config.an_enabled &&
+	    (pl->link_config.interface == PHY_INTERFACE_MODE_1000BASEX ||
+	     pl->link_config.interface == PHY_INTERFACE_MODE_2500BASEX))
+		pl->ops->mac_an_restart(pl->netdev);
+}
+
+static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *state)
+{
+	struct net_device *ndev = pl->netdev;
+
+	linkmode_copy(state->advertising, pl->link_config.advertising);
+	linkmode_zero(state->lp_advertising);
+	state->interface = pl->link_config.interface;
+	state->an_enabled = pl->link_config.an_enabled;
+	state->link = 1;
+
+	return pl->ops->mac_link_state(ndev, state);
+}
+
+/* The fixed state is... fixed except for the link state,
+ * which may be determined by a GPIO.
+ */
+static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_state *state)
+{
+	*state = pl->link_config;
+	if (pl->link_gpio)
+		state->link = !!gpiod_get_value(pl->link_gpio);
+}
+
+/* Flow control is resolved according to our and the link partners
+ * advertisments using the following drawn from the 802.3 specs:
+ *  Local device  Link partner
+ *  Pause AsymDir Pause AsymDir Result
+ *    1     X       1     X     TX+RX
+ *    0     1       1     1     RX
+ *    1     1       0     1     TX
+ */
+static void phylink_resolve_flow(struct phylink *pl,
+	struct phylink_link_state *state)
+{
+	int new_pause = 0;
+
+	if (pl->link_config.pause & MLO_PAUSE_AN) {
+		int pause = 0;
+
+		if (phylink_test(pl->link_config.advertising, Pause))
+			pause |= MLO_PAUSE_SYM;
+		if (phylink_test(pl->link_config.advertising, Asym_Pause))
+			pause |= MLO_PAUSE_ASYM;
+
+		pause &= state->pause;
+
+		if (pause & MLO_PAUSE_SYM)
+			new_pause = MLO_PAUSE_TX | MLO_PAUSE_RX;
+		else if (pause & MLO_PAUSE_ASYM)
+			new_pause = state->pause & MLO_PAUSE_SYM ?
+				 MLO_PAUSE_RX : MLO_PAUSE_TX;
+	} else {
+		new_pause = pl->link_config.pause & MLO_PAUSE_TXRX_MASK;
+	}
+
+	state->pause &= ~MLO_PAUSE_TXRX_MASK;
+	state->pause |= new_pause;
+}
+
+static const char *phylink_pause_to_str(int pause)
+{
+	switch (pause & MLO_PAUSE_TXRX_MASK) {
+	case MLO_PAUSE_TX | MLO_PAUSE_RX:
+		return "rx/tx";
+	case MLO_PAUSE_TX:
+		return "tx";
+	case MLO_PAUSE_RX:
+		return "rx";
+	default:
+		return "off";
+	}
+}
+
+static void phylink_resolve(struct work_struct *w)
+{
+	struct phylink *pl = container_of(w, struct phylink, resolve);
+	struct phylink_link_state link_state;
+	struct net_device *ndev = pl->netdev;
+
+	mutex_lock(&pl->state_mutex);
+	if (pl->phylink_disable_state) {
+		pl->mac_link_dropped = false;
+		link_state.link = false;
+	} else if (pl->mac_link_dropped) {
+		link_state.link = false;
+	} else {
+		switch (pl->link_an_mode) {
+		case MLO_AN_PHY:
+			link_state = pl->phy_state;
+			phylink_resolve_flow(pl, &link_state);
+			phylink_mac_config(pl, &link_state);
+			break;
+
+		case MLO_AN_FIXED:
+			phylink_get_fixed_state(pl, &link_state);
+			phylink_mac_config(pl, &link_state);
+			break;
+
+		case MLO_AN_SGMII:
+			phylink_get_mac_state(pl, &link_state);
+			if (pl->phydev) {
+				bool changed = false;
+
+				link_state.link = link_state.link &&
+						  pl->phy_state.link;
+
+				if (pl->phy_state.interface !=
+				    link_state.interface) {
+					link_state.interface = pl->phy_state.interface;
+					changed = true;
+				}
+
+				/* Propagate the flow control from the PHY
+				 * to the MAC. Also propagate the interface
+				 * if changed.
+				 */
+				if (pl->phy_state.link || changed) {
+					link_state.pause |= pl->phy_state.pause;
+					phylink_resolve_flow(pl, &link_state);
+
+					phylink_mac_config(pl, &link_state);
+				}
+			}
+			break;
+
+		case MLO_AN_8023Z:
+			phylink_get_mac_state(pl, &link_state);
+			break;
+		}
+	}
+
+	if (link_state.link != netif_carrier_ok(ndev)) {
+		if (!link_state.link) {
+			netif_carrier_off(ndev);
+			pl->ops->mac_link_down(ndev, pl->link_an_mode);
+			netdev_info(ndev, "Link is Down\n");
+		} else {
+			pl->ops->mac_link_up(ndev, pl->link_an_mode,
+					     pl->phydev);
+
+			netif_carrier_on(ndev);
+
+			netdev_info(ndev,
+				    "Link is Up - %s/%s - flow control %s\n",
+				    phy_speed_to_str(link_state.speed),
+				    phy_duplex_to_str(link_state.duplex),
+				    phylink_pause_to_str(link_state.pause));
+		}
+	}
+	if (!link_state.link && pl->mac_link_dropped) {
+		pl->mac_link_dropped = false;
+		queue_work(system_power_efficient_wq, &pl->resolve);
+	}
+	mutex_unlock(&pl->state_mutex);
+}
+
+static void phylink_run_resolve(struct phylink *pl)
+{
+	if (!pl->phylink_disable_state)
+		queue_work(system_power_efficient_wq, &pl->resolve);
+}
+
+static const struct sfp_upstream_ops sfp_phylink_ops;
+
+static int phylink_register_sfp(struct phylink *pl, struct device_node *np)
+{
+	struct device_node *sfp_np;
+
+	sfp_np = of_parse_phandle(np, "sfp", 0);
+	if (!sfp_np)
+		return 0;
+
+	pl->sfp_bus = sfp_register_upstream(sfp_np, pl->netdev, pl,
+					    &sfp_phylink_ops);
+	if (!pl->sfp_bus)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
+	phy_interface_t iface, const struct phylink_mac_ops *ops)
+{
+	struct phylink *pl;
+	int ret;
+
+	pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+	if (!pl)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&pl->state_mutex);
+	INIT_WORK(&pl->resolve, phylink_resolve);
+	pl->netdev = ndev;
+	pl->phy_state.interface = iface;
+	pl->link_interface = iface;
+	pl->link_port = PORT_MII;
+	pl->link_config.interface = iface;
+	pl->link_config.pause = MLO_PAUSE_AN;
+	pl->link_config.speed = SPEED_UNKNOWN;
+	pl->link_config.duplex = DUPLEX_UNKNOWN;
+	pl->ops = ops;
+	__set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+
+	bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	linkmode_copy(pl->link_config.advertising, pl->supported);
+	phylink_validate(pl, pl->supported, &pl->link_config);
+
+	ret = phylink_parse_mode(pl, np);
+	if (ret < 0) {
+		kfree(pl);
+		return ERR_PTR(ret);
+	}
+
+	if (pl->link_an_mode == MLO_AN_FIXED) {
+		ret = phylink_parse_fixedlink(pl, np);
+		if (ret < 0) {
+			kfree(pl);
+			return ERR_PTR(ret);
+		}
+	}
+
+	ret = phylink_register_sfp(pl, np);
+	if (ret < 0) {
+		kfree(pl);
+		return ERR_PTR(ret);
+	}
+
+	return pl;
+}
+EXPORT_SYMBOL_GPL(phylink_create);
+
+void phylink_destroy(struct phylink *pl)
+{
+	if (pl->sfp_bus)
+		sfp_unregister_upstream(pl->sfp_bus);
+
+	cancel_work_sync(&pl->resolve);
+	kfree(pl);
+}
+EXPORT_SYMBOL_GPL(phylink_destroy);
+
+void phylink_phy_change(struct phy_device *phydev, bool up, bool do_carrier)
+{
+	struct phylink *pl = phydev->phylink;
+
+	mutex_lock(&pl->state_mutex);
+	pl->phy_state.speed = phydev->speed;
+	pl->phy_state.duplex = phydev->duplex;
+	pl->phy_state.pause = MLO_PAUSE_NONE;
+	if (phydev->pause)
+		pl->phy_state.pause |= MLO_PAUSE_SYM;
+	if (phydev->asym_pause)
+		pl->phy_state.pause |= MLO_PAUSE_ASYM;
+	pl->phy_state.interface = phydev->interface;
+	pl->phy_state.link = up;
+	mutex_unlock(&pl->state_mutex);
+
+	phylink_run_resolve(pl);
+
+	netdev_dbg(pl->netdev, "phy link %s %s/%s/%s\n", up ? "up" : "down",
+	           phy_modes(phydev->interface),
+		   phy_speed_to_str(phydev->speed),
+		   phy_duplex_to_str(phydev->duplex));
+}
+
+static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
+{
+	struct phylink_link_state config;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+	u32 advertising;
+	int ret;
+
+	memset(&config, 0, sizeof(config));
+	ethtool_convert_legacy_u32_to_link_mode(supported, phy->supported);
+	ethtool_convert_legacy_u32_to_link_mode(config.advertising,
+						phy->advertising);
+	config.interface = pl->link_config.interface;
+
+	/*
+	 * This is the new way of dealing with flow control for PHYs,
+	 * as described by Timur Tabi in commit 529ed1275263 ("net: phy:
+	 * phy drivers should not set SUPPORTED_[Asym_]Pause") except
+	 * using our validate call to the MAC, we rely upon the MAC
+	 * clearing the bits from both supported and advertising fields.
+	 */
+	if (phylink_test(supported, Pause))
+		phylink_set(config.advertising, Pause);
+	if (phylink_test(supported, Asym_Pause))
+		phylink_set(config.advertising, Asym_Pause);
+
+	ret = phylink_validate(pl, supported, &config);
+	if (ret)
+		return ret;
+
+	phy->phylink = pl;
+	phy->phy_link_change = phylink_phy_change;
+
+	netdev_info(pl->netdev,
+		    "PHY [%s] driver [%s]\n", dev_name(&phy->mdio.dev),
+		    phy->drv->name);
+
+	mutex_lock(&phy->lock);
+	mutex_lock(&pl->state_mutex);
+	pl->netdev->phydev = phy;
+	pl->phydev = phy;
+	linkmode_copy(pl->supported, supported);
+	linkmode_copy(pl->link_config.advertising, config.advertising);
+
+	/* Restrict the phy advertisment according to the MAC support. */
+	ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
+	phy->advertising = advertising;
+	mutex_unlock(&pl->state_mutex);
+	mutex_unlock(&phy->lock);
+
+	netdev_dbg(pl->netdev,
+		   "phy: setting supported %*pb advertising 0x%08x\n",
+		   __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
+		   phy->advertising);
+
+	phy_start_machine(phy);
+	if (phy->irq > 0)
+		phy_start_interrupts(phy);
+
+	return 0;
+}
+
+int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
+{
+	int ret;
+
+	ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);
+	if (ret)
+		return ret;
+
+	ret = phylink_bringup_phy(pl, phy);
+	if (ret)
+		phy_detach(phy);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_connect_phy);
+
+int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn)
+{
+	struct device_node *phy_node;
+	struct phy_device *phy_dev;
+	int ret;
+
+	/* Fixed links are handled without needing a PHY */
+	if (pl->link_an_mode == MLO_AN_FIXED)
+		return 0;
+
+	phy_node = of_parse_phandle(dn, "phy-handle", 0);
+	if (!phy_node)
+		phy_node = of_parse_phandle(dn, "phy", 0);
+	if (!phy_node)
+		phy_node = of_parse_phandle(dn, "phy-device", 0);
+
+	if (!phy_node) {
+		if (pl->link_an_mode == MLO_AN_PHY) {
+			netdev_err(pl->netdev, "unable to find PHY node\n");
+			return -ENODEV;
+		}
+		return 0;
+	}
+
+	phy_dev = of_phy_attach(pl->netdev, phy_node, 0, pl->link_interface);
+	/* We're done with the phy_node handle */
+	of_node_put(phy_node);
+
+	if (!phy_dev)
+		return -ENODEV;
+
+	ret = phylink_bringup_phy(pl, phy_dev);
+	if (ret)
+		phy_detach(phy_dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_of_phy_connect);
+
+void phylink_disconnect_phy(struct phylink *pl)
+{
+	struct phy_device *phy;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	phy = pl->phydev;
+	if (phy) {
+		mutex_lock(&phy->lock);
+		mutex_lock(&pl->state_mutex);
+		pl->netdev->phydev = NULL;
+		pl->phydev = NULL;
+		mutex_unlock(&pl->state_mutex);
+		mutex_unlock(&phy->lock);
+		flush_work(&pl->resolve);
+
+		phy_disconnect(phy);
+	}
+}
+EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
+
+void phylink_mac_change(struct phylink *pl, bool up)
+{
+	if (!up)
+		pl->mac_link_dropped = true;
+	phylink_run_resolve(pl);
+	netdev_dbg(pl->netdev, "mac link %s\n", up ? "up" : "down");
+}
+EXPORT_SYMBOL_GPL(phylink_mac_change);
+
+void phylink_start(struct phylink *pl)
+{
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	netdev_info(pl->netdev, "configuring for %s/%s link mode\n",
+		    phylink_an_mode_str(pl->link_an_mode),
+		    phy_modes(pl->link_config.interface));
+
+	/* Apply the link configuration to the MAC when starting. This allows
+	 * a fixed-link to start with the correct parameters, and also
+	 * ensures that we set the appropriate advertisment for Serdes links.
+	 */
+	phylink_resolve_flow(pl, &pl->link_config);
+	phylink_mac_config(pl, &pl->link_config);
+
+	clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+	phylink_run_resolve(pl);
+
+	if (pl->sfp_bus)
+		sfp_upstream_start(pl->sfp_bus);
+	if (pl->phydev)
+		phy_start(pl->phydev);
+}
+EXPORT_SYMBOL_GPL(phylink_start);
+
+void phylink_stop(struct phylink *pl)
+{
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		phy_stop(pl->phydev);
+	if (pl->sfp_bus)
+		sfp_upstream_stop(pl->sfp_bus);
+
+	set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+	flush_work(&pl->resolve);
+}
+EXPORT_SYMBOL_GPL(phylink_stop);
+
+void phylink_ethtool_get_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
+{
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (pl->phydev)
+		phy_ethtool_get_wol(pl->phydev, wol);
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_wol);
+
+int phylink_ethtool_set_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
+{
+	int ret = -EOPNOTSUPP;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		ret = phy_ethtool_set_wol(pl->phydev, wol);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_set_wol);
+
+static void phylink_merge_link_mode(unsigned long *dst, const unsigned long *b)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask);
+
+	linkmode_zero(mask);
+	phylink_set_port_modes(mask);
+
+	linkmode_and(dst, dst, mask);
+	linkmode_or(dst, dst, b);
+}
+
+static void phylink_get_ksettings(const struct phylink_link_state *state,
+				  struct ethtool_link_ksettings *kset)
+{
+	phylink_merge_link_mode(kset->link_modes.advertising, state->advertising);
+	linkmode_copy(kset->link_modes.lp_advertising, state->lp_advertising);
+	kset->base.speed = state->speed;
+	kset->base.duplex = state->duplex;
+	kset->base.autoneg = state->an_enabled ? AUTONEG_ENABLE :
+				AUTONEG_DISABLE;
+}
+
+int phylink_ethtool_ksettings_get(struct phylink *pl,
+	struct ethtool_link_ksettings *kset)
+{
+	struct phylink_link_state link_state;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev) {
+		phy_ethtool_ksettings_get(pl->phydev, kset);
+	} else {
+		kset->base.port = pl->link_port;
+	}
+
+	linkmode_copy(kset->link_modes.supported, pl->supported);
+
+	switch (pl->link_an_mode) {
+	case MLO_AN_FIXED:
+		/* We are using fixed settings. Report these as the
+		 * current link settings - and note that these also
+		 * represent the supported speeds/duplex/pause modes.
+		 */
+		phylink_get_fixed_state(pl, &link_state);
+		phylink_get_ksettings(&link_state, kset);
+		break;
+
+	case MLO_AN_SGMII:
+		/* If there is a phy attached, then use the reported
+		 * settings from the phy with no modification.
+		 */
+		if (pl->phydev)
+			break;
+
+	case MLO_AN_8023Z:
+		phylink_get_mac_state(pl, &link_state);
+
+		/* The MAC is reporting the link results from its own PCS
+		 * layer via in-band status. Report these as the current
+		 * link settings.
+		 */
+		phylink_get_ksettings(&link_state, kset);
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get);
+
+int phylink_ethtool_ksettings_set(struct phylink *pl,
+	const struct ethtool_link_ksettings *kset)
+{
+	struct ethtool_link_ksettings our_kset;
+	struct phylink_link_state config;
+	int ret;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (kset->base.autoneg != AUTONEG_DISABLE &&
+	    kset->base.autoneg != AUTONEG_ENABLE)
+		return -EINVAL;
+
+	config = pl->link_config;
+
+	/* Mask out unsupported advertisments */
+	linkmode_and(config.advertising, kset->link_modes.advertising,
+		     pl->supported);
+
+	/* FIXME: should we reject autoneg if phy/mac does not support it? */
+	if (kset->base.autoneg == AUTONEG_DISABLE) {
+		const struct phy_setting *s;
+
+		/* Autonegotiation disabled, select a suitable speed and
+		 * duplex.
+		 */
+		s = phy_lookup_setting(kset->base.speed, kset->base.duplex,
+				       pl->supported,
+				       __ETHTOOL_LINK_MODE_MASK_NBITS, false);
+		if (!s)
+			return -EINVAL;
+
+		/* If we have a fixed link (as specified by firmware), refuse
+		 * to change link parameters.
+		 */
+		if (pl->link_an_mode == MLO_AN_FIXED &&
+		    (s->speed != pl->link_config.speed ||
+		     s->duplex != pl->link_config.duplex))
+			return -EINVAL;
+
+		config.speed = s->speed;
+		config.duplex = s->duplex;
+		config.an_enabled = false;
+
+		__clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
+	} else {
+		/* If we have a fixed link, refuse to enable autonegotiation */
+		if (pl->link_an_mode == MLO_AN_FIXED)
+			return -EINVAL;
+
+		config.speed = SPEED_UNKNOWN;
+		config.duplex = DUPLEX_UNKNOWN;
+		config.an_enabled = true;
+
+		__set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
+	}
+
+	if (phylink_validate(pl, pl->supported, &config))
+		return -EINVAL;
+
+	/* If autonegotiation is enabled, we must have an advertisment */
+	if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
+		return -EINVAL;
+
+	our_kset = *kset;
+	linkmode_copy(our_kset.link_modes.advertising, config.advertising);
+	our_kset.base.speed = config.speed;
+	our_kset.base.duplex = config.duplex;
+
+	/* If we have a PHY, configure the phy */
+	if (pl->phydev) {
+		ret = phy_ethtool_ksettings_set(pl->phydev, &our_kset);
+		if (ret)
+			return ret;
+	}
+
+	mutex_lock(&pl->state_mutex);
+	/* Configure the MAC to match the new settings */
+	linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
+	pl->link_config.speed = our_kset.base.speed;
+	pl->link_config.duplex = our_kset.base.duplex;
+	pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
+
+	if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
+		phylink_mac_config(pl, &pl->link_config);
+		phylink_mac_an_restart(pl);
+	}
+	mutex_unlock(&pl->state_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_set);
+
+int phylink_ethtool_nway_reset(struct phylink *pl)
+{
+	int ret = 0;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		ret = phy_restart_aneg(pl->phydev);
+	phylink_mac_an_restart(pl);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_nway_reset);
+
+void phylink_ethtool_get_pauseparam(struct phylink *pl,
+				    struct ethtool_pauseparam *pause)
+{
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	pause->autoneg = !!(pl->link_config.pause & MLO_PAUSE_AN);
+	pause->rx_pause = !!(pl->link_config.pause & MLO_PAUSE_RX);
+	pause->tx_pause = !!(pl->link_config.pause & MLO_PAUSE_TX);
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_pauseparam);
+
+int phylink_ethtool_set_pauseparam(struct phylink *pl,
+				   struct ethtool_pauseparam *pause)
+{
+	struct phylink_link_state *config = &pl->link_config;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (!phylink_test(pl->supported, Pause) &&
+	    !phylink_test(pl->supported, Asym_Pause))
+		return -EOPNOTSUPP;
+
+	if (!phylink_test(pl->supported, Asym_Pause) &&
+	    !pause->autoneg && pause->rx_pause != pause->tx_pause)
+		return -EINVAL;
+
+	config->pause &= ~(MLO_PAUSE_AN | MLO_PAUSE_TXRX_MASK);
+
+	if (pause->autoneg)
+		config->pause |= MLO_PAUSE_AN;
+	if (pause->rx_pause)
+		config->pause |= MLO_PAUSE_RX;
+	if (pause->tx_pause)
+		config->pause |= MLO_PAUSE_TX;
+
+	if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
+		switch (pl->link_an_mode) {
+		case MLO_AN_PHY:
+			/* Silently mark the carrier down, and then trigger a resolve */
+			netif_carrier_off(pl->netdev);
+			phylink_run_resolve(pl);
+			break;
+
+		case MLO_AN_FIXED:
+			/* Should we allow fixed links to change against the config? */
+			phylink_resolve_flow(pl, config);
+			phylink_mac_config(pl, config);
+			break;
+
+		case MLO_AN_SGMII:
+		case MLO_AN_8023Z:
+			phylink_mac_config(pl, config);
+			phylink_mac_an_restart(pl);
+			break;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_set_pauseparam);
+
+int phylink_ethtool_get_module_info(struct phylink *pl,
+				    struct ethtool_modinfo *modinfo)
+{
+	int ret = -EOPNOTSUPP;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->sfp_bus)
+		ret = sfp_get_module_info(pl->sfp_bus, modinfo);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_info);
+
+int phylink_ethtool_get_module_eeprom(struct phylink *pl,
+				      struct ethtool_eeprom *ee, u8 *buf)
+{
+	int ret = -EOPNOTSUPP;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->sfp_bus)
+		ret = sfp_get_module_eeprom(pl->sfp_bus, ee, buf);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_eeprom);
+
+int phylink_init_eee(struct phylink *pl, bool clk_stop_enable)
+{
+	int ret = -EPROTONOSUPPORT;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		ret = phy_init_eee(pl->phydev, clk_stop_enable);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_init_eee);
+
+int phylink_get_eee_err(struct phylink *pl)
+{
+	int ret = 0;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		ret = phy_get_eee_err(pl->phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_get_eee_err);
+
+int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_eee *eee)
+{
+	int ret = -EOPNOTSUPP;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		ret = phy_ethtool_get_eee(pl->phydev, eee);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_eee);
+
+int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_eee *eee)
+{
+	int ret = -EOPNOTSUPP;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev)
+		ret = phy_ethtool_set_eee(pl->phydev, eee);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_set_eee);
+
+/* This emulates MII registers for a fixed-mode phy operating as per the
+ * passed in state. "aneg" defines if we report negotiation is possible.
+ *
+ * FIXME: should deal with negotiation state too.
+ */
+static int phylink_mii_emul_read(struct net_device *ndev, unsigned int reg,
+				 struct phylink_link_state *state, bool aneg)
+{
+	struct fixed_phy_status fs;
+	int val;
+
+	fs.link = state->link;
+	fs.speed = state->speed;
+	fs.duplex = state->duplex;
+	fs.pause = state->pause & MLO_PAUSE_SYM;
+	fs.asym_pause = state->pause & MLO_PAUSE_ASYM;
+
+	val = swphy_read_reg(reg, &fs);
+	if (reg == MII_BMSR) {
+		if (!state->an_complete)
+			val &= ~BMSR_ANEGCOMPLETE;
+		if (!aneg)
+			val &= ~BMSR_ANEGCAPABLE;
+	}
+	return val;
+}
+
+static int phylink_phy_read(struct phylink *pl, unsigned int phy_id,
+			    unsigned int reg)
+{
+	struct phy_device *phydev = pl->phydev;
+	int prtad, devad;
+
+	if (mdio_phy_id_is_c45(phy_id)) {
+		prtad = mdio_phy_id_prtad(phy_id);
+		devad = mdio_phy_id_devad(phy_id);
+		devad = MII_ADDR_C45 | devad << 16 | reg;
+	} else if (phydev->is_c45) {
+		switch (reg) {
+		case MII_BMCR:
+		case MII_BMSR:
+		case MII_PHYSID1:
+		case MII_PHYSID2:
+			devad = __ffs(phydev->c45_ids.devices_in_package);
+			break;
+		case MII_ADVERTISE:
+		case MII_LPA:
+			if (!(phydev->c45_ids.devices_in_package & MDIO_DEVS_AN))
+				return -EINVAL;
+			devad = MDIO_MMD_AN;
+			if (reg == MII_ADVERTISE)
+				reg = MDIO_AN_ADVERTISE;
+			else
+				reg = MDIO_AN_LPA;
+			break;
+		default:
+			return -EINVAL;
+		}
+		prtad = phy_id;
+		devad = MII_ADDR_C45 | devad << 16 | reg;
+	} else {
+		prtad = phy_id;
+		devad = reg;
+	}
+	return mdiobus_read(pl->phydev->mdio.bus, prtad, devad);
+}
+
+static int phylink_phy_write(struct phylink *pl, unsigned int phy_id,
+			     unsigned int reg, unsigned int val)
+{
+	struct phy_device *phydev = pl->phydev;
+	int prtad, devad;
+
+	if (mdio_phy_id_is_c45(phy_id)) {
+		prtad = mdio_phy_id_prtad(phy_id);
+		devad = mdio_phy_id_devad(phy_id);
+		devad = MII_ADDR_C45 | devad << 16 | reg;
+	} else if (phydev->is_c45) {
+		switch (reg) {
+		case MII_BMCR:
+		case MII_BMSR:
+		case MII_PHYSID1:
+		case MII_PHYSID2:
+			devad = __ffs(phydev->c45_ids.devices_in_package);
+			break;
+		case MII_ADVERTISE:
+		case MII_LPA:
+			if (!(phydev->c45_ids.devices_in_package & MDIO_DEVS_AN))
+				return -EINVAL;
+			devad = MDIO_MMD_AN;
+			if (reg == MII_ADVERTISE)
+				reg = MDIO_AN_ADVERTISE;
+			else
+				reg = MDIO_AN_LPA;
+			break;
+		default:
+			return -EINVAL;
+		}
+		prtad = phy_id;
+		devad = MII_ADDR_C45 | devad << 16 | reg;
+	} else {
+		prtad = phy_id;
+		devad = reg;
+	}
+
+	return mdiobus_write(phydev->mdio.bus, prtad, devad, val);
+}
+
+static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
+			    unsigned int reg)
+{
+	struct phylink_link_state state;
+	int val = 0xffff;
+
+	switch (pl->link_an_mode) {
+	case MLO_AN_FIXED:
+		if (phy_id == 0) {
+			phylink_get_fixed_state(pl, &state);
+			val = phylink_mii_emul_read(pl->netdev, reg, &state,
+						    true);
+		}
+		break;
+
+	case MLO_AN_PHY:
+		return -EOPNOTSUPP;
+
+	case MLO_AN_SGMII:
+		/* No phy, fall through to 8023z method */
+	case MLO_AN_8023Z:
+		if (phy_id == 0) {
+			val = phylink_get_mac_state(pl, &state);
+			if (val < 0)
+				return val;
+
+			val = phylink_mii_emul_read(pl->netdev, reg, &state,
+						    true);
+		}
+		break;
+	}
+
+	return val & 0xffff;
+}
+
+static int phylink_mii_write(struct phylink *pl, unsigned int phy_id,
+			     unsigned int reg, unsigned int val)
+{
+	switch (pl->link_an_mode) {
+	case MLO_AN_FIXED:
+		break;
+
+	case MLO_AN_PHY:
+		return -EOPNOTSUPP;
+
+	case MLO_AN_SGMII:
+		/* No phy, fall through to 8023z method */
+	case MLO_AN_8023Z:
+		break;
+	}
+
+	return 0;
+}
+
+int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
+{
+	struct mii_ioctl_data *mii = if_mii(ifr);
+	int  ret;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	if (pl->phydev) {
+		/* PHYs only exist for MLO_AN_PHY and MLO_AN_SGMII */
+		switch (cmd) {
+		case SIOCGMIIPHY:
+			mii->phy_id = pl->phydev->mdio.addr;
+
+		case SIOCGMIIREG:
+			ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num);
+			if (ret >= 0) {
+				mii->val_out = ret;
+				ret = 0;
+			}
+			break;
+
+		case SIOCSMIIREG:
+			ret = phylink_phy_write(pl, mii->phy_id, mii->reg_num,
+						mii->val_in);
+			break;
+
+		default:
+			ret = phy_mii_ioctl(pl->phydev, ifr, cmd);
+			break;
+		}
+	} else {
+		switch (cmd) {
+		case SIOCGMIIPHY:
+			mii->phy_id = 0;
+
+		case SIOCGMIIREG:
+			ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num);
+			if (ret >= 0) {
+				mii->val_out = ret;
+				ret = 0;
+			}
+			break;
+
+		case SIOCSMIIREG:
+			ret = phylink_mii_write(pl, mii->phy_id, mii->reg_num,
+						mii->val_in);
+			break;
+
+		default:
+			ret = -EOPNOTSUPP;
+			break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_mii_ioctl);
+
+
+
+static int phylink_sfp_module_insert(void *upstream,
+				     const struct sfp_eeprom_id *id)
+{
+	struct phylink *pl = upstream;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, };
+	struct phylink_link_state config;
+	phy_interface_t iface;
+	int mode, ret = 0;
+	bool changed;
+	u8 port;
+
+	sfp_parse_support(pl->sfp_bus, id, support);
+	port = sfp_parse_port(pl->sfp_bus, id, support);
+	iface = sfp_parse_interface(pl->sfp_bus, id);
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	switch (iface) {
+	case PHY_INTERFACE_MODE_SGMII:
+		mode = MLO_AN_SGMII;
+		break;
+	case PHY_INTERFACE_MODE_1000BASEX:
+		mode = MLO_AN_8023Z;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memset(&config, 0, sizeof(config));
+	linkmode_copy(config.advertising, support);
+	config.interface = iface;
+	config.speed = SPEED_UNKNOWN;
+	config.duplex = DUPLEX_UNKNOWN;
+	config.pause = MLO_PAUSE_AN;
+	config.an_enabled = pl->link_config.an_enabled;
+
+	/* Ignore errors if we're expecting a PHY to attach later */
+	ret = phylink_validate(pl, support, &config);
+	if (ret) {
+		netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
+			   phylink_an_mode_str(mode), phy_modes(config.interface),
+			   __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+		return ret;
+	}
+
+	netdev_dbg(pl->netdev, "requesting link mode %s/%s with support %*pb\n",
+		   phylink_an_mode_str(mode), phy_modes(config.interface),
+		   __ETHTOOL_LINK_MODE_MASK_NBITS, support);
+
+	if (mode == MLO_AN_8023Z && pl->phydev)
+		return -EINVAL;
+
+	changed = !bitmap_equal(pl->supported, support,
+				__ETHTOOL_LINK_MODE_MASK_NBITS);
+	if (changed) {
+		linkmode_copy(pl->supported, support);
+		linkmode_copy(pl->link_config.advertising, config.advertising);
+	}
+
+	if (pl->link_an_mode != mode ||
+	    pl->link_config.interface != config.interface) {
+		pl->link_config.interface = config.interface;
+		pl->link_an_mode = mode;
+
+		changed = true;
+
+		netdev_info(pl->netdev, "switched to %s/%s link mode\n",
+			    phylink_an_mode_str(mode),
+			    phy_modes(config.interface));
+	}
+
+	pl->link_port = port;
+
+	if (changed && !test_bit(PHYLINK_DISABLE_STOPPED,
+				 &pl->phylink_disable_state))
+		phylink_mac_config(pl, &pl->link_config);
+
+	return ret;
+}
+
+static void phylink_sfp_link_down(void *upstream)
+{
+	struct phylink *pl = upstream;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+	flush_work(&pl->resolve);
+
+	netif_carrier_off(pl->netdev);
+}
+
+static void phylink_sfp_link_up(void *upstream)
+{
+	struct phylink *pl = upstream;
+
+	WARN_ON(!lockdep_rtnl_is_held());
+
+	clear_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+	phylink_run_resolve(pl);
+}
+
+static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
+{
+	return phylink_connect_phy(upstream, phy);
+}
+
+static void phylink_sfp_disconnect_phy(void *upstream)
+{
+	phylink_disconnect_phy(upstream);
+}
+
+static const struct sfp_upstream_ops sfp_phylink_ops = {
+	.module_insert = phylink_sfp_module_insert,
+	.link_up = phylink_sfp_link_up,
+	.link_down = phylink_sfp_link_down,
+	.connect_phy = phylink_sfp_connect_phy,
+	.disconnect_phy = phylink_sfp_disconnect_phy,
+};
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
new file mode 100644
index 000000000000..5cb5384697ea
--- /dev/null
+++ b/drivers/net/phy/sfp-bus.c
@@ -0,0 +1,475 @@
+#include <linux/export.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/phylink.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+#include "sfp.h"
+
+struct sfp_bus {
+	struct kref kref;
+	struct list_head node;
+	struct device_node *device_node;
+
+	const struct sfp_socket_ops *socket_ops;
+	struct device *sfp_dev;
+	struct sfp *sfp;
+
+	const struct sfp_upstream_ops *upstream_ops;
+	void *upstream;
+	struct net_device *netdev;
+	struct phy_device *phydev;
+
+	bool registered;
+	bool started;
+};
+
+
+int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+		   unsigned long *support)
+{
+	int port;
+
+	/* port is the physical connector, set this from the connector field. */
+	switch (id->base.connector) {
+	case SFP_CONNECTOR_SC:
+	case SFP_CONNECTOR_FIBERJACK:
+	case SFP_CONNECTOR_LC:
+	case SFP_CONNECTOR_MT_RJ:
+	case SFP_CONNECTOR_MU:
+	case SFP_CONNECTOR_OPTICAL_PIGTAIL:
+		if (support)
+			phylink_set(support, FIBRE);
+		port = PORT_FIBRE;
+		break;
+
+	case SFP_CONNECTOR_RJ45:
+		if (support)
+			phylink_set(support, TP);
+		port = PORT_TP;
+		break;
+
+	case SFP_CONNECTOR_UNSPEC:
+		if (id->base.e1000_base_t) {
+			if (support)
+				phylink_set(support, TP);
+			port = PORT_TP;
+			break;
+		}
+		/* fallthrough */
+	case SFP_CONNECTOR_SG: /* guess */
+	case SFP_CONNECTOR_MPO_1X12:
+	case SFP_CONNECTOR_MPO_2X16:
+	case SFP_CONNECTOR_HSSDC_II:
+	case SFP_CONNECTOR_COPPER_PIGTAIL:
+	case SFP_CONNECTOR_NOSEPARATE:
+	case SFP_CONNECTOR_MXC_2X16:
+		port = PORT_OTHER;
+		break;
+	default:
+		dev_warn(bus->sfp_dev, "SFP: unknown connector id 0x%02x\n",
+			 id->base.connector);
+		port = PORT_OTHER;
+		break;
+	}
+
+	return port;
+}
+EXPORT_SYMBOL_GPL(sfp_parse_port);
+
+phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
+				    const struct sfp_eeprom_id *id)
+{
+	phy_interface_t iface;
+
+	/* Setting the serdes link mode is guesswork: there's no field in
+	 * the EEPROM which indicates what mode should be used.
+	 *
+	 * If the module wants 64b66b, then it must be >= 10G.
+	 *
+	 * If it's a gigabit-only fiber module, it probably does not have
+	 * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
+	 * to SGMII mode (which is required to support non-gigabit speeds).
+	 */
+	switch (id->base.encoding) {
+	case SFP_ENCODING_8472_64B66B:
+		iface = PHY_INTERFACE_MODE_10GKR;
+		break;
+
+	case SFP_ENCODING_8B10B:
+		if (!id->base.e1000_base_t &&
+		    !id->base.e100_base_lx &&
+		    !id->base.e100_base_fx)
+			iface = PHY_INTERFACE_MODE_1000BASEX;
+		else
+			iface = PHY_INTERFACE_MODE_SGMII;
+		break;
+
+	default:
+		iface = PHY_INTERFACE_MODE_NA;
+		dev_err(bus->sfp_dev,
+			"SFP module encoding does not support 8b10b nor 64b66b\n");
+		break;
+	}
+
+	return iface;
+}
+EXPORT_SYMBOL_GPL(sfp_parse_interface);
+
+void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+		       unsigned long *support)
+{
+	phylink_set(support, Autoneg);
+	phylink_set(support, Pause);
+	phylink_set(support, Asym_Pause);
+
+	/* Set ethtool support from the compliance fields. */
+	if (id->base.e10g_base_sr)
+		phylink_set(support, 10000baseSR_Full);
+	if (id->base.e10g_base_lr)
+		phylink_set(support, 10000baseLR_Full);
+	if (id->base.e10g_base_lrm)
+		phylink_set(support, 10000baseLRM_Full);
+	if (id->base.e10g_base_er)
+		phylink_set(support, 10000baseER_Full);
+	if (id->base.e1000_base_sx ||
+	    id->base.e1000_base_lx ||
+	    id->base.e1000_base_cx)
+		phylink_set(support, 1000baseX_Full);
+	if (id->base.e1000_base_t) {
+		phylink_set(support, 1000baseT_Half);
+		phylink_set(support, 1000baseT_Full);
+	}
+
+	switch (id->base.extended_cc) {
+	case 0x00: /* Unspecified */
+		break;
+	case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
+		phylink_set(support, 100000baseSR4_Full);
+		phylink_set(support, 25000baseSR_Full);
+		break;
+	case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
+	case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
+		phylink_set(support, 100000baseLR4_ER4_Full);
+		break;
+	case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
+	case 0x0c: /* 25Gbase-CR CA-S */
+	case 0x0d: /* 25Gbase-CR CA-N */
+		phylink_set(support, 100000baseCR4_Full);
+		phylink_set(support, 25000baseCR_Full);
+		break;
+	default:
+		dev_warn(bus->sfp_dev,
+			 "Unknown/unsupported extended compliance code: 0x%02x\n",
+			 id->base.extended_cc);
+		break;
+	}
+
+	/* For fibre channel SFP, derive possible BaseX modes */
+	if (id->base.fc_speed_100 ||
+	    id->base.fc_speed_200 ||
+	    id->base.fc_speed_400) {
+		if (id->base.br_nominal >= 31)
+			phylink_set(support, 2500baseX_Full);
+		if (id->base.br_nominal >= 12)
+			phylink_set(support, 1000baseX_Full);
+	}
+
+	switch (id->base.connector) {
+	case SFP_CONNECTOR_SC:
+	case SFP_CONNECTOR_FIBERJACK:
+	case SFP_CONNECTOR_LC:
+	case SFP_CONNECTOR_MT_RJ:
+	case SFP_CONNECTOR_MU:
+	case SFP_CONNECTOR_OPTICAL_PIGTAIL:
+		break;
+
+	case SFP_CONNECTOR_UNSPEC:
+		if (id->base.e1000_base_t)
+			break;
+
+	case SFP_CONNECTOR_SG: /* guess */
+	case SFP_CONNECTOR_MPO_1X12:
+	case SFP_CONNECTOR_MPO_2X16:
+	case SFP_CONNECTOR_HSSDC_II:
+	case SFP_CONNECTOR_COPPER_PIGTAIL:
+	case SFP_CONNECTOR_NOSEPARATE:
+	case SFP_CONNECTOR_MXC_2X16:
+	default:
+		/* a guess at the supported link modes */
+		dev_warn(bus->sfp_dev,
+			 "Guessing link modes, please report...\n");
+		phylink_set(support, 1000baseT_Half);
+		phylink_set(support, 1000baseT_Full);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(sfp_parse_support);
+
+
+static LIST_HEAD(sfp_buses);
+static DEFINE_MUTEX(sfp_mutex);
+
+static const struct sfp_upstream_ops *sfp_get_upstream_ops(struct sfp_bus *bus)
+{
+	return bus->registered ? bus->upstream_ops : NULL;
+}
+
+static struct sfp_bus *sfp_bus_get(struct device_node *np)
+{
+	struct sfp_bus *sfp, *new, *found = NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+
+	mutex_lock(&sfp_mutex);
+
+	list_for_each_entry(sfp, &sfp_buses, node) {
+		if (sfp->device_node == np) {
+			kref_get(&sfp->kref);
+			found = sfp;
+			break;
+		}
+	}
+
+	if (!found && new) {
+		kref_init(&new->kref);
+		new->device_node = np;
+		list_add(&new->node, &sfp_buses);
+		found = new;
+		new = NULL;
+	}
+
+	mutex_unlock(&sfp_mutex);
+
+	kfree(new);
+
+	return found;
+}
+
+static void sfp_bus_release(struct kref *kref) __releases(sfp_mutex)
+{
+	struct sfp_bus *bus = container_of(kref, struct sfp_bus, kref);
+
+	list_del(&bus->node);
+	mutex_unlock(&sfp_mutex);
+	kfree(bus);
+}
+
+static void sfp_bus_put(struct sfp_bus *bus)
+{
+	kref_put_mutex(&bus->kref, sfp_bus_release, &sfp_mutex);
+}
+
+static int sfp_register_bus(struct sfp_bus *bus)
+{
+	const struct sfp_upstream_ops *ops = bus->upstream_ops;
+	int ret;
+
+	if (ops) {
+		if (ops->link_down)
+			ops->link_down(bus->upstream);
+		if (ops->connect_phy && bus->phydev) {
+			ret = ops->connect_phy(bus->upstream, bus->phydev);
+			if (ret)
+				return ret;
+		}
+	}
+	if (bus->started)
+		bus->socket_ops->start(bus->sfp);
+	bus->registered = true;
+	return 0;
+}
+
+static void sfp_unregister_bus(struct sfp_bus *bus)
+{
+	const struct sfp_upstream_ops *ops = bus->upstream_ops;
+
+	if (bus->registered) {
+		if (bus->started)
+			bus->socket_ops->stop(bus->sfp);
+		if (bus->phydev && ops && ops->disconnect_phy)
+			ops->disconnect_phy(bus->upstream);
+	}
+	bus->registered = false;
+}
+
+
+int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo)
+{
+	if (!bus->registered)
+		return -ENOIOCTLCMD;
+	return bus->socket_ops->module_info(bus->sfp, modinfo);
+}
+EXPORT_SYMBOL_GPL(sfp_get_module_info);
+
+int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
+	u8 *data)
+{
+	if (!bus->registered)
+		return -ENOIOCTLCMD;
+	return bus->socket_ops->module_eeprom(bus->sfp, ee, data);
+}
+EXPORT_SYMBOL_GPL(sfp_get_module_eeprom);
+
+void sfp_upstream_start(struct sfp_bus *bus)
+{
+	if (bus->registered)
+		bus->socket_ops->start(bus->sfp);
+	bus->started = true;
+}
+EXPORT_SYMBOL_GPL(sfp_upstream_start);
+
+void sfp_upstream_stop(struct sfp_bus *bus)
+{
+	if (bus->registered)
+		bus->socket_ops->stop(bus->sfp);
+	bus->started = false;
+}
+EXPORT_SYMBOL_GPL(sfp_upstream_stop);
+
+struct sfp_bus *sfp_register_upstream(struct device_node *np,
+	struct net_device *ndev, void *upstream,
+	const struct sfp_upstream_ops *ops)
+{
+	struct sfp_bus *bus = sfp_bus_get(np);
+	int ret = 0;
+
+	if (bus) {
+		rtnl_lock();
+		bus->upstream_ops = ops;
+		bus->upstream = upstream;
+		bus->netdev = ndev;
+
+		if (bus->sfp)
+			ret = sfp_register_bus(bus);
+		rtnl_unlock();
+	}
+
+	if (ret) {
+		sfp_bus_put(bus);
+		bus = NULL;
+	}
+
+	return bus;
+}
+EXPORT_SYMBOL_GPL(sfp_register_upstream);
+
+void sfp_unregister_upstream(struct sfp_bus *bus)
+{
+	rtnl_lock();
+	sfp_unregister_bus(bus);
+	bus->upstream = NULL;
+	bus->netdev = NULL;
+	rtnl_unlock();
+
+	sfp_bus_put(bus);
+}
+EXPORT_SYMBOL_GPL(sfp_unregister_upstream);
+
+
+/* Socket driver entry points */
+int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev)
+{
+	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+	int ret = 0;
+
+	if (ops && ops->connect_phy)
+		ret = ops->connect_phy(bus->upstream, phydev);
+
+	if (ret == 0)
+		bus->phydev = phydev;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sfp_add_phy);
+
+void sfp_remove_phy(struct sfp_bus *bus)
+{
+	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+	if (ops && ops->disconnect_phy)
+		ops->disconnect_phy(bus->upstream);
+	bus->phydev = NULL;
+}
+EXPORT_SYMBOL_GPL(sfp_remove_phy);
+
+
+void sfp_link_up(struct sfp_bus *bus)
+{
+	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+	if (ops && ops->link_up)
+		ops->link_up(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_link_up);
+
+void sfp_link_down(struct sfp_bus *bus)
+{
+	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+	if (ops && ops->link_down)
+		ops->link_down(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_link_down);
+
+int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id)
+{
+	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+	int ret = 0;
+
+	if (ops && ops->module_insert)
+		ret = ops->module_insert(bus->upstream, id);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sfp_module_insert);
+
+void sfp_module_remove(struct sfp_bus *bus)
+{
+	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+	if (ops && ops->module_remove)
+		ops->module_remove(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_module_remove);
+
+struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp,
+				    const struct sfp_socket_ops *ops)
+{
+	struct sfp_bus *bus = sfp_bus_get(dev->of_node);
+	int ret = 0;
+
+	if (bus) {
+		rtnl_lock();
+		bus->sfp_dev = dev;
+		bus->sfp = sfp;
+		bus->socket_ops = ops;
+
+		if (bus->netdev)
+			ret = sfp_register_bus(bus);
+		rtnl_unlock();
+	}
+
+	if (ret) {
+		sfp_bus_put(bus);
+		bus = NULL;
+	}
+
+	return bus;
+}
+EXPORT_SYMBOL_GPL(sfp_register_socket);
+
+void sfp_unregister_socket(struct sfp_bus *bus)
+{
+	rtnl_lock();
+	sfp_unregister_bus(bus);
+	bus->sfp_dev = NULL;
+	bus->sfp = NULL;
+	bus->socket_ops = NULL;
+	rtnl_unlock();
+
+	sfp_bus_put(bus);
+}
+EXPORT_SYMBOL_GPL(sfp_unregister_socket);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
new file mode 100644
index 000000000000..fb2cf4342f48
--- /dev/null
+++ b/drivers/net/phy/sfp.c
@@ -0,0 +1,915 @@
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "mdio-i2c.h"
+#include "sfp.h"
+#include "swphy.h"
+
+enum {
+	GPIO_MODDEF0,
+	GPIO_LOS,
+	GPIO_TX_FAULT,
+	GPIO_TX_DISABLE,
+	GPIO_RATE_SELECT,
+	GPIO_MAX,
+
+	SFP_F_PRESENT = BIT(GPIO_MODDEF0),
+	SFP_F_LOS = BIT(GPIO_LOS),
+	SFP_F_TX_FAULT = BIT(GPIO_TX_FAULT),
+	SFP_F_TX_DISABLE = BIT(GPIO_TX_DISABLE),
+	SFP_F_RATE_SELECT = BIT(GPIO_RATE_SELECT),
+
+	SFP_E_INSERT = 0,
+	SFP_E_REMOVE,
+	SFP_E_DEV_DOWN,
+	SFP_E_DEV_UP,
+	SFP_E_TX_FAULT,
+	SFP_E_TX_CLEAR,
+	SFP_E_LOS_HIGH,
+	SFP_E_LOS_LOW,
+	SFP_E_TIMEOUT,
+
+	SFP_MOD_EMPTY = 0,
+	SFP_MOD_PROBE,
+	SFP_MOD_PRESENT,
+	SFP_MOD_ERROR,
+
+	SFP_DEV_DOWN = 0,
+	SFP_DEV_UP,
+
+	SFP_S_DOWN = 0,
+	SFP_S_INIT,
+	SFP_S_WAIT_LOS,
+	SFP_S_LINK_UP,
+	SFP_S_TX_FAULT,
+	SFP_S_REINIT,
+	SFP_S_TX_DISABLE,
+};
+
+static const char *gpio_of_names[] = {
+	"moddef0",
+	"los",
+	"tx-fault",
+	"tx-disable",
+	"rate-select",
+};
+
+static const enum gpiod_flags gpio_flags[] = {
+	GPIOD_IN,
+	GPIOD_IN,
+	GPIOD_IN,
+	GPIOD_ASIS,
+	GPIOD_ASIS,
+};
+
+#define T_INIT_JIFFIES	msecs_to_jiffies(300)
+#define T_RESET_US	10
+#define T_FAULT_RECOVER	msecs_to_jiffies(1000)
+
+/* SFP module presence detection is poor: the three MOD DEF signals are
+ * the same length on the PCB, which means it's possible for MOD DEF 0 to
+ * connect before the I2C bus on MOD DEF 1/2.
+ *
+ * The SFP MSA specifies 300ms as t_init (the time taken for TX_FAULT to
+ * be deasserted) but makes no mention of the earliest time before we can
+ * access the I2C EEPROM.  However, Avago modules require 300ms.
+ */
+#define T_PROBE_INIT	msecs_to_jiffies(300)
+#define T_PROBE_RETRY	msecs_to_jiffies(100)
+
+/*
+ * SFP modules appear to always have their PHY configured for bus address
+ * 0x56 (which with mdio-i2c, translates to a PHY address of 22).
+ */
+#define SFP_PHY_ADDR	22
+
+/*
+ * Give this long for the PHY to reset.
+ */
+#define T_PHY_RESET_MS	50
+
+static DEFINE_MUTEX(sfp_mutex);
+
+struct sfp {
+	struct device *dev;
+	struct i2c_adapter *i2c;
+	struct mii_bus *i2c_mii;
+	struct sfp_bus *sfp_bus;
+	struct phy_device *mod_phy;
+
+	unsigned int (*get_state)(struct sfp *);
+	void (*set_state)(struct sfp *, unsigned int);
+	int (*read)(struct sfp *, bool, u8, void *, size_t);
+
+	struct gpio_desc *gpio[GPIO_MAX];
+
+	unsigned int state;
+	struct delayed_work poll;
+	struct delayed_work timeout;
+	struct mutex sm_mutex;
+	unsigned char sm_mod_state;
+	unsigned char sm_dev_state;
+	unsigned short sm_state;
+	unsigned int sm_retries;
+
+	struct sfp_eeprom_id id;
+};
+
+static unsigned long poll_jiffies;
+
+static unsigned int sfp_gpio_get_state(struct sfp *sfp)
+{
+	unsigned int i, state, v;
+
+	for (i = state = 0; i < GPIO_MAX; i++) {
+		if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i])
+			continue;
+
+		v = gpiod_get_value_cansleep(sfp->gpio[i]);
+		if (v)
+			state |= BIT(i);
+	}
+
+	return state;
+}
+
+static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
+{
+	if (state & SFP_F_PRESENT) {
+		/* If the module is present, drive the signals */
+		if (sfp->gpio[GPIO_TX_DISABLE])
+			gpiod_direction_output(sfp->gpio[GPIO_TX_DISABLE],
+						state & SFP_F_TX_DISABLE);
+		if (state & SFP_F_RATE_SELECT)
+			gpiod_direction_output(sfp->gpio[GPIO_RATE_SELECT],
+						state & SFP_F_RATE_SELECT);
+	} else {
+		/* Otherwise, let them float to the pull-ups */
+		if (sfp->gpio[GPIO_TX_DISABLE])
+			gpiod_direction_input(sfp->gpio[GPIO_TX_DISABLE]);
+		if (state & SFP_F_RATE_SELECT)
+			gpiod_direction_input(sfp->gpio[GPIO_RATE_SELECT]);
+	}
+}
+
+static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
+	void *buf, size_t len)
+{
+	struct i2c_msg msgs[2];
+	int ret;
+
+	msgs[0].addr = bus_addr;
+	msgs[0].flags = 0;
+	msgs[0].len = 1;
+	msgs[0].buf = &dev_addr;
+	msgs[1].addr = bus_addr;
+	msgs[1].flags = I2C_M_RD;
+	msgs[1].len = len;
+	msgs[1].buf = buf;
+
+	ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		return ret;
+
+	return ret == ARRAY_SIZE(msgs) ? len : 0;
+}
+
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
+	size_t len)
+{
+	return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+}
+
+static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
+{
+	struct mii_bus *i2c_mii;
+	int ret;
+
+	if (!i2c_check_functionality(i2c, I2C_FUNC_I2C))
+		return -EINVAL;
+
+	sfp->i2c = i2c;
+	sfp->read = sfp_i2c_read;
+
+	i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
+	if (IS_ERR(i2c_mii))
+		return PTR_ERR(i2c_mii);
+
+	i2c_mii->name = "SFP I2C Bus";
+	i2c_mii->phy_mask = ~0;
+
+	ret = mdiobus_register(i2c_mii);
+	if (ret < 0) {
+		mdiobus_free(i2c_mii);
+		return ret;
+	}
+
+	sfp->i2c_mii = i2c_mii;
+
+	return 0;
+}
+
+
+/* Interface */
+static unsigned int sfp_get_state(struct sfp *sfp)
+{
+	return sfp->get_state(sfp);
+}
+
+static void sfp_set_state(struct sfp *sfp, unsigned int state)
+{
+	sfp->set_state(sfp, state);
+}
+
+static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+	return sfp->read(sfp, a2, addr, buf, len);
+}
+
+static unsigned int sfp_check(void *buf, size_t len)
+{
+	u8 *p, check;
+
+	for (p = buf, check = 0; len; p++, len--)
+		check += *p;
+
+	return check;
+}
+
+/* Helpers */
+static void sfp_module_tx_disable(struct sfp *sfp)
+{
+	dev_dbg(sfp->dev, "tx disable %u -> %u\n",
+		sfp->state & SFP_F_TX_DISABLE ? 1 : 0, 1);
+	sfp->state |= SFP_F_TX_DISABLE;
+	sfp_set_state(sfp, sfp->state);
+}
+
+static void sfp_module_tx_enable(struct sfp *sfp)
+{
+	dev_dbg(sfp->dev, "tx disable %u -> %u\n",
+		sfp->state & SFP_F_TX_DISABLE ? 1 : 0, 0);
+	sfp->state &= ~SFP_F_TX_DISABLE;
+	sfp_set_state(sfp, sfp->state);
+}
+
+static void sfp_module_tx_fault_reset(struct sfp *sfp)
+{
+	unsigned int state = sfp->state;
+
+	if (state & SFP_F_TX_DISABLE)
+		return;
+
+	sfp_set_state(sfp, state | SFP_F_TX_DISABLE);
+
+	udelay(T_RESET_US);
+
+	sfp_set_state(sfp, state);
+}
+
+/* SFP state machine */
+static void sfp_sm_set_timer(struct sfp *sfp, unsigned int timeout)
+{
+	if (timeout)
+		mod_delayed_work(system_power_efficient_wq, &sfp->timeout,
+				 timeout);
+	else
+		cancel_delayed_work(&sfp->timeout);
+}
+
+static void sfp_sm_next(struct sfp *sfp, unsigned int state,
+			unsigned int timeout)
+{
+	sfp->sm_state = state;
+	sfp_sm_set_timer(sfp, timeout);
+}
+
+static void sfp_sm_ins_next(struct sfp *sfp, unsigned int state, unsigned int timeout)
+{
+	sfp->sm_mod_state = state;
+	sfp_sm_set_timer(sfp, timeout);
+}
+
+static void sfp_sm_phy_detach(struct sfp *sfp)
+{
+	phy_stop(sfp->mod_phy);
+	sfp_remove_phy(sfp->sfp_bus);
+	phy_device_remove(sfp->mod_phy);
+	phy_device_free(sfp->mod_phy);
+	sfp->mod_phy = NULL;
+}
+
+static void sfp_sm_probe_phy(struct sfp *sfp)
+{
+	struct phy_device *phy;
+	int err;
+
+	msleep(T_PHY_RESET_MS);
+
+	phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR);
+	if (IS_ERR(phy)) {
+		dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
+		return;
+	}
+	if (!phy) {
+		dev_info(sfp->dev, "no PHY detected\n");
+		return;
+	}
+
+	err = sfp_add_phy(sfp->sfp_bus, phy);
+	if (err) {
+		phy_device_remove(phy);
+		phy_device_free(phy);
+		dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err);
+		return;
+	}
+
+	sfp->mod_phy = phy;
+	phy_start(phy);
+}
+
+static void sfp_sm_link_up(struct sfp *sfp)
+{
+	sfp_link_up(sfp->sfp_bus);
+	sfp_sm_next(sfp, SFP_S_LINK_UP, 0);
+}
+
+static void sfp_sm_link_down(struct sfp *sfp)
+{
+	sfp_link_down(sfp->sfp_bus);
+}
+
+static void sfp_sm_link_check_los(struct sfp *sfp)
+{
+	unsigned int los = sfp->state & SFP_F_LOS;
+
+	/* FIXME: what if neither SFP_OPTIONS_LOS_INVERTED nor
+	 * SFP_OPTIONS_LOS_NORMAL are set?  For now, we assume
+	 * the same as SFP_OPTIONS_LOS_NORMAL set.
+	 */
+	if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED)
+		los ^= SFP_F_LOS;
+
+	if (los)
+		sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
+	else
+		sfp_sm_link_up(sfp);
+}
+
+static void sfp_sm_fault(struct sfp *sfp, bool warn)
+{
+	if (sfp->sm_retries && !--sfp->sm_retries) {
+		dev_err(sfp->dev, "module persistently indicates fault, disabling\n");
+		sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0);
+	} else {
+		if (warn)
+			dev_err(sfp->dev, "module transmit fault indicated\n");
+
+		sfp_sm_next(sfp, SFP_S_TX_FAULT, T_FAULT_RECOVER);
+	}
+}
+
+static void sfp_sm_mod_init(struct sfp *sfp)
+{
+	sfp_module_tx_enable(sfp);
+
+	/* Wait t_init before indicating that the link is up, provided the
+	 * current state indicates no TX_FAULT.  If TX_FAULT clears before
+	 * this time, that's fine too.
+	 */
+	sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES);
+	sfp->sm_retries = 5;
+
+	/* Setting the serdes link mode is guesswork: there's no
+	 * field in the EEPROM which indicates what mode should
+	 * be used.
+	 *
+	 * If it's a gigabit-only fiber module, it probably does
+	 * not have a PHY, so switch to 802.3z negotiation mode.
+	 * Otherwise, switch to SGMII mode (which is required to
+	 * support non-gigabit speeds) and probe for a PHY.
+	 */
+	if (sfp->id.base.e1000_base_t ||
+	    sfp->id.base.e100_base_lx ||
+	    sfp->id.base.e100_base_fx)
+		sfp_sm_probe_phy(sfp);
+}
+
+static int sfp_sm_mod_probe(struct sfp *sfp)
+{
+	/* SFP module inserted - read I2C data */
+	struct sfp_eeprom_id id;
+	char vendor[17];
+	char part[17];
+	char sn[17];
+	char date[9];
+	char rev[5];
+	u8 check;
+	int err;
+
+	err = sfp_read(sfp, false, 0, &id, sizeof(id));
+	if (err < 0) {
+		dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+		return -EAGAIN;
+	}
+
+	if (err != sizeof(id)) {
+		dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+		return -EAGAIN;
+	}
+
+	/* Validate the checksum over the base structure */
+	check = sfp_check(&id.base, sizeof(id.base) - 1);
+	if (check != id.base.cc_base) {
+		dev_err(sfp->dev,
+			"EEPROM base structure checksum failure: 0x%02x\n",
+			check);
+		print_hex_dump(KERN_ERR, "sfp EE: ", DUMP_PREFIX_OFFSET,
+			       16, 1, &id, sizeof(id.base) - 1, true);
+		return -EINVAL;
+	}
+
+	check = sfp_check(&id.ext, sizeof(id.ext) - 1);
+	if (check != id.ext.cc_ext) {
+		dev_err(sfp->dev,
+			"EEPROM extended structure checksum failure: 0x%02x\n",
+			check);
+		memset(&id.ext, 0, sizeof(id.ext));
+	}
+
+	sfp->id = id;
+
+	memcpy(vendor, sfp->id.base.vendor_name, 16);
+	vendor[16] = '\0';
+	memcpy(part, sfp->id.base.vendor_pn, 16);
+	part[16] = '\0';
+	memcpy(rev, sfp->id.base.vendor_rev, 4);
+	rev[4] = '\0';
+	memcpy(sn, sfp->id.ext.vendor_sn, 16);
+	sn[16] = '\0';
+	memcpy(date, sfp->id.ext.datecode, 8);
+	date[8] = '\0';
+
+	dev_info(sfp->dev, "module %s %s rev %s sn %s dc %s\n", vendor, part, rev, sn, date);
+
+	/* We only support SFP modules, not the legacy GBIC modules. */
+	if (sfp->id.base.phys_id != SFP_PHYS_ID_SFP ||
+	    sfp->id.base.phys_ext_id != SFP_PHYS_EXT_ID_SFP) {
+		dev_err(sfp->dev, "module is not SFP - phys id 0x%02x 0x%02x\n",
+			sfp->id.base.phys_id, sfp->id.base.phys_ext_id);
+		return -EINVAL;
+	}
+
+	return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+}
+
+static void sfp_sm_mod_remove(struct sfp *sfp)
+{
+	sfp_module_remove(sfp->sfp_bus);
+
+	if (sfp->mod_phy)
+		sfp_sm_phy_detach(sfp);
+
+	sfp_module_tx_disable(sfp);
+
+	memset(&sfp->id, 0, sizeof(sfp->id));
+
+	dev_info(sfp->dev, "module removed\n");
+}
+
+static void sfp_sm_event(struct sfp *sfp, unsigned int event)
+{
+	mutex_lock(&sfp->sm_mutex);
+
+	dev_dbg(sfp->dev, "SM: enter %u:%u:%u event %u\n",
+		sfp->sm_mod_state, sfp->sm_dev_state, sfp->sm_state, event);
+
+	/* This state machine tracks the insert/remove state of
+	 * the module, and handles probing the on-board EEPROM.
+	 */
+	switch (sfp->sm_mod_state) {
+	default:
+		if (event == SFP_E_INSERT) {
+			sfp_module_tx_disable(sfp);
+			sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT);
+		}
+		break;
+
+	case SFP_MOD_PROBE:
+		if (event == SFP_E_REMOVE) {
+			sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
+		} else if (event == SFP_E_TIMEOUT) {
+			int err = sfp_sm_mod_probe(sfp);
+
+			if (err == 0)
+				sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+			else if (err == -EAGAIN)
+				sfp_sm_set_timer(sfp, T_PROBE_RETRY);
+			else
+				sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+		}
+		break;
+
+	case SFP_MOD_PRESENT:
+	case SFP_MOD_ERROR:
+		if (event == SFP_E_REMOVE) {
+			sfp_sm_mod_remove(sfp);
+			sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
+		}
+		break;
+	}
+
+	/* This state machine tracks the netdev up/down state */
+	switch (sfp->sm_dev_state) {
+	default:
+		if (event == SFP_E_DEV_UP)
+			sfp->sm_dev_state = SFP_DEV_UP;
+		break;
+
+	case SFP_DEV_UP:
+		if (event == SFP_E_DEV_DOWN) {
+			/* If the module has a PHY, avoid raising TX disable
+			 * as this resets the PHY. Otherwise, raise it to
+			 * turn the laser off.
+			 */
+			if (!sfp->mod_phy)
+				sfp_module_tx_disable(sfp);
+			sfp->sm_dev_state = SFP_DEV_DOWN;
+		}
+		break;
+	}
+
+	/* Some events are global */
+	if (sfp->sm_state != SFP_S_DOWN &&
+	    (sfp->sm_mod_state != SFP_MOD_PRESENT ||
+	     sfp->sm_dev_state != SFP_DEV_UP)) {
+		if (sfp->sm_state == SFP_S_LINK_UP &&
+		    sfp->sm_dev_state == SFP_DEV_UP)
+			sfp_sm_link_down(sfp);
+		if (sfp->mod_phy)
+			sfp_sm_phy_detach(sfp);
+		sfp_sm_next(sfp, SFP_S_DOWN, 0);
+		mutex_unlock(&sfp->sm_mutex);
+		return;
+	}
+
+	/* The main state machine */
+	switch (sfp->sm_state) {
+	case SFP_S_DOWN:
+		if (sfp->sm_mod_state == SFP_MOD_PRESENT &&
+		    sfp->sm_dev_state == SFP_DEV_UP)
+			sfp_sm_mod_init(sfp);
+		break;
+
+	case SFP_S_INIT:
+		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT)
+			sfp_sm_fault(sfp, true);
+		else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR)
+			sfp_sm_link_check_los(sfp);
+		break;
+
+	case SFP_S_WAIT_LOS:
+		if (event == SFP_E_TX_FAULT)
+			sfp_sm_fault(sfp, true);
+		else if (event ==
+			 (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
+			  SFP_E_LOS_HIGH : SFP_E_LOS_LOW))
+			sfp_sm_link_up(sfp);
+		break;
+
+	case SFP_S_LINK_UP:
+		if (event == SFP_E_TX_FAULT) {
+			sfp_sm_link_down(sfp);
+			sfp_sm_fault(sfp, true);
+		} else if (event ==
+			   (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
+			    SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) {
+			sfp_sm_link_down(sfp);
+			sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
+		}
+		break;
+
+	case SFP_S_TX_FAULT:
+		if (event == SFP_E_TIMEOUT) {
+			sfp_module_tx_fault_reset(sfp);
+			sfp_sm_next(sfp, SFP_S_REINIT, T_INIT_JIFFIES);
+		}
+		break;
+
+	case SFP_S_REINIT:
+		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
+			sfp_sm_fault(sfp, false);
+		} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+			dev_info(sfp->dev, "module transmit fault recovered\n");
+			sfp_sm_link_check_los(sfp);
+		}
+		break;
+
+	case SFP_S_TX_DISABLE:
+		break;
+	}
+
+	dev_dbg(sfp->dev, "SM: exit %u:%u:%u\n",
+		sfp->sm_mod_state, sfp->sm_dev_state, sfp->sm_state);
+
+	mutex_unlock(&sfp->sm_mutex);
+}
+
+static void sfp_start(struct sfp *sfp)
+{
+	sfp_sm_event(sfp, SFP_E_DEV_UP);
+}
+
+static void sfp_stop(struct sfp *sfp)
+{
+	sfp_sm_event(sfp, SFP_E_DEV_DOWN);
+}
+
+static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo)
+{
+	/* locking... and check module is present */
+
+	if (sfp->id.ext.sff8472_compliance) {
+		modinfo->type = ETH_MODULE_SFF_8472;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+	} else {
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+	}
+	return 0;
+}
+
+static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee,
+	u8 *data)
+{
+	unsigned int first, last, len;
+	int ret;
+
+	if (ee->len == 0)
+		return -EINVAL;
+
+	first = ee->offset;
+	last = ee->offset + ee->len;
+	if (first < ETH_MODULE_SFF_8079_LEN) {
+		len = min_t(unsigned int, last, ETH_MODULE_SFF_8079_LEN);
+		len -= first;
+
+		ret = sfp->read(sfp, false, first, data, len);
+		if (ret < 0)
+			return ret;
+
+		first += len;
+		data += len;
+	}
+	if (first >= ETH_MODULE_SFF_8079_LEN &&
+	    first < ETH_MODULE_SFF_8472_LEN) {
+		len = min_t(unsigned int, last, ETH_MODULE_SFF_8472_LEN);
+		len -= first;
+		first -= ETH_MODULE_SFF_8079_LEN;
+
+		ret = sfp->read(sfp, true, first, data, len);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static const struct sfp_socket_ops sfp_module_ops = {
+	.start = sfp_start,
+	.stop = sfp_stop,
+	.module_info = sfp_module_info,
+	.module_eeprom = sfp_module_eeprom,
+};
+
+static void sfp_timeout(struct work_struct *work)
+{
+	struct sfp *sfp = container_of(work, struct sfp, timeout.work);
+
+	rtnl_lock();
+	sfp_sm_event(sfp, SFP_E_TIMEOUT);
+	rtnl_unlock();
+}
+
+static void sfp_check_state(struct sfp *sfp)
+{
+	unsigned int state, i, changed;
+
+	state = sfp_get_state(sfp);
+	changed = state ^ sfp->state;
+	changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
+
+	for (i = 0; i < GPIO_MAX; i++)
+		if (changed & BIT(i))
+			dev_dbg(sfp->dev, "%s %u -> %u\n", gpio_of_names[i],
+				!!(sfp->state & BIT(i)), !!(state & BIT(i)));
+
+	state |= sfp->state & (SFP_F_TX_DISABLE | SFP_F_RATE_SELECT);
+	sfp->state = state;
+
+	rtnl_lock();
+	if (changed & SFP_F_PRESENT)
+		sfp_sm_event(sfp, state & SFP_F_PRESENT ?
+				SFP_E_INSERT : SFP_E_REMOVE);
+
+	if (changed & SFP_F_TX_FAULT)
+		sfp_sm_event(sfp, state & SFP_F_TX_FAULT ?
+				SFP_E_TX_FAULT : SFP_E_TX_CLEAR);
+
+	if (changed & SFP_F_LOS)
+		sfp_sm_event(sfp, state & SFP_F_LOS ?
+				SFP_E_LOS_HIGH : SFP_E_LOS_LOW);
+	rtnl_unlock();
+}
+
+static irqreturn_t sfp_irq(int irq, void *data)
+{
+	struct sfp *sfp = data;
+
+	sfp_check_state(sfp);
+
+	return IRQ_HANDLED;
+}
+
+static void sfp_poll(struct work_struct *work)
+{
+	struct sfp *sfp = container_of(work, struct sfp, poll.work);
+
+	sfp_check_state(sfp);
+	mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+}
+
+static struct sfp *sfp_alloc(struct device *dev)
+{
+	struct sfp *sfp;
+
+	sfp = kzalloc(sizeof(*sfp), GFP_KERNEL);
+	if (!sfp)
+		return ERR_PTR(-ENOMEM);
+
+	sfp->dev = dev;
+
+	mutex_init(&sfp->sm_mutex);
+	INIT_DELAYED_WORK(&sfp->poll, sfp_poll);
+	INIT_DELAYED_WORK(&sfp->timeout, sfp_timeout);
+
+	return sfp;
+}
+
+static void sfp_cleanup(void *data)
+{
+	struct sfp *sfp = data;
+
+	cancel_delayed_work_sync(&sfp->poll);
+	cancel_delayed_work_sync(&sfp->timeout);
+	if (sfp->i2c_mii) {
+		mdiobus_unregister(sfp->i2c_mii);
+		mdiobus_free(sfp->i2c_mii);
+	}
+	if (sfp->i2c)
+		i2c_put_adapter(sfp->i2c);
+	kfree(sfp);
+}
+
+static int sfp_probe(struct platform_device *pdev)
+{
+	struct sfp *sfp;
+	bool poll = false;
+	int irq, err, i;
+
+	sfp = sfp_alloc(&pdev->dev);
+	if (IS_ERR(sfp))
+		return PTR_ERR(sfp);
+
+	platform_set_drvdata(pdev, sfp);
+
+	err = devm_add_action(sfp->dev, sfp_cleanup, sfp);
+	if (err < 0)
+		return err;
+
+	if (pdev->dev.of_node) {
+		struct device_node *node = pdev->dev.of_node;
+		struct device_node *np;
+
+		np = of_parse_phandle(node, "i2c-bus", 0);
+		if (np) {
+			struct i2c_adapter *i2c;
+
+			i2c = of_find_i2c_adapter_by_node(np);
+			of_node_put(np);
+			if (!i2c)
+				return -EPROBE_DEFER;
+
+			err = sfp_i2c_configure(sfp, i2c);
+			if (err < 0) {
+				i2c_put_adapter(i2c);
+				return err;
+			}
+		}
+
+		for (i = 0; i < GPIO_MAX; i++) {
+			sfp->gpio[i] = devm_gpiod_get_optional(sfp->dev,
+					   gpio_of_names[i], gpio_flags[i]);
+			if (IS_ERR(sfp->gpio[i]))
+				return PTR_ERR(sfp->gpio[i]);
+		}
+
+		sfp->get_state = sfp_gpio_get_state;
+		sfp->set_state = sfp_gpio_set_state;
+	}
+
+	sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
+	if (!sfp->sfp_bus)
+		return -ENOMEM;
+
+	/* Get the initial state, and always signal TX disable,
+	 * since the network interface will not be up.
+	 */
+	sfp->state = sfp_get_state(sfp) | SFP_F_TX_DISABLE;
+
+	if (sfp->gpio[GPIO_RATE_SELECT] &&
+	    gpiod_get_value_cansleep(sfp->gpio[GPIO_RATE_SELECT]))
+		sfp->state |= SFP_F_RATE_SELECT;
+	sfp_set_state(sfp, sfp->state);
+	sfp_module_tx_disable(sfp);
+	rtnl_lock();
+	if (sfp->state & SFP_F_PRESENT)
+		sfp_sm_event(sfp, SFP_E_INSERT);
+	rtnl_unlock();
+
+	for (i = 0; i < GPIO_MAX; i++) {
+		if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i])
+			continue;
+
+		irq = gpiod_to_irq(sfp->gpio[i]);
+		if (!irq) {
+			poll = true;
+			continue;
+		}
+
+		err = devm_request_threaded_irq(sfp->dev, irq, NULL, sfp_irq,
+						IRQF_ONESHOT |
+						IRQF_TRIGGER_RISING |
+						IRQF_TRIGGER_FALLING,
+						dev_name(sfp->dev), sfp);
+		if (err)
+			poll = true;
+	}
+
+	if (poll)
+		mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+
+	return 0;
+}
+
+static int sfp_remove(struct platform_device *pdev)
+{
+	struct sfp *sfp = platform_get_drvdata(pdev);
+
+	sfp_unregister_socket(sfp->sfp_bus);
+
+	return 0;
+}
+
+static const struct of_device_id sfp_of_match[] = {
+	{ .compatible = "sff,sfp", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, sfp_of_match);
+
+static struct platform_driver sfp_driver = {
+	.probe = sfp_probe,
+	.remove = sfp_remove,
+	.driver = {
+		.name = "sfp",
+		.of_match_table = sfp_of_match,
+	},
+};
+
+static int sfp_init(void)
+{
+	poll_jiffies = msecs_to_jiffies(100);
+
+	return platform_driver_register(&sfp_driver);
+}
+module_init(sfp_init);
+
+static void sfp_exit(void)
+{
+	platform_driver_unregister(&sfp_driver);
+}
+module_exit(sfp_exit);
+
+MODULE_ALIAS("platform:sfp");
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h
new file mode 100644
index 000000000000..31b0acf337e2
--- /dev/null
+++ b/drivers/net/phy/sfp.h
@@ -0,0 +1,28 @@
+#ifndef SFP_H
+#define SFP_H
+
+#include <linux/ethtool.h>
+#include <linux/sfp.h>
+
+struct sfp;
+
+struct sfp_socket_ops {
+	void (*start)(struct sfp *sfp);
+	void (*stop)(struct sfp *sfp);
+	int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo);
+	int (*module_eeprom)(struct sfp *sfp, struct ethtool_eeprom *ee,
+			     u8 *data);
+};
+
+int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev);
+void sfp_remove_phy(struct sfp_bus *bus);
+void sfp_link_up(struct sfp_bus *bus);
+void sfp_link_down(struct sfp_bus *bus);
+int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
+void sfp_module_remove(struct sfp_bus *bus);
+int sfp_link_configure(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
+struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp,
+				    const struct sfp_socket_ops *ops);
+void sfp_unregister_socket(struct sfp_bus *bus);
+
+#endif
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 3570c7576993..ca267fd28ab8 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -943,9 +943,6 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
 			if (arg & TUN_F_TSO6)
 				feature_mask |= NETIF_F_TSO6;
 		}
-
-		if (arg & TUN_F_UFO)
-			feature_mask |= NETIF_F_UFO;
 	}
 
 	/* tun/tap driver inverts the usage for TSO offloads, where
@@ -956,7 +953,7 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
 	 * When user space turns off TSO, we turn off GSO/LRO so that
 	 * user-space will not receive TSO frames.
 	 */
-	if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
+	if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6))
 		features |= RX_OFFLOADS;
 	else
 		features &= ~RX_OFFLOADS;
@@ -1078,7 +1075,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
 	case TUNSETOFFLOAD:
 		/* let the user check for future flags */
 		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
-			    TUN_F_TSO_ECN | TUN_F_UFO))
+			    TUN_F_TSO_ECN))
 			return -EINVAL;
 
 		rtnl_lock();
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 32ad87345f57..d21510d47aa2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -199,7 +199,7 @@ struct tun_struct {
 	struct net_device	*dev;
 	netdev_features_t	set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
-			  NETIF_F_TSO6|NETIF_F_UFO)
+			  NETIF_F_TSO6)
 
 	int			align;
 	int			vnet_hdr_sz;
@@ -892,7 +892,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	    sk_filter(tfile->socket.sk, skb))
 		goto drop;
 
-	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		goto drop;
 
 	skb_tx_timestamp(skb);
@@ -1921,11 +1921,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
 				features |= NETIF_F_TSO6;
 			arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
 		}
-
-		if (arg & TUN_F_UFO) {
-			features |= NETIF_F_UFO;
-			arg &= ~TUN_F_UFO;
-		}
 	}
 
 	/* This gives the user a way to test for new features in future by
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 8f572b9f3625..811b18215cae 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -367,7 +367,7 @@ static struct attribute *cdc_ncm_sysfs_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group cdc_ncm_sysfs_attr_group = {
+static const struct attribute_group cdc_ncm_sysfs_attr_group = {
 	.name = "cdc_ncm",
 	.attrs = cdc_ncm_sysfs_attrs,
 };
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 98f17b05c68b..e90de2186ffc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -57,6 +57,11 @@ DECLARE_EWMA(pkt_len, 0, 64)
 
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
+const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4,
+					 VIRTIO_NET_F_GUEST_TSO6,
+					 VIRTIO_NET_F_GUEST_ECN,
+					 VIRTIO_NET_F_GUEST_UFO };
+
 struct virtnet_stats {
 	struct u64_stats_sync tx_syncp;
 	struct u64_stats_sync rx_syncp;
@@ -164,10 +169,13 @@ struct virtnet_info {
 	u8 ctrl_promisc;
 	u8 ctrl_allmulti;
 	u16 ctrl_vid;
+	u64 ctrl_offloads;
 
 	/* Ethtool settings */
 	u8 duplex;
 	u32 speed;
+
+	unsigned long guest_offloads;
 };
 
 struct padded_vnet_hdr {
@@ -270,6 +278,23 @@ static void skb_xmit_done(struct virtqueue *vq)
 		netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+#define MRG_CTX_HEADER_SHIFT 22
+static void *mergeable_len_to_ctx(unsigned int truesize,
+				  unsigned int headroom)
+{
+	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
+}
+
+static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
+{
+	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
+}
+
+static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
+{
+	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
@@ -390,19 +415,85 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
 }
 
+/* We copy the packet for XDP in the following cases:
+ *
+ * 1) Packet is scattered across multiple rx buffers.
+ * 2) Headroom space is insufficient.
+ *
+ * This is inefficient but it's a temporary condition that
+ * we hit right after XDP is enabled and until queue is refilled
+ * with large buffers with sufficient headroom - so it should affect
+ * at most queue size packets.
+ * Afterwards, the conditions to enable
+ * XDP should preclude the underlying device from sending packets
+ * across multiple buffers (num_buf > 1), and we make sure buffers
+ * have enough headroom.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+				       u16 *num_buf,
+				       struct page *p,
+				       int offset,
+				       int page_off,
+				       unsigned int *len)
+{
+	struct page *page = alloc_page(GFP_ATOMIC);
+
+	if (!page)
+		return NULL;
+
+	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	page_off += *len;
+
+	while (--*num_buf) {
+		unsigned int buflen;
+		void *buf;
+		int off;
+
+		buf = virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!buf))
+			goto err_buf;
+
+		p = virt_to_head_page(buf);
+		off = buf - page_address(p);
+
+		/* guard against a misconfigured or uncooperative backend that
+		 * is sending packet larger than the MTU.
+		 */
+		if ((page_off + buflen) > PAGE_SIZE) {
+			put_page(p);
+			goto err_buf;
+		}
+
+		memcpy(page_address(page) + page_off,
+		       page_address(p) + off, buflen);
+		page_off += buflen;
+		put_page(p);
+	}
+
+	/* Headroom does not contribute to packet length */
+	*len = page_off - VIRTIO_XDP_HEADROOM;
+	return page;
+err_buf:
+	__free_pages(page, 0);
+	return NULL;
+}
+
 static struct sk_buff *receive_small(struct net_device *dev,
 				     struct virtnet_info *vi,
 				     struct receive_queue *rq,
-				     void *buf, unsigned int len)
+				     void *buf, void *ctx,
+				     unsigned int len)
 {
 	struct sk_buff *skb;
 	struct bpf_prog *xdp_prog;
-	unsigned int xdp_headroom = virtnet_get_headroom(vi);
+	unsigned int xdp_headroom = (unsigned long)ctx;
 	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
 	unsigned int headroom = vi->hdr_len + header_offset;
 	unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct page *page = virt_to_head_page(buf);
 	unsigned int delta = 0;
+	struct page *xdp_page;
 	len -= vi->hdr_len;
 
 	rcu_read_lock();
@@ -416,6 +507,27 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
 
+		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+			int offset = buf - page_address(page) + header_offset;
+			unsigned int tlen = len + vi->hdr_len;
+			u16 num_buf = 1;
+
+			xdp_headroom = virtnet_get_headroom(vi);
+			header_offset = VIRTNET_RX_PAD + xdp_headroom;
+			headroom = vi->hdr_len + header_offset;
+			buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+			xdp_page = xdp_linearize_page(rq, &num_buf, page,
+						      offset, header_offset,
+						      &tlen);
+			if (!xdp_page)
+				goto err_xdp;
+
+			buf = page_address(xdp_page);
+			put_page(page);
+			page = xdp_page;
+		}
+
 		xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
 		xdp.data = xdp.data_hard_start + xdp_headroom;
 		xdp.data_end = xdp.data + len;
@@ -444,7 +556,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
 	skb = build_skb(buf, buflen);
 	if (!skb) {
-		put_page(virt_to_head_page(buf));
+		put_page(page);
 		goto err;
 	}
 	skb_reserve(skb, headroom - delta);
@@ -460,7 +572,7 @@ err:
 err_xdp:
 	rcu_read_unlock();
 	dev->stats.rx_dropped++;
-	put_page(virt_to_head_page(buf));
+	put_page(page);
 xdp_xmit:
 	return NULL;
 }
@@ -485,66 +597,6 @@ err:
 	return NULL;
 }
 
-/* The conditions to enable XDP should preclude the underlying device from
- * sending packets across multiple buffers (num_buf > 1). However per spec
- * it does not appear to be illegal to do so but rather just against convention.
- * So in order to avoid making a system unresponsive the packets are pushed
- * into a page and the XDP program is run. This will be extremely slow and we
- * push a warning to the user to fix this as soon as possible. Fixing this may
- * require resolving the underlying hardware to determine why multiple buffers
- * are being received or simply loading the XDP program in the ingress stack
- * after the skb is built because there is no advantage to running it here
- * anymore.
- */
-static struct page *xdp_linearize_page(struct receive_queue *rq,
-				       u16 *num_buf,
-				       struct page *p,
-				       int offset,
-				       unsigned int *len)
-{
-	struct page *page = alloc_page(GFP_ATOMIC);
-	unsigned int page_off = VIRTIO_XDP_HEADROOM;
-
-	if (!page)
-		return NULL;
-
-	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
-	page_off += *len;
-
-	while (--*num_buf) {
-		unsigned int buflen;
-		void *buf;
-		int off;
-
-		buf = virtqueue_get_buf(rq->vq, &buflen);
-		if (unlikely(!buf))
-			goto err_buf;
-
-		p = virt_to_head_page(buf);
-		off = buf - page_address(p);
-
-		/* guard against a misconfigured or uncooperative backend that
-		 * is sending packet larger than the MTU.
-		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
-			put_page(p);
-			goto err_buf;
-		}
-
-		memcpy(page_address(page) + page_off,
-		       page_address(p) + off, buflen);
-		page_off += buflen;
-		put_page(p);
-	}
-
-	/* Headroom does not contribute to packet length */
-	*len = page_off - VIRTIO_XDP_HEADROOM;
-	return page;
-err_buf:
-	__free_pages(page, 0);
-	return NULL;
-}
-
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
@@ -559,6 +611,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct sk_buff *head_skb, *curr_skb;
 	struct bpf_prog *xdp_prog;
 	unsigned int truesize;
+	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 
 	head_skb = NULL;
 
@@ -571,10 +624,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		u32 act;
 
 		/* This happens when rx buffer size is underestimated */
-		if (unlikely(num_buf > 1)) {
+		if (unlikely(num_buf > 1 ||
+			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
 			xdp_page = xdp_linearize_page(rq, &num_buf,
-						      page, offset, &len);
+						      page, offset,
+						      VIRTIO_XDP_HEADROOM,
+						      &len);
 			if (!xdp_page)
 				goto err_xdp;
 			offset = VIRTIO_XDP_HEADROOM;
@@ -639,13 +695,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 	rcu_read_unlock();
 
-	if (unlikely(len > (unsigned long)ctx)) {
+	truesize = mergeable_ctx_to_truesize(ctx);
+	if (unlikely(len > truesize)) {
 		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 			 dev->name, len, (unsigned long)ctx);
 		dev->stats.rx_length_errors++;
 		goto err_skb;
 	}
-	truesize = (unsigned long)ctx;
+
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
 	curr_skb = head_skb;
 
@@ -665,13 +722,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		}
 
 		page = virt_to_head_page(buf);
-		if (unlikely(len > (unsigned long)ctx)) {
+
+		truesize = mergeable_ctx_to_truesize(ctx);
+		if (unlikely(len > truesize)) {
 			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 				 dev->name, len, (unsigned long)ctx);
 			dev->stats.rx_length_errors++;
 			goto err_skb;
 		}
-		truesize = (unsigned long)ctx;
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
 		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -754,7 +812,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 	else if (vi->big_packets)
 		skb = receive_big(dev, vi, rq, buf, len);
 	else
-		skb = receive_small(dev, vi, rq, buf, len);
+		skb = receive_small(dev, vi, rq, buf, ctx, len);
 
 	if (unlikely(!skb))
 		return 0;
@@ -787,12 +845,18 @@ frame_err:
 	return 0;
 }
 
+/* Unlike mergeable buffers, all buffers are allocated to the
+ * same size, except for the headroom. For this reason we do
+ * not need to use  mergeable_len_to_ctx here - it is enough
+ * to store the headroom as the context ignoring the truesize.
+ */
 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 			     gfp_t gfp)
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	char *buf;
 	unsigned int xdp_headroom = virtnet_get_headroom(vi);
+	void *ctx = (void *)(unsigned long)xdp_headroom;
 	int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
 	int err;
 
@@ -806,10 +870,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 	alloc_frag->offset += len;
 	sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
 		    vi->hdr_len + GOOD_PACKET_LEN);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
-
 	return err;
 }
 
@@ -902,7 +965,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	}
 
 	sg_init_one(rq->sg, buf, len);
-	ctx = (void *)(unsigned long)len;
+	ctx = mergeable_len_to_ctx(len, headroom);
 	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
@@ -1014,7 +1077,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
 	void *buf;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-	if (vi->mergeable_rx_bufs) {
+	if (!vi->big_packets || vi->mergeable_rx_bufs) {
 		void *ctx;
 
 		while (received < budget &&
@@ -1813,7 +1876,6 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 }
 
 static int init_vqs(struct virtnet_info *vi);
-static void _remove_vq_common(struct virtnet_info *vi);
 
 static int virtnet_restore_up(struct virtio_device *vdev)
 {
@@ -1842,37 +1904,45 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	return err;
 }
 
-static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
+static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
 {
-	struct virtio_device *dev = vi->vdev;
-	int ret;
+	struct scatterlist sg;
+	vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads);
 
-	virtio_config_disable(dev);
-	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
-	virtnet_freeze_down(dev);
-	_remove_vq_common(vi);
+	sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads));
 
-	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
-	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
+		dev_warn(&vi->dev->dev, "Fail to set guest offload. \n");
+		return -EINVAL;
+	}
 
-	ret = virtio_finalize_features(dev);
-	if (ret)
-		goto err;
+	return 0;
+}
 
-	vi->xdp_queue_pairs = xdp_qp;
-	ret = virtnet_restore_up(dev);
-	if (ret)
-		goto err;
-	ret = _virtnet_set_queues(vi, curr_qp);
-	if (ret)
-		goto err;
+static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
+{
+	u64 offloads = 0;
 
-	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-	virtio_config_enable(dev);
-	return 0;
-err:
-	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	return ret;
+	if (!vi->guest_offloads)
+		return 0;
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+		offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+	return virtnet_set_guest_offloads(vi, offloads);
+}
+
+static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
+{
+	u64 offloads = vi->guest_offloads;
+
+	if (!vi->guest_offloads)
+		return 0;
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+		offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+	return virtnet_set_guest_offloads(vi, offloads);
 }
 
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
@@ -1884,10 +1954,11 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	u16 xdp_qp = 0, curr_qp;
 	int i, err;
 
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
-	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
-	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
-	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
+	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
 		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
 		return -EOPNOTSUPP;
 	}
@@ -1921,35 +1992,35 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 			return PTR_ERR(prog);
 	}
 
-	/* Changing the headroom in buffers is a disruptive operation because
-	 * existing buffers must be flushed and reallocated. This will happen
-	 * when a xdp program is initially added or xdp is disabled by removing
-	 * the xdp program resulting in number of XDP queues changing.
-	 */
-	if (vi->xdp_queue_pairs != xdp_qp) {
-		err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
-		if (err) {
-			dev_warn(&dev->dev, "XDP reset failure.\n");
-			goto virtio_reset_err;
-		}
-	}
+	/* Make sure NAPI is not using any XDP TX queues for RX. */
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		napi_disable(&vi->rq[i].napi);
 
 	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+	if (err)
+		goto err;
+	vi->xdp_queue_pairs = xdp_qp;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
 		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+		if (i == 0) {
+			if (!old_prog)
+				virtnet_clear_guest_offloads(vi);
+			if (!prog)
+				virtnet_restore_guest_offloads(vi);
+		}
 		if (old_prog)
 			bpf_prog_put(old_prog);
+		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 	}
 
 	return 0;
 
-virtio_reset_err:
-	/* On reset error do our best to unwind XDP changes inflight and return
-	 * error up to user space for resolution. The underlying reset hung on
-	 * us so not much we can do here.
-	 */
+err:
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 	if (prog)
 		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
 	return err;
@@ -2182,7 +2253,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
 	if (!names)
 		goto err_names;
-	if (vi->mergeable_rx_bufs) {
+	if (!vi->big_packets || vi->mergeable_rx_bufs) {
 		ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
 		if (!ctx)
 			goto err_ctx;
@@ -2428,7 +2499,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
 
 		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
-			dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
+			dev->hw_features |= NETIF_F_TSO
 				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
 		}
 		/* Individual feature bits: what can host handle? */
@@ -2438,13 +2509,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 			dev->hw_features |= NETIF_F_TSO6;
 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
 			dev->hw_features |= NETIF_F_TSO_ECN;
-		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
-			dev->hw_features |= NETIF_F_UFO;
 
 		dev->features |= NETIF_F_GSO_ROBUST;
 
 		if (gso)
-			dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
+			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
 		/* (!csum && gso) case will be fixed by register_netdev() */
 	}
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
@@ -2577,6 +2646,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 		netif_carrier_on(dev);
 	}
 
+	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
+		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
+			set_bit(guest_offloads[i], &vi->guest_offloads);
+
 	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
 		 dev->name, max_queue_pairs);
 
@@ -2597,15 +2670,6 @@ free:
 	return err;
 }
 
-static void _remove_vq_common(struct virtnet_info *vi)
-{
-	vi->vdev->config->reset(vi->vdev);
-	free_unused_bufs(vi);
-	_free_receive_bufs(vi);
-	free_receive_page_frags(vi);
-	virtnet_del_vqs(vi);
-}
-
 static void remove_vq_common(struct virtnet_info *vi)
 {
 	vi->vdev->config->reset(vi->vdev);
@@ -2637,8 +2701,7 @@ static void virtnet_remove(struct virtio_device *vdev)
 	free_netdev(vi->dev);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int virtnet_freeze(struct virtio_device *vdev)
+static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 
@@ -2649,7 +2712,7 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	return 0;
 }
 
-static int virtnet_restore(struct virtio_device *vdev)
+static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 	int err;
@@ -2665,7 +2728,6 @@ static int virtnet_restore(struct virtio_device *vdev)
 
 	return 0;
 }
-#endif
 
 static struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
@@ -2682,7 +2744,7 @@ static struct virtio_device_id id_table[] = {
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
 	VIRTIO_NET_F_CTRL_MAC_ADDR, \
-	VIRTIO_NET_F_MTU
+	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
 
 static unsigned int features[] = {
 	VIRTNET_FEATURES,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 96aa7e6cf214..dbca067540d0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2608,7 +2608,7 @@ static struct device_type vxlan_type = {
  * supply the listening VXLAN udp ports. Callers are expected
  * to implement the ndo_udp_tunnel_add.
  */
-static void vxlan_push_rx_ports(struct net_device *dev)
+static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
 {
 	struct vxlan_sock *vs;
 	struct net *net = dev_net(dev);
@@ -2617,11 +2617,19 @@ static void vxlan_push_rx_ports(struct net_device *dev)
 
 	spin_lock(&vn->sock_lock);
 	for (i = 0; i < PORT_HASH_SIZE; ++i) {
-		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist)
-			udp_tunnel_push_rx_port(dev, vs->sock,
-						(vs->flags & VXLAN_F_GPE) ?
-						UDP_TUNNEL_TYPE_VXLAN_GPE :
-						UDP_TUNNEL_TYPE_VXLAN);
+		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
+			unsigned short type;
+
+			if (vs->flags & VXLAN_F_GPE)
+				type = UDP_TUNNEL_TYPE_VXLAN_GPE;
+			else
+				type = UDP_TUNNEL_TYPE_VXLAN;
+
+			if (push)
+				udp_tunnel_push_rx_port(dev, vs->sock, type);
+			else
+				udp_tunnel_drop_rx_port(dev, vs->sock, type);
+		}
 	}
 	spin_unlock(&vn->sock_lock);
 }
@@ -3630,10 +3638,15 @@ static int vxlan_netdevice_event(struct notifier_block *unused,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 
-	if (event == NETDEV_UNREGISTER)
+	if (event == NETDEV_UNREGISTER) {
+		vxlan_offload_rx_ports(dev, false);
 		vxlan_handle_lowerdev_unregister(vn, dev);
-	else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
-		vxlan_push_rx_ports(dev);
+	} else if (event == NETDEV_REGISTER) {
+		vxlan_offload_rx_ports(dev, true);
+	} else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
+		   event == NETDEV_UDP_TUNNEL_DROP_INFO) {
+		vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
index 1447a8352383..2d3e5e263a32 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
@@ -78,10 +78,7 @@ int brcmf_debug_attach(struct brcmf_pub *drvr)
 		return -ENODEV;
 
 	drvr->dbgfs_dir = debugfs_create_dir(dev_name(dev), root_folder);
-	if (IS_ERR(drvr->dbgfs_dir))
-		return PTR_ERR(drvr->dbgfs_dir);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(drvr->dbgfs_dir);
 }
 
 void brcmf_debug_detach(struct brcmf_pub *drvr)
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 84143a02adce..54201c02fdb8 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -7837,7 +7837,7 @@ static int writerids(struct net_device *dev, aironet_ioctl *comp) {
 	struct airo_info *ai = dev->ml_priv;
 	int  ridcode;
         int  enabled;
-	static int (* writer)(struct airo_info *, u16 rid, const void *, int, int);
+	int (*writer)(struct airo_info *, u16 rid, const void *, int, int);
 	unsigned char *iobuf;
 
 	/* Only super-user can write RIDs */
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index aaaca4d08e2b..ccbe74589eec 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -4324,7 +4324,7 @@ static struct attribute *ipw2100_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group ipw2100_attribute_group = {
+static const struct attribute_group ipw2100_attribute_group = {
 	.attrs = ipw2100_sysfs_entries,
 };
 
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index 9368abdf18e2..c311b1a994c1 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -11500,7 +11500,7 @@ static struct attribute *ipw_sysfs_entries[] = {
 	NULL
 };
 
-static struct attribute_group ipw_attribute_group = {
+static const struct attribute_group ipw_attribute_group = {
 	.name = NULL,		/* put in device directory */
 	.attrs = ipw_sysfs_entries,
 };
diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index 38bf403bb1e1..329f3a63dadd 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -3464,7 +3464,7 @@ static struct attribute *il3945_sysfs_entries[] = {
 	NULL
 };
 
-static struct attribute_group il3945_attribute_group = {
+static const struct attribute_group il3945_attribute_group = {
 	.name = NULL,		/* put in device directory */
 	.attrs = il3945_sysfs_entries,
 };
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index 5b51fba75595..de9b6522c43f 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -4654,7 +4654,7 @@ static struct attribute *il_sysfs_entries[] = {
 	NULL
 };
 
-static struct attribute_group il_attribute_group = {
+static const struct attribute_group il_attribute_group = {
 	.name = NULL,		/* put in device directory */
 	.attrs = il_sysfs_entries,
 };
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
index 55f238a2a310..c58393eab6a1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
@@ -478,7 +478,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index)
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	u16 read_point = 0, write_point = 0, remind_cnt = 0;
 	u32 tmp_4byte = 0;
-	static u16 last_read_point;
 	static bool start_rx;
 
 	tmp_4byte = rtl_read_dword(rtlpriv, REG_RXQ_TXBD_IDX);
@@ -506,7 +505,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index)
 
 	rtlpci->rx_ring[queue_index].next_rx_rp = write_point;
 
-	last_read_point = read_point;
 	return remind_cnt;
 }
 
@@ -917,7 +915,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	u16 cur_tx_rp = 0;
 	u16 cur_tx_wp = 0;
-	static u16 last_txw_point;
 	static bool over_run;
 	u32 tmp = 0;
 	u8 q_idx = *val;
@@ -951,9 +948,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
 				rtl_write_word(rtlpriv,
 					       get_desc_addr_fr_q_idx(q_idx),
 					       ring->cur_tx_wp);
-
-				if (q_idx == 1)
-					last_txw_point = cur_tx_wp;
 			}
 
 			if (ring->avl_desc < (max_tx_desc - 15)) {
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index e0dbd6e48a98..a0d27c04e22f 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -422,13 +422,11 @@ int of_phy_register_fixed_link(struct device_node *np)
 	struct fixed_phy_status status = {};
 	struct device_node *fixed_link_node;
 	const __be32 *fixed_link_prop;
-	int link_gpio;
-	int len, err;
 	struct phy_device *phy;
 	const char *managed;
+	int link_gpio, len;
 
-	err = of_property_read_string(np, "managed", &managed);
-	if (err == 0) {
+	if (of_property_read_string(np, "managed", &managed) == 0) {
 		if (strcmp(managed, "in-band-status") == 0) {
 			/* status is zeroed, namely its .link member */
 			phy = fixed_phy_register(PHY_POLL, &status, -1, np);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 06d044862e58..ba08b78ed630 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -533,6 +533,7 @@ static void handle_tx(struct vhost_net *net)
 			ubuf->callback = vhost_zerocopy_callback;
 			ubuf->ctx = nvq->ubufs;
 			ubuf->desc = nvq->upend_idx;
+			atomic_set(&ubuf->refcnt, 1);
 			msg.msg_control = ubuf;
 			msg.msg_controllen = sizeof(ubuf);
 			ubufs = nvq->ubufs;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5e1b548828e6..9aaa177e8209 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -391,7 +391,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	vq->desc_state[head].data = data;
 	if (indirect)
 		vq->desc_state[head].indir_desc = desc;
-	if (ctx)
+	else
 		vq->desc_state[head].indir_desc = ctx;
 
 	/* Put entry in available array (but don't update avail->idx until they
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 100b207efc9e..c05f1f1c0d41 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
-#include <rxrpc/packet.h>
 #include "internal.h"
 #include "afs_fs.h"
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 02781e78ffb6..10743043d431 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -14,7 +14,6 @@
 
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
 #include "internal.h"
 #include "afs_cm.h"
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b69e7a5869ff..6353c7474dba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -318,6 +318,12 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+
+/* Map specifics */
+struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
+void __dev_map_flush(struct bpf_map *map);
+
 #else
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -356,6 +362,20 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
 static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 {
 }
+
+static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
+						       u32 key)
+{
+	return NULL;
+}
+
+static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
+{
+}
+
+static inline void __dev_map_flush(struct bpf_map *map)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 3d137c33d664..b1e1035ca24b 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -35,3 +35,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+#endif
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 83cc9863444b..afdbb701fdb4 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -374,5 +374,9 @@ struct ethtool_ops {
 				      struct ethtool_link_ksettings *);
 	int	(*set_link_ksettings)(struct net_device *,
 				      const struct ethtool_link_ksettings *);
+	int	(*get_fecparam)(struct net_device *,
+				      struct ethtool_fecparam *);
+	int	(*set_fecparam)(struct net_device *,
+				      struct ethtool_fecparam *);
 };
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index bfef1e5734f8..d19ed3c15e1e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -711,7 +711,21 @@ bool bpf_helper_changes_pkt_data(void *func);
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
+
+/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
+ * same cpu context. Further for best results no more than a single map
+ * for the do_redirect/do_flush pair should be used. This limitation is
+ * because we only track one map and force a flush when the map changes.
+ * This does not appear to be a real limitation for existing software.
+ */
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb);
+int xdp_do_redirect(struct net_device *dev,
+		    struct xdp_buff *xdp,
+		    struct bpf_prog *prog);
+void xdp_do_flush_map(void);
+
 void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_redirect(u32 ifindex);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/include/linux/net.h b/include/linux/net.h
index dda2cc939a53..b5c15b31709b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -190,8 +190,16 @@ struct proto_ops {
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
 	int		(*set_peek_off)(struct sock *sk, int val);
 	int		(*peek_len)(struct socket *sock);
+
+	/* The following functions are called internally by kernel with
+	 * sock lock already held.
+	 */
 	int		(*read_sock)(struct sock *sk, read_descriptor_t *desc,
 				     sk_read_actor_t recv_actor);
+	int		(*sendpage_locked)(struct sock *sk, struct page *page,
+					   int offset, size_t size, int flags);
+	int		(*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
+					  size_t size);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
@@ -279,6 +287,8 @@ do {									\
 
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+			  struct kvec *vec, size_t num, size_t len);
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len, int flags);
 
@@ -297,6 +307,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
 		      unsigned int optlen);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags);
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			   size_t size, int flags);
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 1d4737cffc71..dc8b4896b77b 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -36,7 +36,6 @@ enum {
 	/**/NETIF_F_GSO_SHIFT,		/* keep the order of SKB_GSO_* bits */
 	NETIF_F_TSO_BIT			/* ... TCPv4 segmentation */
 		= NETIF_F_GSO_SHIFT,
-	NETIF_F_UFO_BIT,		/* ... UDPv4 fragmentation */
 	NETIF_F_GSO_ROBUST_BIT,		/* ... ->SKB_GSO_DODGY */
 	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
 	NETIF_F_TSO_MANGLEID_BIT,	/* ... IPV4 ID mangling allowed */
@@ -76,6 +75,7 @@ enum {
 	NETIF_F_HW_TC_BIT,		/* Offload TC infrastructure */
 	NETIF_F_HW_ESP_BIT,		/* Hardware ESP transformation offload */
 	NETIF_F_HW_ESP_TX_CSUM_BIT,	/* ESP with TX checksum offload */
+	NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
 
 	/*
 	 * Add your fresh new feature above and remember to update
@@ -118,7 +118,6 @@ enum {
 #define NETIF_F_TSO6		__NETIF_F(TSO6)
 #define NETIF_F_TSO_ECN		__NETIF_F(TSO_ECN)
 #define NETIF_F_TSO		__NETIF_F(TSO)
-#define NETIF_F_UFO		__NETIF_F(UFO)
 #define NETIF_F_VLAN_CHALLENGED	__NETIF_F(VLAN_CHALLENGED)
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
@@ -140,6 +139,7 @@ enum {
 #define NETIF_F_HW_TC		__NETIF_F(HW_TC)
 #define NETIF_F_HW_ESP		__NETIF_F(HW_ESP)
 #define NETIF_F_HW_ESP_TX_CSUM	__NETIF_F(HW_ESP_TX_CSUM)
+#define	NETIF_F_RX_UDP_TUNNEL_PORT  __NETIF_F(RX_UDP_TUNNEL_PORT)
 
 #define for_each_netdev_feature(mask_addr, bit)	\
 	for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
@@ -172,7 +172,7 @@ enum {
 				 NETIF_F_FSO)
 
 /* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE	(NETIF_F_ALL_TSO | NETIF_F_UFO | \
+#define NETIF_F_GSO_SOFTWARE	(NETIF_F_ALL_TSO | \
 				 NETIF_F_GSO_SCTP)
 
 /*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 779b23595596..1d238d54c484 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -66,6 +66,7 @@ struct mpls_dev;
 /* UDP Tunnel offloads */
 struct udp_tunnel_info;
 struct bpf_prog;
+struct xdp_buff;
 
 void netdev_set_default_ethtool_ops(struct net_device *dev,
 				    const struct ethtool_ops *ops);
@@ -770,31 +771,14 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 				       struct sk_buff *skb);
 
-/* These structures hold the attributes of qdisc and classifiers
- * that are being passed to the netdevice through the setup_tc op.
- */
-enum {
+enum tc_setup_type {
 	TC_SETUP_MQPRIO,
 	TC_SETUP_CLSU32,
 	TC_SETUP_CLSFLOWER,
-	TC_SETUP_MATCHALL,
+	TC_SETUP_CLSMATCHALL,
 	TC_SETUP_CLSBPF,
 };
 
-struct tc_cls_u32_offload;
-
-struct tc_to_netdev {
-	unsigned int type;
-	union {
-		struct tc_cls_u32_offload *cls_u32;
-		struct tc_cls_flower_offload *cls_flower;
-		struct tc_cls_matchall_offload *cls_mall;
-		struct tc_cls_bpf_offload *cls_bpf;
-		struct tc_mqprio_qopt *mqprio;
-	};
-	bool egress_dev;
-};
-
 /* These structures hold the attributes of xdp state that are being passed
  * to the netdevice through the xdp op.
  */
@@ -977,8 +961,8 @@ struct xfrmdev_ops {
  *      with PF and querying it may introduce a theoretical security risk.
  * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
- * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index,
- *		       __be16 protocol, struct tc_to_netdev *tc);
+ * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type,
+ *		       void *type_data);
  *	Called to setup any 'tc' scheduler, classifier or action on @dev.
  *	This is always called from the stack with the rtnl lock held and netif
  *	tx queues stopped. This allows the netdevice to perform queue
@@ -1138,7 +1122,12 @@ struct xfrmdev_ops {
  * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp);
  *	This function is used to set or query state related to XDP on the
  *	netdevice. See definition of enum xdp_netdev_command for details.
- *
+ * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
+ *	This function is used to submit a XDP packet for transmit on a
+ *	netdevice.
+ * void (*ndo_xdp_flush)(struct net_device *dev);
+ *	This function is used to inform the driver to flush a paticular
+ *	xpd tx queue. Must be called on same CPU as xdp_xmit.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1221,9 +1210,8 @@ struct net_device_ops {
 						   struct net_device *dev,
 						   int vf, bool setting);
 	int			(*ndo_setup_tc)(struct net_device *dev,
-						u32 handle, u32 chain_index,
-						__be16 protocol,
-						struct tc_to_netdev *tc);
+						enum tc_setup_type type,
+						void *type_data);
 #if IS_ENABLED(CONFIG_FCOE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
@@ -1323,6 +1311,9 @@ struct net_device_ops {
 						       int needed_headroom);
 	int			(*ndo_xdp)(struct net_device *dev,
 					   struct netdev_xdp *xdp);
+	int			(*ndo_xdp_xmit)(struct net_device *dev,
+						struct xdp_buff *xdp);
+	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
 
 /**
@@ -2308,6 +2299,7 @@ struct netdev_lag_lower_state_info {
 #define NETDEV_PRECHANGEUPPER	0x001A
 #define NETDEV_CHANGELOWERSTATE	0x001B
 #define NETDEV_UDP_TUNNEL_PUSH_INFO	0x001C
+#define NETDEV_UDP_TUNNEL_DROP_INFO	0x001D
 #define NETDEV_CHANGE_TX_QUEUE_LEN	0x001E
 
 int register_netdevice_notifier(struct notifier_block *nb);
@@ -2423,8 +2415,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
 struct net_device *__dev_get_by_name(struct net *net, const char *name);
 int dev_alloc_name(struct net_device *dev, const char *name);
 int dev_open(struct net_device *dev);
-int dev_close(struct net_device *dev);
-int dev_close_many(struct list_head *head, bool unlink);
+void dev_close(struct net_device *dev);
+void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
@@ -4089,7 +4081,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 
 	/* check flags correspondence */
 	BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
-	BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_DODGY   != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 0bb5b212ab42..d78cd01ea513 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -182,6 +182,7 @@ static inline const char *phy_modes(phy_interface_t interface)
 #define MII_ADDR_C45 (1<<30)
 
 struct device;
+struct phylink;
 struct sk_buff;
 
 /*
@@ -469,11 +470,13 @@ struct phy_device {
 
 	struct mutex lock;
 
+	struct phylink *phylink;
 	struct net_device *attached_dev;
 
 	u8 mdix;
 	u8 mdix_ctrl;
 
+	void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier);
 	void (*adjust_link)(struct net_device *dev);
 };
 #define to_phy_device(d) container_of(to_mdio_device(d), \
@@ -667,6 +670,24 @@ struct phy_fixup {
 	int (*run)(struct phy_device *phydev);
 };
 
+const char *phy_speed_to_str(int speed);
+const char *phy_duplex_to_str(unsigned int duplex);
+
+/* A structure for mapping a particular speed and duplex
+ * combination to a particular SUPPORTED and ADVERTISED value
+ */
+struct phy_setting {
+	u32 speed;
+	u8 duplex;
+	u8 bit;
+};
+
+const struct phy_setting *
+phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
+		   size_t maxbit, bool exact);
+size_t phy_speeds(unsigned int *speeds, size_t size,
+		  unsigned long *mask, size_t maxbit);
+
 /**
  * phy_read_mmd - Convenience function for reading a register
  * from an MMD on a given PHY.
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
new file mode 100644
index 000000000000..af67edd4ae38
--- /dev/null
+++ b/include/linux/phylink.h
@@ -0,0 +1,148 @@
+#ifndef NETDEV_PCS_H
+#define NETDEV_PCS_H
+
+#include <linux/phy.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+struct device_node;
+struct ethtool_cmd;
+struct net_device;
+
+enum {
+	MLO_PAUSE_NONE,
+	MLO_PAUSE_ASYM = BIT(0),
+	MLO_PAUSE_SYM = BIT(1),
+	MLO_PAUSE_RX = BIT(2),
+	MLO_PAUSE_TX = BIT(3),
+	MLO_PAUSE_TXRX_MASK = MLO_PAUSE_TX | MLO_PAUSE_RX,
+	MLO_PAUSE_AN = BIT(4),
+
+	MLO_AN_PHY = 0,	/* Conventional PHY */
+	MLO_AN_FIXED,	/* Fixed-link mode */
+	MLO_AN_SGMII,	/* Cisco SGMII protocol */
+	MLO_AN_8023Z,	/* 1000base-X protocol */
+};
+
+static inline bool phylink_autoneg_inband(unsigned int mode)
+{
+	return mode == MLO_AN_SGMII || mode == MLO_AN_8023Z;
+}
+
+struct phylink_link_state {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
+	phy_interface_t interface;	/* PHY_INTERFACE_xxx */
+	int speed;
+	int duplex;
+	int pause;
+	unsigned int link:1;
+	unsigned int an_enabled:1;
+	unsigned int an_complete:1;
+};
+
+struct phylink_mac_ops {
+	/**
+	 * validate: validate and update the link configuration
+	 * @ndev: net_device structure associated with MAC
+	 * @config: configuration to validate
+	 *
+	 * Update the %config->supported and %config->advertised masks
+	 * clearing bits that can not be supported.
+	 *
+	 * Note: the PHY may be able to transform from one connection
+	 * technology to another, so, eg, don't clear 1000BaseX just
+	 * because the MAC is unable to support it.  This is more about
+	 * clearing unsupported speeds and duplex settings.
+	 *
+	 * If the %config->interface mode is %PHY_INTERFACE_MODE_1000BASEX
+	 * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode
+	 * based on %config->advertised and/or %config->speed.
+	 */
+	void (*validate)(struct net_device *ndev, unsigned long *supported,
+			 struct phylink_link_state *state);
+
+	/* Read the current link state from the hardware */
+	int (*mac_link_state)(struct net_device *, struct phylink_link_state *);
+
+	/* Configure the MAC */
+	/**
+	 * mac_config: configure the MAC for the selected mode and state
+	 * @ndev: net_device structure for the MAC
+	 * @mode: one of MLO_AN_FIXED, MLO_AN_PHY, MLO_AN_8023Z, MLO_AN_SGMII
+	 * @state: state structure
+	 *
+	 * The action performed depends on the currently selected mode:
+	 *
+	 * %MLO_AN_FIXED, %MLO_AN_PHY:
+	 *   set the specified speed, duplex, pause mode, and phy interface
+	 *   mode in the provided @state.
+	 * %MLO_AN_8023Z:
+	 *   place the link in 1000base-X mode, advertising the parameters
+	 *   given in advertising in @state.
+	 * %MLO_AN_SGMII:
+	 *   place the link in Cisco SGMII mode - there is no advertisment
+	 *   to make as the PHY communicates the speed and duplex to the
+	 *   MAC over the in-band control word.  Configuration of the pause
+	 *   mode is as per MLO_AN_PHY since this is not included.
+	 */
+	void (*mac_config)(struct net_device *ndev, unsigned int mode,
+			   const struct phylink_link_state *state);
+
+	/**
+	 * mac_an_restart: restart 802.3z BaseX autonegotiation
+	 * @ndev: net_device structure for the MAC
+	 */
+	void (*mac_an_restart)(struct net_device *ndev);
+
+	void (*mac_link_down)(struct net_device *, unsigned int mode);
+	void (*mac_link_up)(struct net_device *, unsigned int mode,
+			    struct phy_device *);
+};
+
+struct phylink *phylink_create(struct net_device *, struct device_node *,
+	phy_interface_t iface, const struct phylink_mac_ops *ops);
+void phylink_destroy(struct phylink *);
+
+int phylink_connect_phy(struct phylink *, struct phy_device *);
+int phylink_of_phy_connect(struct phylink *, struct device_node *);
+void phylink_disconnect_phy(struct phylink *);
+
+void phylink_mac_change(struct phylink *, bool up);
+
+void phylink_start(struct phylink *);
+void phylink_stop(struct phylink *);
+
+void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *);
+int phylink_ethtool_set_wol(struct phylink *, struct ethtool_wolinfo *);
+
+int phylink_ethtool_ksettings_get(struct phylink *,
+				  struct ethtool_link_ksettings *);
+int phylink_ethtool_ksettings_set(struct phylink *,
+				  const struct ethtool_link_ksettings *);
+int phylink_ethtool_nway_reset(struct phylink *);
+void phylink_ethtool_get_pauseparam(struct phylink *,
+				    struct ethtool_pauseparam *);
+int phylink_ethtool_set_pauseparam(struct phylink *,
+				   struct ethtool_pauseparam *);
+int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *);
+int phylink_ethtool_get_module_eeprom(struct phylink *,
+				      struct ethtool_eeprom *, u8 *);
+int phylink_init_eee(struct phylink *, bool);
+int phylink_get_eee_err(struct phylink *);
+int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
+int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
+int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
+
+#define phylink_zero(bm) \
+	bitmap_zero(bm, __ETHTOOL_LINK_MODE_MASK_NBITS)
+#define __phylink_do_bit(op, bm, mode) \
+	op(ETHTOOL_LINK_MODE_ ## mode ## _BIT, bm)
+
+#define phylink_set(bm, mode)	__phylink_do_bit(__set_bit, bm, mode)
+#define phylink_clear(bm, mode)	__phylink_do_bit(__clear_bit, bm, mode)
+#define phylink_test(bm, mode)	__phylink_do_bit(test_bit, bm, mode)
+
+void phylink_set_port_modes(unsigned long *bits);
+
+#endif
diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h
new file mode 100644
index 000000000000..8a5f9f0b2c52
--- /dev/null
+++ b/include/linux/platform_data/mdio-bcm-unimac.h
@@ -0,0 +1,13 @@
+#ifndef __MDIO_BCM_UNIMAC_PDATA_H
+#define __MDIO_BCM_UNIMAC_PDATA_H
+
+struct unimac_mdio_pdata {
+	u32 phy_mask;
+	int (*wait_func)(void *data);
+	void *wait_func_data;
+	const char *bus_name;
+};
+
+#define UNIMAC_MDIO_DRV_NAME	"unimac-mdio"
+
+#endif /* __MDIO_BCM_UNIMAC_PDATA_H */
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 0eef0a2b1901..d60de4a39810 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -323,6 +323,7 @@ struct qed_eth_ops {
 
 	int (*configure_arfs_searcher)(struct qed_dev *cdev,
 				       bool en_searcher);
+	int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle);
 };
 
 const struct qed_eth_ops *qed_get_eth_ops(void);
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ef39c7f40ae6..cc646ca97974 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -161,6 +161,18 @@ enum qed_nvm_images {
 	QED_NVM_IMAGE_FCOE_CFG,
 };
 
+struct qed_link_eee_params {
+	u32 tx_lpi_timer;
+#define QED_EEE_1G_ADV		BIT(0)
+#define QED_EEE_10G_ADV		BIT(1)
+
+	/* Capabilities are represented using QED_EEE_*_ADV values */
+	u8 adv_caps;
+	u8 lp_adv_caps;
+	bool enable;
+	bool tx_lpi_enable;
+};
+
 enum qed_led_mode {
 	QED_LED_MODE_OFF,
 	QED_LED_MODE_ON,
@@ -172,8 +184,9 @@ enum qed_led_mode {
 
 #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr))
 
-#define QED_COALESCE_MAX 0xFF
+#define QED_COALESCE_MAX 0x1FF
 #define QED_DEFAULT_RX_USECS 12
+#define QED_DEFAULT_TX_USECS 48
 
 /* forward */
 struct qed_dev;
@@ -408,6 +421,7 @@ struct qed_link_params {
 #define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED    BIT(2)
 #define QED_LINK_OVERRIDE_PAUSE_CONFIG          BIT(3)
 #define QED_LINK_OVERRIDE_LOOPBACK_MODE         BIT(4)
+#define QED_LINK_OVERRIDE_EEE_CONFIG            BIT(5)
 	u32	override_flags;
 	bool	autoneg;
 	u32	adv_speeds;
@@ -422,6 +436,7 @@ struct qed_link_params {
 #define QED_LINK_LOOPBACK_EXT                   BIT(3)
 #define QED_LINK_LOOPBACK_MAC                   BIT(4)
 	u32	loopback_mode;
+	struct qed_link_eee_params eee;
 };
 
 struct qed_link_output {
@@ -437,6 +452,12 @@ struct qed_link_output {
 	u8	port;                   /* In PORT defs */
 	bool	autoneg;
 	u32	pause_config;
+
+	/* EEE - capability & param */
+	bool eee_supported;
+	bool eee_active;
+	u8 sup_caps;
+	struct qed_link_eee_params eee;
 };
 
 struct qed_probe_params {
@@ -654,16 +675,6 @@ struct qed_common_ops {
 			     enum qed_nvm_images type, u8 *buf, u16 len);
 
 /**
- * @brief get_coalesce - Get coalesce parameters in usec
- *
- * @param cdev
- * @param rx_coal - Rx coalesce value in usec
- * @param tx_coal - Tx coalesce value in usec
- *
- */
-	void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal);
-
-/**
  * @brief set_coalesce - Configure Rx coalesce value in usec
  *
  * @param cdev
@@ -674,8 +685,8 @@ struct qed_common_ops {
  *
  * @return 0 on success, error otherwise.
  */
-	int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
-			    u16 qid, u16 sb_id);
+	int (*set_coalesce)(struct qed_dev *cdev,
+			    u16 rx_coal, u16 tx_coal, void *handle);
 
 /**
  * @brief set_led - Configure LED mode
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 5d5415e129d4..3c07e4135127 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -36,7 +36,8 @@ struct user_struct {
 	struct hlist_node uidhash_node;
 	kuid_t uid;
 
-#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
+#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
+    defined(CONFIG_NET)
 	atomic_long_t locked_vm;
 #endif
 };
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 99e866487e2f..82b171e1aa0b 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -273,87 +273,85 @@ struct sctp_init_chunk {
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
-typedef struct sctp_ipv4addr_param {
+struct sctp_ipv4addr_param {
 	struct sctp_paramhdr param_hdr;
-	struct in_addr  addr;
-} sctp_ipv4addr_param_t;
+	struct in_addr addr;
+};
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
-typedef struct sctp_ipv6addr_param {
+struct sctp_ipv6addr_param {
 	struct sctp_paramhdr param_hdr;
 	struct in6_addr addr;
-} sctp_ipv6addr_param_t;
+};
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
-typedef struct sctp_cookie_preserve_param {
+struct sctp_cookie_preserve_param {
 	struct sctp_paramhdr param_hdr;
-	__be32          lifespan_increment;
-} sctp_cookie_preserve_param_t;
+	__be32 lifespan_increment;
+};
 
 /* Section 3.3.2.1 Host Name Address (11) */
-typedef struct sctp_hostname_param {
+struct sctp_hostname_param {
 	struct sctp_paramhdr param_hdr;
 	uint8_t hostname[0];
-} sctp_hostname_param_t;
+};
 
 /* Section 3.3.2.1 Supported Address Types (12) */
-typedef struct sctp_supported_addrs_param {
+struct sctp_supported_addrs_param {
 	struct sctp_paramhdr param_hdr;
 	__be16 types[0];
-} sctp_supported_addrs_param_t;
-
-/* Appendix A. ECN Capable (32768) */
-typedef struct sctp_ecn_capable_param {
-	struct sctp_paramhdr param_hdr;
-} sctp_ecn_capable_param_t;
+};
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
-typedef struct sctp_adaptation_ind_param {
+struct sctp_adaptation_ind_param {
 	struct sctp_paramhdr param_hdr;
 	__be32 adaptation_ind;
-} sctp_adaptation_ind_param_t;
+};
 
 /* ADDIP Section 4.2.7 Supported Extensions Parameter */
-typedef struct sctp_supported_ext_param {
+struct sctp_supported_ext_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} sctp_supported_ext_param_t;
+};
 
 /* AUTH Section 3.1 Random */
-typedef struct sctp_random_param {
+struct sctp_random_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 random_val[0];
-} sctp_random_param_t;
+};
 
 /* AUTH Section 3.2 Chunk List */
-typedef struct sctp_chunks_param {
+struct sctp_chunks_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} sctp_chunks_param_t;
+};
 
 /* AUTH Section 3.3 HMAC Algorithm */
-typedef struct sctp_hmac_algo_param {
+struct sctp_hmac_algo_param {
 	struct sctp_paramhdr param_hdr;
 	__be16 hmac_ids[0];
-} sctp_hmac_algo_param_t;
+};
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
  *   association.
  */
-typedef struct sctp_init_chunk sctp_initack_chunk_t;
+struct sctp_initack_chunk {
+	struct sctp_chunkhdr chunk_hdr;
+	struct sctp_inithdr init_hdr;
+};
 
 /* Section 3.3.3.1 State Cookie (7) */
-typedef struct sctp_cookie_param {
+struct sctp_cookie_param {
 	struct sctp_paramhdr p;
 	__u8 body[0];
-} sctp_cookie_param_t;
+};
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
-typedef struct sctp_unrecognized_param {
+struct sctp_unrecognized_param {
 	struct sctp_paramhdr param_hdr;
 	struct sctp_paramhdr unrecognized;
-} sctp_unrecognized_param_t;
+};
 
 
 
@@ -365,30 +363,28 @@ typedef struct sctp_unrecognized_param {
  *  subsequences of DATA chunks as represented by their TSNs.
  */
 
-typedef struct sctp_gap_ack_block {
+struct sctp_gap_ack_block {
 	__be16 start;
 	__be16 end;
-} sctp_gap_ack_block_t;
-
-typedef __be32 sctp_dup_tsn_t;
+};
 
-typedef union {
-	sctp_gap_ack_block_t	gab;
-        sctp_dup_tsn_t		dup;
-} sctp_sack_variable_t;
+union sctp_sack_variable {
+	struct sctp_gap_ack_block gab;
+	__be32 dup;
+};
 
-typedef struct sctp_sackhdr {
+struct sctp_sackhdr {
 	__be32 cum_tsn_ack;
 	__be32 a_rwnd;
 	__be16 num_gap_ack_blocks;
 	__be16 num_dup_tsns;
-	sctp_sack_variable_t variable[0];
-} sctp_sackhdr_t;
+	union sctp_sack_variable variable[0];
+};
 
-typedef struct sctp_sack_chunk {
+struct sctp_sack_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_sackhdr_t sack_hdr;
-} sctp_sack_chunk_t;
+	struct sctp_sackhdr sack_hdr;
+};
 
 
 /* RFC 2960.  Section 3.3.5 Heartbeat Request (HEARTBEAT) (4):
@@ -398,49 +394,49 @@ typedef struct sctp_sack_chunk {
  *  the present association.
  */
 
-typedef struct sctp_heartbeathdr {
+struct sctp_heartbeathdr {
 	struct sctp_paramhdr info;
-} sctp_heartbeathdr_t;
+};
 
-typedef struct sctp_heartbeat_chunk {
+struct sctp_heartbeat_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_heartbeathdr_t hb_hdr;
-} sctp_heartbeat_chunk_t;
+	struct sctp_heartbeathdr hb_hdr;
+};
 
 
 /* For the abort and shutdown ACK we must carry the init tag in the
  * common header. Just the common header is all that is needed with a
  * chunk descriptor.
  */
-typedef struct sctp_abort_chunk {
+struct sctp_abort_chunk {
 	struct sctp_chunkhdr uh;
-} sctp_abort_chunk_t;
+};
 
 
 /* For the graceful shutdown we must carry the tag (in common header)
  * and the highest consecutive acking value.
  */
-typedef struct sctp_shutdownhdr {
+struct sctp_shutdownhdr {
 	__be32 cum_tsn_ack;
-} sctp_shutdownhdr_t;
+};
 
-struct sctp_shutdown_chunk_t {
+struct sctp_shutdown_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_shutdownhdr_t shutdown_hdr;
+	struct sctp_shutdownhdr shutdown_hdr;
 };
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
 
-typedef struct sctp_errhdr {
+struct sctp_errhdr {
 	__be16 cause;
 	__be16 length;
 	__u8  variable[0];
-} sctp_errhdr_t;
+};
 
-typedef struct sctp_operr_chunk {
+struct sctp_operr_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_errhdr_t err_hdr;
-} sctp_operr_chunk_t;
+	struct sctp_errhdr err_hdr;
+};
 
 /* RFC 2960 3.3.10 - Operation Error
  *
@@ -461,7 +457,7 @@ typedef struct sctp_operr_chunk {
  *      9              No User Data
  *     10              Cookie Received While Shutting Down
  */
-typedef enum {
+enum sctp_error {
 
 	SCTP_ERROR_NO_ERROR	   = cpu_to_be16(0x00),
 	SCTP_ERROR_INV_STRM	   = cpu_to_be16(0x01),
@@ -516,33 +512,28 @@ typedef enum {
 	 * 0x0105          Unsupported HMAC Identifier
 	 */
 	 SCTP_ERROR_UNSUP_HMAC	= cpu_to_be16(0x0105)
-} sctp_error_t;
+};
 
 
 
 /* RFC 2960.  Appendix A.  Explicit Congestion Notification.
  *   Explicit Congestion Notification Echo (ECNE) (12)
  */
-typedef struct sctp_ecnehdr {
+struct sctp_ecnehdr {
 	__be32 lowest_tsn;
-} sctp_ecnehdr_t;
+};
 
-typedef struct sctp_ecne_chunk {
+struct sctp_ecne_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_ecnehdr_t ence_hdr;
-} sctp_ecne_chunk_t;
+	struct sctp_ecnehdr ence_hdr;
+};
 
 /* RFC 2960.  Appendix A.  Explicit Congestion Notification.
  *   Congestion Window Reduced (CWR) (13)
  */
-typedef struct sctp_cwrhdr {
+struct sctp_cwrhdr {
 	__be32 lowest_tsn;
-} sctp_cwrhdr_t;
-
-typedef struct sctp_cwr_chunk {
-	struct sctp_chunkhdr chunk_hdr;
-	sctp_cwrhdr_t cwr_hdr;
-} sctp_cwr_chunk_t;
+};
 
 /* PR-SCTP
  * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN)
@@ -638,20 +629,20 @@ struct sctp_fwdtsn_chunk {
  *	The ASCONF Parameter Response is used in the ASCONF-ACK to
  *	report status of ASCONF processing.
  */
-typedef struct sctp_addip_param {
-	struct sctp_paramhdr	param_hdr;
-	__be32		crr_id;
-} sctp_addip_param_t;
+struct sctp_addip_param {
+	struct sctp_paramhdr param_hdr;
+	__be32 crr_id;
+};
 
-typedef struct sctp_addiphdr {
+struct sctp_addiphdr {
 	__be32	serial;
 	__u8	params[0];
-} sctp_addiphdr_t;
+};
 
-typedef struct sctp_addip_chunk {
+struct sctp_addip_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_addiphdr_t addip_hdr;
-} sctp_addip_chunk_t;
+	struct sctp_addiphdr addip_hdr;
+};
 
 /* AUTH
  * Section 4.1  Authentication Chunk (AUTH)
@@ -702,16 +693,16 @@ typedef struct sctp_addip_chunk {
  *   HMAC: n bytes (unsigned integer) This hold the result of the HMAC
  *      calculation.
  */
-typedef struct sctp_authhdr {
+struct sctp_authhdr {
 	__be16 shkey_id;
 	__be16 hmac_id;
 	__u8   hmac[0];
-} sctp_authhdr_t;
+};
 
-typedef struct sctp_auth_chunk {
+struct sctp_auth_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_authhdr_t auth_hdr;
-} sctp_auth_chunk_t;
+	struct sctp_authhdr auth_hdr;
+};
 
 struct sctp_infox {
 	struct sctp_info *sctpinfo;
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
new file mode 100644
index 000000000000..4a906f560817
--- /dev/null
+++ b/include/linux/sfp.h
@@ -0,0 +1,434 @@
+#ifndef LINUX_SFP_H
+#define LINUX_SFP_H
+
+#include <linux/phy.h>
+
+struct __packed sfp_eeprom_base {
+	u8 phys_id;
+	u8 phys_ext_id;
+	u8 connector;
+#if defined __BIG_ENDIAN_BITFIELD
+	u8 e10g_base_er:1;
+	u8 e10g_base_lrm:1;
+	u8 e10g_base_lr:1;
+	u8 e10g_base_sr:1;
+	u8 if_1x_sx:1;
+	u8 if_1x_lx:1;
+	u8 if_1x_copper_active:1;
+	u8 if_1x_copper_passive:1;
+
+	u8 escon_mmf_1310_led:1;
+	u8 escon_smf_1310_laser:1;
+	u8 sonet_oc192_short_reach:1;
+	u8 sonet_reach_bit1:1;
+	u8 sonet_reach_bit2:1;
+	u8 sonet_oc48_long_reach:1;
+	u8 sonet_oc48_intermediate_reach:1;
+	u8 sonet_oc48_short_reach:1;
+
+	u8 unallocated_5_7:1;
+	u8 sonet_oc12_smf_long_reach:1;
+	u8 sonet_oc12_smf_intermediate_reach:1;
+	u8 sonet_oc12_short_reach:1;
+	u8 unallocated_5_3:1;
+	u8 sonet_oc3_smf_long_reach:1;
+	u8 sonet_oc3_smf_intermediate_reach:1;
+	u8 sonet_oc3_short_reach:1;
+
+	u8 e_base_px:1;
+	u8 e_base_bx10:1;
+	u8 e100_base_fx:1;
+	u8 e100_base_lx:1;
+	u8 e1000_base_t:1;
+	u8 e1000_base_cx:1;
+	u8 e1000_base_lx:1;
+	u8 e1000_base_sx:1;
+
+	u8 fc_ll_v:1;
+	u8 fc_ll_s:1;
+	u8 fc_ll_i:1;
+	u8 fc_ll_l:1;
+	u8 fc_ll_m:1;
+	u8 fc_tech_sa:1;
+	u8 fc_tech_lc:1;
+	u8 fc_tech_electrical_inter_enclosure:1;
+
+	u8 fc_tech_electrical_intra_enclosure:1;
+	u8 fc_tech_sn:1;
+	u8 fc_tech_sl:1;
+	u8 fc_tech_ll:1;
+	u8 sfp_ct_active:1;
+	u8 sfp_ct_passive:1;
+	u8 unallocated_8_1:1;
+	u8 unallocated_8_0:1;
+
+	u8 fc_media_tw:1;
+	u8 fc_media_tp:1;
+	u8 fc_media_mi:1;
+	u8 fc_media_tv:1;
+	u8 fc_media_m6:1;
+	u8 fc_media_m5:1;
+	u8 unallocated_9_1:1;
+	u8 fc_media_sm:1;
+
+	u8 fc_speed_1200:1;
+	u8 fc_speed_800:1;
+	u8 fc_speed_1600:1;
+	u8 fc_speed_400:1;
+	u8 fc_speed_3200:1;
+	u8 fc_speed_200:1;
+	u8 unallocated_10_1:1;
+	u8 fc_speed_100:1;
+#elif defined __LITTLE_ENDIAN_BITFIELD
+	u8 if_1x_copper_passive:1;
+	u8 if_1x_copper_active:1;
+	u8 if_1x_lx:1;
+	u8 if_1x_sx:1;
+	u8 e10g_base_sr:1;
+	u8 e10g_base_lr:1;
+	u8 e10g_base_lrm:1;
+	u8 e10g_base_er:1;
+
+	u8 sonet_oc3_short_reach:1;
+	u8 sonet_oc3_smf_intermediate_reach:1;
+	u8 sonet_oc3_smf_long_reach:1;
+	u8 unallocated_5_3:1;
+	u8 sonet_oc12_short_reach:1;
+	u8 sonet_oc12_smf_intermediate_reach:1;
+	u8 sonet_oc12_smf_long_reach:1;
+	u8 unallocated_5_7:1;
+
+	u8 sonet_oc48_short_reach:1;
+	u8 sonet_oc48_intermediate_reach:1;
+	u8 sonet_oc48_long_reach:1;
+	u8 sonet_reach_bit2:1;
+	u8 sonet_reach_bit1:1;
+	u8 sonet_oc192_short_reach:1;
+	u8 escon_smf_1310_laser:1;
+	u8 escon_mmf_1310_led:1;
+
+	u8 e1000_base_sx:1;
+	u8 e1000_base_lx:1;
+	u8 e1000_base_cx:1;
+	u8 e1000_base_t:1;
+	u8 e100_base_lx:1;
+	u8 e100_base_fx:1;
+	u8 e_base_bx10:1;
+	u8 e_base_px:1;
+
+	u8 fc_tech_electrical_inter_enclosure:1;
+	u8 fc_tech_lc:1;
+	u8 fc_tech_sa:1;
+	u8 fc_ll_m:1;
+	u8 fc_ll_l:1;
+	u8 fc_ll_i:1;
+	u8 fc_ll_s:1;
+	u8 fc_ll_v:1;
+
+	u8 unallocated_8_0:1;
+	u8 unallocated_8_1:1;
+	u8 sfp_ct_passive:1;
+	u8 sfp_ct_active:1;
+	u8 fc_tech_ll:1;
+	u8 fc_tech_sl:1;
+	u8 fc_tech_sn:1;
+	u8 fc_tech_electrical_intra_enclosure:1;
+
+	u8 fc_media_sm:1;
+	u8 unallocated_9_1:1;
+	u8 fc_media_m5:1;
+	u8 fc_media_m6:1;
+	u8 fc_media_tv:1;
+	u8 fc_media_mi:1;
+	u8 fc_media_tp:1;
+	u8 fc_media_tw:1;
+
+	u8 fc_speed_100:1;
+	u8 unallocated_10_1:1;
+	u8 fc_speed_200:1;
+	u8 fc_speed_3200:1;
+	u8 fc_speed_400:1;
+	u8 fc_speed_1600:1;
+	u8 fc_speed_800:1;
+	u8 fc_speed_1200:1;
+#else
+#error Unknown Endian
+#endif
+	u8 encoding;
+	u8 br_nominal;
+	u8 rate_id;
+	u8 link_len[6];
+	char vendor_name[16];
+	u8 extended_cc;
+	char vendor_oui[3];
+	char vendor_pn[16];
+	char vendor_rev[4];
+	union {
+		__be16 optical_wavelength;
+		u8 cable_spec;
+	};
+	u8 reserved62;
+	u8 cc_base;
+};
+
+struct __packed sfp_eeprom_ext {
+	__be16 options;
+	u8 br_max;
+	u8 br_min;
+	char vendor_sn[16];
+	char datecode[8];
+	u8 diagmon;
+	u8 enhopts;
+	u8 sff8472_compliance;
+	u8 cc_ext;
+};
+
+struct __packed sfp_eeprom_id {
+	struct sfp_eeprom_base base;
+	struct sfp_eeprom_ext ext;
+};
+
+/* SFP EEPROM registers */
+enum {
+	SFP_PHYS_ID			= 0x00,
+	SFP_PHYS_EXT_ID			= 0x01,
+	SFP_CONNECTOR			= 0x02,
+	SFP_COMPLIANCE			= 0x03,
+	SFP_ENCODING			= 0x0b,
+	SFP_BR_NOMINAL			= 0x0c,
+	SFP_RATE_ID			= 0x0d,
+	SFP_LINK_LEN_SM_KM		= 0x0e,
+	SFP_LINK_LEN_SM_100M		= 0x0f,
+	SFP_LINK_LEN_50UM_OM2_10M	= 0x10,
+	SFP_LINK_LEN_62_5UM_OM1_10M	= 0x11,
+	SFP_LINK_LEN_COPPER_1M		= 0x12,
+	SFP_LINK_LEN_50UM_OM4_10M	= 0x12,
+	SFP_LINK_LEN_50UM_OM3_10M	= 0x13,
+	SFP_VENDOR_NAME			= 0x14,
+	SFP_VENDOR_OUI			= 0x25,
+	SFP_VENDOR_PN			= 0x28,
+	SFP_VENDOR_REV			= 0x38,
+	SFP_OPTICAL_WAVELENGTH_MSB	= 0x3c,
+	SFP_OPTICAL_WAVELENGTH_LSB	= 0x3d,
+	SFP_CABLE_SPEC			= 0x3c,
+	SFP_CC_BASE			= 0x3f,
+	SFP_OPTIONS			= 0x40,	/* 2 bytes, MSB, LSB */
+	SFP_BR_MAX			= 0x42,
+	SFP_BR_MIN			= 0x43,
+	SFP_VENDOR_SN			= 0x44,
+	SFP_DATECODE			= 0x54,
+	SFP_DIAGMON			= 0x5c,
+	SFP_ENHOPTS			= 0x5d,
+	SFP_SFF8472_COMPLIANCE		= 0x5e,
+	SFP_CC_EXT			= 0x5f,
+
+	SFP_PHYS_ID_SFP			= 0x03,
+	SFP_PHYS_EXT_ID_SFP		= 0x04,
+	SFP_CONNECTOR_UNSPEC		= 0x00,
+	/* codes 01-05 not supportable on SFP, but some modules have single SC */
+	SFP_CONNECTOR_SC		= 0x01,
+	SFP_CONNECTOR_FIBERJACK		= 0x06,
+	SFP_CONNECTOR_LC		= 0x07,
+	SFP_CONNECTOR_MT_RJ		= 0x08,
+	SFP_CONNECTOR_MU		= 0x09,
+	SFP_CONNECTOR_SG		= 0x0a,
+	SFP_CONNECTOR_OPTICAL_PIGTAIL	= 0x0b,
+	SFP_CONNECTOR_MPO_1X12		= 0x0c,
+	SFP_CONNECTOR_MPO_2X16		= 0x0d,
+	SFP_CONNECTOR_HSSDC_II		= 0x20,
+	SFP_CONNECTOR_COPPER_PIGTAIL	= 0x21,
+	SFP_CONNECTOR_RJ45		= 0x22,
+	SFP_CONNECTOR_NOSEPARATE	= 0x23,
+	SFP_CONNECTOR_MXC_2X16		= 0x24,
+	SFP_ENCODING_UNSPEC		= 0x00,
+	SFP_ENCODING_8B10B		= 0x01,
+	SFP_ENCODING_4B5B		= 0x02,
+	SFP_ENCODING_NRZ		= 0x03,
+	SFP_ENCODING_8472_MANCHESTER	= 0x04,
+	SFP_ENCODING_8472_SONET		= 0x05,
+	SFP_ENCODING_8472_64B66B	= 0x06,
+	SFP_ENCODING_256B257B		= 0x07,
+	SFP_ENCODING_PAM4		= 0x08,
+	SFP_OPTIONS_HIGH_POWER_LEVEL	= BIT(13),
+	SFP_OPTIONS_PAGING_A2		= BIT(12),
+	SFP_OPTIONS_RETIMER		= BIT(11),
+	SFP_OPTIONS_COOLED_XCVR		= BIT(10),
+	SFP_OPTIONS_POWER_DECL		= BIT(9),
+	SFP_OPTIONS_RX_LINEAR_OUT	= BIT(8),
+	SFP_OPTIONS_RX_DECISION_THRESH	= BIT(7),
+	SFP_OPTIONS_TUNABLE_TX		= BIT(6),
+	SFP_OPTIONS_RATE_SELECT		= BIT(5),
+	SFP_OPTIONS_TX_DISABLE		= BIT(4),
+	SFP_OPTIONS_TX_FAULT		= BIT(3),
+	SFP_OPTIONS_LOS_INVERTED	= BIT(2),
+	SFP_OPTIONS_LOS_NORMAL		= BIT(1),
+	SFP_DIAGMON_DDM			= BIT(6),
+	SFP_DIAGMON_INT_CAL		= BIT(5),
+	SFP_DIAGMON_EXT_CAL		= BIT(4),
+	SFP_DIAGMON_RXPWR_AVG		= BIT(3),
+	SFP_DIAGMON_ADDRMODE		= BIT(2),
+	SFP_ENHOPTS_ALARMWARN		= BIT(7),
+	SFP_ENHOPTS_SOFT_TX_DISABLE	= BIT(6),
+	SFP_ENHOPTS_SOFT_TX_FAULT	= BIT(5),
+	SFP_ENHOPTS_SOFT_RX_LOS		= BIT(4),
+	SFP_ENHOPTS_SOFT_RATE_SELECT	= BIT(3),
+	SFP_ENHOPTS_APP_SELECT_SFF8079	= BIT(2),
+	SFP_ENHOPTS_SOFT_RATE_SFF8431	= BIT(1),
+	SFP_SFF8472_COMPLIANCE_NONE	= 0x00,
+	SFP_SFF8472_COMPLIANCE_REV9_3	= 0x01,
+	SFP_SFF8472_COMPLIANCE_REV9_5	= 0x02,
+	SFP_SFF8472_COMPLIANCE_REV10_2	= 0x03,
+	SFP_SFF8472_COMPLIANCE_REV10_4	= 0x04,
+	SFP_SFF8472_COMPLIANCE_REV11_0	= 0x05,
+	SFP_SFF8472_COMPLIANCE_REV11_3	= 0x06,
+	SFP_SFF8472_COMPLIANCE_REV11_4	= 0x07,
+	SFP_SFF8472_COMPLIANCE_REV12_0	= 0x08,
+};
+
+/* SFP Diagnostics */
+enum {
+	/* Alarm and warnings stored MSB at lower address then LSB */
+	SFP_TEMP_HIGH_ALARM		= 0x00,
+	SFP_TEMP_LOW_ALARM		= 0x02,
+	SFP_TEMP_HIGH_WARN		= 0x04,
+	SFP_TEMP_LOW_WARN		= 0x06,
+	SFP_VOLT_HIGH_ALARM		= 0x08,
+	SFP_VOLT_LOW_ALARM		= 0x0a,
+	SFP_VOLT_HIGH_WARN		= 0x0c,
+	SFP_VOLT_LOW_WARN		= 0x0e,
+	SFP_BIAS_HIGH_ALARM		= 0x10,
+	SFP_BIAS_LOW_ALARM		= 0x12,
+	SFP_BIAS_HIGH_WARN		= 0x14,
+	SFP_BIAS_LOW_WARN		= 0x16,
+	SFP_TXPWR_HIGH_ALARM		= 0x18,
+	SFP_TXPWR_LOW_ALARM		= 0x1a,
+	SFP_TXPWR_HIGH_WARN		= 0x1c,
+	SFP_TXPWR_LOW_WARN		= 0x1e,
+	SFP_RXPWR_HIGH_ALARM		= 0x20,
+	SFP_RXPWR_LOW_ALARM		= 0x22,
+	SFP_RXPWR_HIGH_WARN		= 0x24,
+	SFP_RXPWR_LOW_WARN		= 0x26,
+	SFP_LASER_TEMP_HIGH_ALARM	= 0x28,
+	SFP_LASER_TEMP_LOW_ALARM	= 0x2a,
+	SFP_LASER_TEMP_HIGH_WARN	= 0x2c,
+	SFP_LASER_TEMP_LOW_WARN		= 0x2e,
+	SFP_TEC_CUR_HIGH_ALARM		= 0x30,
+	SFP_TEC_CUR_LOW_ALARM		= 0x32,
+	SFP_TEC_CUR_HIGH_WARN		= 0x34,
+	SFP_TEC_CUR_LOW_WARN		= 0x36,
+	SFP_CAL_RXPWR4			= 0x38,
+	SFP_CAL_RXPWR3			= 0x3c,
+	SFP_CAL_RXPWR2			= 0x40,
+	SFP_CAL_RXPWR1			= 0x44,
+	SFP_CAL_RXPWR0			= 0x48,
+	SFP_CAL_TXI_SLOPE		= 0x4c,
+	SFP_CAL_TXI_OFFSET		= 0x4e,
+	SFP_CAL_TXPWR_SLOPE		= 0x50,
+	SFP_CAL_TXPWR_OFFSET		= 0x52,
+	SFP_CAL_T_SLOPE			= 0x54,
+	SFP_CAL_T_OFFSET		= 0x56,
+	SFP_CAL_V_SLOPE			= 0x58,
+	SFP_CAL_V_OFFSET		= 0x5a,
+	SFP_CHKSUM			= 0x5f,
+
+	SFP_TEMP			= 0x60,
+	SFP_VCC				= 0x62,
+	SFP_TX_BIAS			= 0x64,
+	SFP_TX_POWER			= 0x66,
+	SFP_RX_POWER			= 0x68,
+	SFP_LASER_TEMP			= 0x6a,
+	SFP_TEC_CUR			= 0x6c,
+
+	SFP_STATUS			= 0x6e,
+	SFP_ALARM			= 0x70,
+
+	SFP_EXT_STATUS			= 0x76,
+	SFP_VSL				= 0x78,
+	SFP_PAGE			= 0x7f,
+};
+
+struct device_node;
+struct ethtool_eeprom;
+struct ethtool_modinfo;
+struct net_device;
+struct sfp_bus;
+
+struct sfp_upstream_ops {
+	int (*module_insert)(void *, const struct sfp_eeprom_id *id);
+	void (*module_remove)(void *);
+	void (*link_down)(void *);
+	void (*link_up)(void *);
+	int (*connect_phy)(void *, struct phy_device *);
+	void (*disconnect_phy)(void *);
+};
+
+#if IS_ENABLED(CONFIG_SFP)
+int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+		   unsigned long *support);
+phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
+				    const struct sfp_eeprom_id *id);
+void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+		       unsigned long *support);
+
+int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
+int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
+			  u8 *data);
+void sfp_upstream_start(struct sfp_bus *bus);
+void sfp_upstream_stop(struct sfp_bus *bus);
+struct sfp_bus *sfp_register_upstream(struct device_node *np,
+				      struct net_device *ndev, void *upstream,
+				      const struct sfp_upstream_ops *ops);
+void sfp_unregister_upstream(struct sfp_bus *bus);
+#else
+static inline int sfp_parse_port(struct sfp_bus *bus,
+				 const struct sfp_eeprom_id *id,
+				 unsigned long *support)
+{
+	return PORT_OTHER;
+}
+
+static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
+						const struct sfp_eeprom_id *id)
+{
+	return PHY_INTERFACE_MODE_NA;
+}
+
+static inline void sfp_parse_support(struct sfp_bus *bus,
+				     const struct sfp_eeprom_id *id,
+				     unsigned long *support)
+{
+}
+
+static inline int sfp_get_module_info(struct sfp_bus *bus,
+				      struct ethtool_modinfo *modinfo)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int sfp_get_module_eeprom(struct sfp_bus *bus,
+					struct ethtool_eeprom *ee, u8 *data)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void sfp_upstream_start(struct sfp_bus *bus)
+{
+}
+
+static inline void sfp_upstream_stop(struct sfp_bus *bus)
+{
+}
+
+static inline struct sfp_bus *sfp_register_upstream(struct device_node *np,
+	struct net_device *ndev, void *upstream,
+	const struct sfp_upstream_ops *ops)
+{
+	return (struct sfp_bus *)-1;
+}
+
+static inline void sfp_unregister_upstream(struct sfp_bus *bus)
+{
+}
+#endif
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dbe29b6c9bd6..8c0708d2e5e6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -345,6 +345,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
 	frag->size -= delta;
 }
 
+static inline bool skb_frag_must_loop(struct page *p)
+{
+#if defined(CONFIG_HIGHMEM)
+	if (PageHighMem(p))
+		return true;
+#endif
+	return false;
+}
+
+/**
+ *	skb_frag_foreach_page - loop over pages in a fragment
+ *
+ *	@f:		skb frag to operate on
+ *	@f_off:		offset from start of f->page.p
+ *	@f_len:		length from f_off to loop over
+ *	@p:		(temp var) current page
+ *	@p_off:		(temp var) offset from start of current page,
+ *	                           non-zero only on first page.
+ *	@p_len:		(temp var) length in current page,
+ *				   < PAGE_SIZE only on first and last page.
+ *	@copied:	(temp var) length so far, excluding current p_len.
+ *
+ *	A fragment can hold a compound page, in which case per-page
+ *	operations, notably kmap_atomic, must be called for each
+ *	regular page.
+ */
+#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied)	\
+	for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT),		\
+	     p_off = (f_off) & (PAGE_SIZE - 1),				\
+	     p_len = skb_frag_must_loop(p) ?				\
+	     min_t(u32, f_len, PAGE_SIZE - p_off) : f_len,		\
+	     copied = 0;						\
+	     copied < f_len;						\
+	     copied += p_len, p++, p_off = 0,				\
+	     p_len = min_t(u32, f_len - copied, PAGE_SIZE))		\
+
 #define HAVE_HW_TIME_STAMP
 
 /**
@@ -393,6 +429,7 @@ enum {
 	SKBTX_SCHED_TSTAMP = 1 << 6,
 };
 
+#define SKBTX_ZEROCOPY_FRAG	(SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
 #define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
 				 SKBTX_SCHED_TSTAMP)
 #define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
@@ -407,10 +444,46 @@ enum {
  */
 struct ubuf_info {
 	void (*callback)(struct ubuf_info *, bool zerocopy_success);
-	void *ctx;
-	unsigned long desc;
+	union {
+		struct {
+			unsigned long desc;
+			void *ctx;
+		};
+		struct {
+			u32 id;
+			u16 len;
+			u16 zerocopy:1;
+			u32 bytelen;
+		};
+	};
+	atomic_t refcnt;
+
+	struct mmpin {
+		struct user_struct *user;
+		unsigned int num_pg;
+	} mmp;
 };
 
+#define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+					struct ubuf_info *uarg);
+
+static inline void sock_zerocopy_get(struct ubuf_info *uarg)
+{
+	atomic_inc(&uarg->refcnt);
+}
+
+void sock_zerocopy_put(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+			     struct msghdr *msg, int len,
+			     struct ubuf_info *uarg);
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb->end.
  */
@@ -463,39 +536,38 @@ enum {
 
 enum {
 	SKB_GSO_TCPV4 = 1 << 0,
-	SKB_GSO_UDP = 1 << 1,
 
 	/* This indicates the skb is from an untrusted source. */
-	SKB_GSO_DODGY = 1 << 2,
+	SKB_GSO_DODGY = 1 << 1,
 
 	/* This indicates the tcp segment has CWR set. */
-	SKB_GSO_TCP_ECN = 1 << 3,
+	SKB_GSO_TCP_ECN = 1 << 2,
 
-	SKB_GSO_TCP_FIXEDID = 1 << 4,
+	SKB_GSO_TCP_FIXEDID = 1 << 3,
 
-	SKB_GSO_TCPV6 = 1 << 5,
+	SKB_GSO_TCPV6 = 1 << 4,
 
-	SKB_GSO_FCOE = 1 << 6,
+	SKB_GSO_FCOE = 1 << 5,
 
-	SKB_GSO_GRE = 1 << 7,
+	SKB_GSO_GRE = 1 << 6,
 
-	SKB_GSO_GRE_CSUM = 1 << 8,
+	SKB_GSO_GRE_CSUM = 1 << 7,
 
-	SKB_GSO_IPXIP4 = 1 << 9,
+	SKB_GSO_IPXIP4 = 1 << 8,
 
-	SKB_GSO_IPXIP6 = 1 << 10,
+	SKB_GSO_IPXIP6 = 1 << 9,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 11,
+	SKB_GSO_UDP_TUNNEL = 1 << 10,
 
-	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,
+	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
 
-	SKB_GSO_PARTIAL = 1 << 13,
+	SKB_GSO_PARTIAL = 1 << 12,
 
-	SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
+	SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
 
-	SKB_GSO_SCTP = 1 << 15,
+	SKB_GSO_SCTP = 1 << 14,
 
-	SKB_GSO_ESP = 1 << 16,
+	SKB_GSO_ESP = 1 << 15,
 };
 
 #if BITS_PER_LONG > 32
@@ -945,12 +1017,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
 }
 
-struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
-static inline struct sk_buff *alloc_skb_head(gfp_t priority)
-{
-	return __alloc_skb_head(priority, -1);
-}
-
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
@@ -1129,8 +1195,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb)
 	return skb->hash;
 }
 
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6);
-
 static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
 {
 	if (!skb->l4_hash && !skb->sw_hash) {
@@ -1143,20 +1207,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6
 	return skb->hash;
 }
 
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl);
-
-static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
-	if (!skb->l4_hash && !skb->sw_hash) {
-		struct flow_keys keys;
-		__u32 hash = __get_hash_from_flowi4(fl4, &keys);
-
-		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
-	}
-
-	return skb->hash;
-}
-
 __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
 
 static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
@@ -1201,6 +1251,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
 	return &skb_shinfo(skb)->hwtstamps;
 }
 
+static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
+{
+	bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
+
+	return is_zcopy ? skb_uarg(skb) : NULL;
+}
+
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+	if (skb && uarg && !skb_zcopy(skb)) {
+		sock_zerocopy_get(uarg);
+		skb_shinfo(skb)->destructor_arg = uarg;
+		skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
+/* Release a reference on a zerocopy structure */
+static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+{
+	struct ubuf_info *uarg = skb_zcopy(skb);
+
+	if (uarg) {
+		uarg->zerocopy = uarg->zerocopy && zerocopy;
+		sock_zerocopy_put(uarg);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
+/* Abort a zerocopy operation and revert zckey on error in send syscall */
+static inline void skb_zcopy_abort(struct sk_buff *skb)
+{
+	struct ubuf_info *uarg = skb_zcopy(skb);
+
+	if (uarg) {
+		sock_zerocopy_put_abort(uarg);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
@@ -1783,13 +1872,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
 	return skb->len - skb->data_len;
 }
 
-static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
 {
 	unsigned int i, len = 0;
 
 	for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
 		len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
-	return len + skb_headlen(skb);
+	return len;
+}
+
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+{
+	return skb_headlen(skb) + __skb_pagelen(skb);
 }
 
 /**
@@ -2434,7 +2528,17 @@ static inline void skb_orphan(struct sk_buff *skb)
  */
 static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
+	if (likely(!skb_zcopy(skb)))
+		return 0;
+	if (skb_uarg(skb)->callback == sock_zerocopy_callback)
+		return 0;
+	return skb_copy_ubufs(skb, gfp_mask);
+}
+
+/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */
+static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
+{
+	if (likely(!skb_zcopy(skb)))
 		return 0;
 	return skb_copy_ubufs(skb, gfp_mask);
 }
@@ -2866,6 +2970,8 @@ static inline int skb_add_data(struct sk_buff *skb,
 static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
 				    const struct page *page, int off)
 {
+	if (skb_zcopy(skb))
+		return false;
 	if (i) {
 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
 
@@ -3120,6 +3226,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
 int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int len,
 		    unsigned int flags);
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+			 int len);
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
 int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8b13db5163cc..8ad963cdc88c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
 #define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
 
+#define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 542ca1ae02c4..267164a1d559 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -148,12 +148,6 @@ struct tcp_sock {
 	u16	gso_segs;	/* Max number of segs per GSO packet	*/
 
 /*
- *	Header prediction flags
- *	0x5?10 << 16 + snd_wnd in net byte order
- */
-	__be32	pred_flags;
-
-/*
  *	RFC793 variables by their proper names. This means you can
  *	read the code and the spec side by side (and laugh ...)
  *	See RFC793 and RFC1122. The RFC writes these in capitals.
@@ -192,15 +186,6 @@ struct tcp_sock {
 
 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
 
-	/* Data for direct copy to user */
-	struct {
-		struct sk_buff_head	prequeue;
-		struct task_struct	*task;
-		struct msghdr		*msg;
-		int			memory;
-		int			len;
-	} ucopy;
-
 	u32	snd_wl1;	/* Sequence for window update		*/
 	u32	snd_wnd;	/* The window we expect to receive	*/
 	u32	max_window;	/* Maximal window ever seen from peer	*/
@@ -273,7 +258,7 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
-	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
+	u32	prior_cwnd;	/* cwnd right before starting loss recovery */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
 	u32	prr_out;	/* Total number of pkts sent during Recovery. */
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 5209b5ed2a64..32fb046f2173 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 		case VIRTIO_NET_HDR_GSO_TCPV6:
 			gso_type = SKB_GSO_TCPV6;
 			break;
-		case VIRTIO_NET_HDR_GSO_UDP:
-			gso_type = SKB_GSO_UDP;
-			break;
 		default:
 			return -EINVAL;
 		}
@@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 		else if (sinfo->gso_type & SKB_GSO_TCPV6)
 			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
-		else if (sinfo->gso_type & SKB_GSO_UDP)
-			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
 		else
 			return -EINVAL;
 		if (sinfo->gso_type & SKB_GSO_TCP_ECN)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 53b1a2cca421..afb37f835449 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -58,7 +58,6 @@ struct unix_sock {
 	struct list_head	link;
 	atomic_long_t		inflight;
 	spinlock_t		lock;
-	unsigned char		recursion_level;
 	unsigned long		gc_flags;
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 58969b9a090c..0b1a0622b33c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
 	return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
 }
 
-static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
-{
-	return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
-}
-
 static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 {
 	struct dsa_switch_tree *dst = ds->dst;
@@ -337,13 +332,12 @@ struct dsa_switch_ops {
 				struct phy_device *phy);
 
 	/*
-	 * EEE setttings
+	 * Port's MAC EEE settings
 	 */
-	int	(*set_eee)(struct dsa_switch *ds, int port,
-			   struct phy_device *phydev,
-			   struct ethtool_eee *e);
-	int	(*get_eee)(struct dsa_switch *ds, int port,
-			   struct ethtool_eee *e);
+	int	(*set_mac_eee)(struct dsa_switch *ds, int port,
+			       struct ethtool_eee *e);
+	int	(*get_mac_eee)(struct dsa_switch *ds, int port,
+			       struct ethtool_eee *e);
 
 	/* EEPROM access */
 	int	(*get_eeprom_len)(struct dsa_switch *ds);
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
new file mode 100644
index 000000000000..241475224f74
--- /dev/null
+++ b/include/net/fib_notifier.h
@@ -0,0 +1,44 @@
+#ifndef __NET_FIB_NOTIFIER_H
+#define __NET_FIB_NOTIFIER_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+
+struct fib_notifier_info {
+	struct net *net;
+	int family;
+};
+
+enum fib_event_type {
+	FIB_EVENT_ENTRY_REPLACE,
+	FIB_EVENT_ENTRY_APPEND,
+	FIB_EVENT_ENTRY_ADD,
+	FIB_EVENT_ENTRY_DEL,
+	FIB_EVENT_RULE_ADD,
+	FIB_EVENT_RULE_DEL,
+	FIB_EVENT_NH_ADD,
+	FIB_EVENT_NH_DEL,
+};
+
+struct fib_notifier_ops {
+	int family;
+	struct list_head list;
+	unsigned int (*fib_seq_read)(struct net *net);
+	int (*fib_dump)(struct net *net, struct notifier_block *nb);
+	struct rcu_head rcu;
+};
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+		      enum fib_event_type event_type,
+		      struct fib_notifier_info *info);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info);
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb));
+int unregister_fib_notifier(struct notifier_block *nb);
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
+
+#endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c487bfa2f479..3d7f1cefc6f5 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,7 @@
 #include <linux/refcount.h>
 #include <net/flow.h>
 #include <net/rtnetlink.h>
+#include <net/fib_notifier.h>
 
 struct fib_kuid_range {
 	kuid_t start;
@@ -57,6 +58,7 @@ struct fib_rules_ops {
 	int			addr_size;
 	int			unresolved_rules;
 	int			nr_goto_rules;
+	unsigned int		fib_rules_seq;
 
 	int			(*action)(struct fib_rule *,
 					  struct flowi *, int,
@@ -89,6 +91,11 @@ struct fib_rules_ops {
 	struct rcu_head		rcu;
 };
 
+struct fib_rule_notifier_info {
+	struct fib_notifier_info info; /* must be first */
+	struct fib_rule *rule;
+};
+
 #define FRA_GENERIC_POLICY \
 	[FRA_IIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
 	[FRA_OIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
 			 u32 flags);
 bool fib_rule_matchall(const struct fib_rule *rule);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+unsigned int fib_rules_seq_read(struct net *net, int family);
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		   struct netlink_ext_ack *extack);
diff --git a/include/net/flow.h b/include/net/flow.h
index bae198b3039e..f3dc61b29bb5 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family)
 	return 0;
 }
 
-#define FLOW_DIR_IN	0
-#define FLOW_DIR_OUT	1
-#define FLOW_DIR_FWD	2
-
-struct net;
-struct sock;
-struct flow_cache_ops;
-
-struct flow_cache_object {
-	const struct flow_cache_ops *ops;
-};
-
-struct flow_cache_ops {
-	struct flow_cache_object *(*get)(struct flow_cache_object *);
-	int (*check)(struct flow_cache_object *);
-	void (*delete)(struct flow_cache_object *);
-};
-
-typedef struct flow_cache_object *(*flow_resolve_t)(
-		struct net *net, const struct flowi *key, u16 family,
-		u8 dir, struct flow_cache_object *oldobj, void *ctx);
-
-struct flow_cache_object *flow_cache_lookup(struct net *net,
-					    const struct flowi *key, u16 family,
-					    u8 dir, flow_resolve_t resolver,
-					    void *ctx);
-int flow_cache_init(struct net *net);
-void flow_cache_fini(struct net *net);
-void flow_cache_hp_init(void);
-
-void flow_cache_flush(struct net *net);
-void flow_cache_flush_deferred(struct net *net);
-extern atomic_t flow_cache_genid;
-
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
 static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
deleted file mode 100644
index 51eb971e8973..000000000000
--- a/include/net/flowcache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _NET_FLOWCACHE_H
-#define _NET_FLOWCACHE_H
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-
-struct flow_cache_percpu {
-	struct hlist_head		*hash_table;
-	unsigned int			hash_count;
-	u32				hash_rnd;
-	int				hash_rnd_recalc;
-	struct tasklet_struct		flush_tasklet;
-};
-
-struct flow_cache {
-	u32				hash_shift;
-	struct flow_cache_percpu __percpu *percpu;
-	struct hlist_node		node;
-	unsigned int			low_watermark;
-	unsigned int			high_watermark;
-	struct timer_list		rnd_timer;
-};
-#endif	/* _NET_FLOWCACHE_H */
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index f2a215fc78e4..950ed182f62f 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -33,18 +33,12 @@ struct inetpeer_addr {
 };
 
 struct inet_peer {
-	/* group together avl_left,avl_right,v4daddr to speedup lookups */
-	struct inet_peer __rcu	*avl_left, *avl_right;
+	struct rb_node		rb_node;
 	struct inetpeer_addr	daddr;
-	__u32			avl_height;
 
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	unsigned long		rate_last;
-	union {
-		struct list_head	gc_list;
-		struct rcu_head     gc_rcu;
-	};
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == 0), following field
 	 * is not available: rid
@@ -55,7 +49,6 @@ struct inet_peer {
 			atomic_t			rid;		/* Frag reception counter */
 		};
 		struct rcu_head         rcu;
-		struct inet_peer	*gc_next;
 	};
 
 	/* following fields might be frequently dirtied */
@@ -64,7 +57,7 @@ struct inet_peer {
 };
 
 struct inet_peer_base {
-	struct inet_peer __rcu	*root;
+	struct rb_root		rb_root;
 	seqlock_t		lock;
 	int			total;
 };
diff --git a/include/net/ip.h b/include/net/ip.h
index 821cedcc8e73..9e59dcf1787a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb);
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
 		      __be32 daddr, struct rtable *rt, int is_frag);
 
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
-		      const struct ip_options *sopt);
-static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+		      struct sk_buff *skb, const struct ip_options *sopt);
+static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
+				  struct sk_buff *skb)
 {
-	return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+	return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt);
 }
 
 void ip_options_fragment(struct sk_buff *skb);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a88008cc6f5..1d790ea40ea7 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,10 +16,12 @@
 #include <linux/ipv6_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/spinlock.h>
+#include <linux/notifier.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/netlink.h>
 #include <net/inetpeer.h>
+#include <net/fib_notifier.h>
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_HASHSZ 256
@@ -185,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+
+static inline void rt6_hold(struct rt6_info *rt)
+{
+	atomic_inc(&rt->rt6i_ref);
+}
+
+static inline void rt6_release(struct rt6_info *rt)
+{
+	if (atomic_dec_and_test(&rt->rt6i_ref)) {
+		rt6_free_pcpu(rt);
+		dst_dev_put(&rt->dst);
+		dst_release(&rt->dst);
+	}
+}
+
 enum fib6_walk_state {
 #ifdef CONFIG_IPV6_SUBTREES
 	FWS_S,
@@ -233,6 +251,7 @@ struct fib6_table {
 	struct fib6_node	tb6_root;
 	struct inet_peer_base	tb6_peers;
 	unsigned int		flags;
+	unsigned int		fib_seq;
 #define RT6_TABLE_HAS_DFLT_ROUTER	BIT(0)
 };
 
@@ -256,6 +275,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 					 struct fib6_table *,
 					 struct flowi6 *, int);
 
+struct fib6_entry_notifier_info {
+	struct fib_notifier_info info; /* must be first */
+	struct rt6_info *rt;
+};
+
 /*
  *	exported functions
  */
@@ -292,9 +316,24 @@ int fib6_init(void);
 
 int ipv6_route_open(struct inode *inode, struct file *file);
 
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info);
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info);
+
+int __net_init fib6_notifier_init(struct net *net);
+void __net_exit fib6_notifier_exit(struct net *net);
+
+unsigned int fib6_tables_seq_read(struct net *net);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
+bool fib6_rule_default(const struct fib_rule *rule);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib6_rules_seq_read(struct net *net);
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -304,5 +343,17 @@ static inline void              fib6_rules_cleanup(void)
 {
 	return ;
 }
+static inline bool fib6_rule_default(const struct fib_rule *rule)
+{
+	return true;
+}
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+static inline unsigned int fib6_rules_seq_read(struct net *net)
+{
+	return 0;
+}
 #endif
 #endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 199056933dcb..907d39a42f6b 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
 	struct rt6_info *rt = (struct rt6_info *)dst;
 
 	return rt->rt6i_flags & RTF_ANYCAST ||
-		(rt->rt6i_dst.plen != 128 &&
+		(rt->rt6i_dst.plen < 127 &&
 		 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
 }
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 41d580c6185f..1a7f7e424320 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -19,6 +19,7 @@
 #include <net/flow.h>
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
+#include <net/fib_notifier.h>
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
 #include <linux/percpu.h>
@@ -124,7 +125,6 @@ struct fib_info {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_weight;
 #endif
-	unsigned int		fib_offload_cnt;
 	struct rcu_head		rcu;
 	struct fib_nh		fib_nh[0];
 #define fib_dev		fib_nh[0].nh_dev
@@ -177,18 +177,6 @@ struct fib_result_nl {
 
 __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
 
-static inline void fib_info_offload_inc(struct fib_info *fi)
-{
-	fi->fib_offload_cnt++;
-	fi->fib_flags |= RTNH_F_OFFLOAD;
-}
-
-static inline void fib_info_offload_dec(struct fib_info *fi)
-{
-	if (--fi->fib_offload_cnt == 0)
-		fi->fib_flags &= ~RTNH_F_OFFLOAD;
-}
-
 #define FIB_RES_SADDR(net, res)				\
 	((FIB_RES_NH(res).nh_saddr_genid ==		\
 	  atomic_read(&(net)->ipv4.dev_addr_genid)) ?	\
@@ -201,10 +189,6 @@ static inline void fib_info_offload_dec(struct fib_info *fi)
 #define FIB_RES_PREFSRC(net, res)	((res).fi->fib_prefsrc ? : \
 					 FIB_RES_SADDR(net, res))
 
-struct fib_notifier_info {
-	struct net *net;
-};
-
 struct fib_entry_notifier_info {
 	struct fib_notifier_info info; /* must be first */
 	u32 dst;
@@ -215,44 +199,21 @@ struct fib_entry_notifier_info {
 	u32 tb_id;
 };
 
-struct fib_rule_notifier_info {
-	struct fib_notifier_info info; /* must be first */
-	struct fib_rule *rule;
-};
-
 struct fib_nh_notifier_info {
 	struct fib_notifier_info info; /* must be first */
 	struct fib_nh *fib_nh;
 };
 
-enum fib_event_type {
-	FIB_EVENT_ENTRY_REPLACE,
-	FIB_EVENT_ENTRY_APPEND,
-	FIB_EVENT_ENTRY_ADD,
-	FIB_EVENT_ENTRY_DEL,
-	FIB_EVENT_RULE_ADD,
-	FIB_EVENT_RULE_DEL,
-	FIB_EVENT_NH_ADD,
-	FIB_EVENT_NH_DEL,
-};
-
-int register_fib_notifier(struct notifier_block *nb,
-			  void (*cb)(struct notifier_block *nb));
-int unregister_fib_notifier(struct notifier_block *nb);
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
-		      enum fib_event_type event_type,
-		      struct fib_notifier_info *info);
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
 		       struct fib_notifier_info *info);
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info);
+
+int __net_init fib4_notifier_init(struct net *net);
+void __net_exit fib4_notifier_exit(struct net *net);
 
 void fib_notify(struct net *net, struct notifier_block *nb);
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-void fib_rules_notify(struct net *net, struct notifier_block *nb);
-#else
-static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-}
-#endif
 
 struct fib_table {
 	struct hlist_node	tb_hlist;
@@ -325,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
 	return true;
 }
 
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline unsigned int fib4_rules_seq_read(struct net *net)
+{
+	return 0;
+}
+
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -370,6 +341,8 @@ out:
 }
 
 bool fib4_rule_default(const struct fib_rule *rule);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib4_rules_seq_read(struct net *net);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1c401bd4c2e0..57faa375eab9 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -88,6 +88,7 @@ struct net {
 	/* core fib_rules */
 	struct list_head	rules_ops;
 
+	struct list_head	fib_notifier_ops;  /* protected by net_mutex */
 
 	struct net_device       *loopback_dev;          /* The loopback */
 	struct netns_core	core;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index ef8e6c3a80a6..82dd298b40c7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -178,6 +178,7 @@ enum {
 	NLA_S16,
 	NLA_S32,
 	NLA_S64,
+	NLA_BITFIELD32,
 	__NLA_TYPE_MAX,
 };
 
@@ -206,6 +207,7 @@ enum {
  *    NLA_MSECS            Leaving the length field zero will verify the
  *                         given type fits, using it verifies minimum length
  *                         just like "All other"
+ *    NLA_BITFIELD32      A 32-bit bitmap/bitselector attribute
  *    All other            Minimum length of attribute payload
  *
  * Example:
@@ -213,11 +215,13 @@ enum {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
  *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
  *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
+ *	[ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
  * };
  */
 struct nla_policy {
 	u16		type;
 	u16		len;
+	void            *validation_data;
 };
 
 /**
@@ -1203,6 +1207,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
 }
 
 /**
+ * nla_get_bitfield32 - return payload of 32 bitfield attribute
+ * @nla: nla_bitfield32 attribute
+ */
+static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla)
+{
+	struct nla_bitfield32 tmp;
+
+	nla_memcpy(&tmp, nla, sizeof(tmp));
+	return tmp;
+}
+
+/**
  * nla_memdup - duplicate attribute memory (kmemdup)
  * @src: netlink attribute to duplicate from
  * @gfp: GFP mask
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9a14a0850b0e..20d061c805e3 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -159,6 +159,7 @@ struct netns_ipv4 {
 	int sysctl_fib_multipath_hash_policy;
 #endif
 
+	struct fib_notifier_ops	*notifier_ops;
 	unsigned int	fib_seq;	/* protected by rtnl_mutex */
 
 	atomic_t	rt_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index de7745e2edcc..abdf3b40303b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -86,6 +86,7 @@ struct netns_ipv6 {
 	atomic_t		dev_addr_genid;
 	atomic_t		fib6_sernum;
 	struct seg6_pernet_data *seg6_data;
+	struct fib_notifier_ops	*notifier_ops;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 27bb9633c69d..611521646dd4 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,7 +6,6 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 #include <net/dst_ops.h>
-#include <net/flowcache.h>
 
 struct ctl_table_header;
 
@@ -73,16 +72,6 @@ struct netns_xfrm {
 	spinlock_t xfrm_state_lock;
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
-
-	/* flow cache part */
-	struct flow_cache	flow_cache_global;
-	atomic_t		flow_cache_genid;
-	struct list_head	flow_cache_gc_list;
-	atomic_t		flow_cache_gc_count;
-	spinlock_t		flow_cache_gc_lock;
-	struct work_struct	flow_cache_gc_work;
-	struct work_struct	flow_cache_flush_work;
-	struct mutex		flow_flush_sem;
 };
 
 #endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 537d0a0ad4c4..572083af02ac 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -113,36 +113,6 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
 	return 0;
 }
 
-/**
- * tcf_exts_is_predicative - check if a predicative extension is present
- * @exts: tc filter extensions handle
- *
- * Returns 1 if a predicative extension is present, i.e. an extension which
- * might cause further actions and thus overrule the regular tcf_result.
- */
-static inline int
-tcf_exts_is_predicative(struct tcf_exts *exts)
-{
-#ifdef CONFIG_NET_CLS_ACT
-	return exts->nr_actions;
-#else
-	return 0;
-#endif
-}
-
-/**
- * tcf_exts_is_available - check if at least one extension is present
- * @exts: tc filter extensions handle
- *
- * Returns 1 if at least one extension is present.
- */
-static inline int
-tcf_exts_is_available(struct tcf_exts *exts)
-{
-	/* All non-predicative extensions must be added here. */
-	return tcf_exts_is_predicative(exts);
-}
-
 static inline void tcf_exts_to_list(const struct tcf_exts *exts,
 				    struct list_head *actions)
 {
@@ -177,46 +147,61 @@ tcf_exts_stats_update(const struct tcf_exts *exts,
 }
 
 /**
+ * tcf_exts_has_actions - check if at least one action is present
+ * @exts: tc filter extensions handle
+ *
+ * Returns true if at least one action is present.
+ */
+static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return exts->nr_actions;
+#else
+	return false;
+#endif
+}
+
+/**
+ * tcf_exts_has_one_action - check if exactly one action is present
+ * @exts: tc filter extensions handle
+ *
+ * Returns true if exactly one action is present.
+ */
+static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return exts->nr_actions == 1;
+#else
+	return false;
+#endif
+}
+
+/**
  * tcf_exts_exec - execute tc filter extensions
  * @skb: socket buffer
  * @exts: tc filter extensions handle
  * @res: desired result
  *
- * Executes all configured extensions. Returns 0 on a normal execution,
+ * Executes all configured extensions. Returns TC_ACT_OK on a normal execution,
  * a negative number if the filter must be considered unmatched or
  * a positive action code (TC_ACT_*) which must be returned to the
  * underlying layer.
  */
 static inline int
 tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
-	       struct tcf_result *res)
+	      struct tcf_result *res)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (exts->nr_actions)
-		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
-				       res);
+	return tcf_action_exec(skb, exts->actions, exts->nr_actions, res);
 #endif
-	return 0;
+	return TC_ACT_OK;
 }
 
-#ifdef CONFIG_NET_CLS_ACT
-
-#define tc_no_actions(_exts)  ((_exts)->nr_actions == 0)
-#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
-
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_single_action(_exts) false
-
-#endif /* CONFIG_NET_CLS_ACT */
-
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, bool ovr);
 void tcf_exts_destroy(struct tcf_exts *exts);
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
-		     struct tcf_exts *src);
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
@@ -333,26 +318,6 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
 			struct tcf_pkt_info *);
 
 /**
- * tcf_em_tree_change - replace ematch tree of a running classifier
- *
- * @tp: classifier kind handle
- * @dst: destination ematch tree variable
- * @src: source ematch tree (temporary tree from tcf_em_tree_validate)
- *
- * This functions replaces the ematch tree in @dst with the ematch
- * tree in @src. The classifier in charge of the ematch tree may be
- * running.
- */
-static inline void tcf_em_tree_change(struct tcf_proto *tp,
-				      struct tcf_ematch_tree *dst,
-				      struct tcf_ematch_tree *src)
-{
-	tcf_tree_lock(tp);
-	memcpy(dst, src, sizeof(*dst));
-	tcf_tree_unlock(tp);
-}
-
-/**
  * tcf_em_tree_match - evaulate an ematch tree
  *
  * @skb: socket buffer of the packet in question
@@ -386,7 +351,6 @@ struct tcf_ematch_tree {
 #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
 #define tcf_em_tree_destroy(t) do { (void)(t); } while(0)
 #define tcf_em_tree_dump(skb, t, tlv) (0)
-#define tcf_em_tree_change(tp, dst, src) do { } while(0)
 #define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
 
 #endif /* CONFIG_NET_EMATCH */
@@ -441,6 +405,23 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 }
 #endif /* CONFIG_NET_CLS_IND */
 
+struct tc_cls_common_offload {
+	u32 handle;
+	u32 chain_index;
+	__be16 protocol;
+	u32 prio;
+};
+
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+			   const struct tcf_proto *tp)
+{
+	cls_common->handle = tp->q->handle;
+	cls_common->chain_index = tp->chain->index;
+	cls_common->protocol = tp->protocol;
+	cls_common->prio = tp->prio;
+}
+
 struct tc_cls_u32_knode {
 	struct tcf_exts *exts;
 	struct tc_u32_sel *sel;
@@ -467,6 +448,7 @@ enum tc_clsu32_command {
 };
 
 struct tc_cls_u32_offload {
+	struct tc_cls_common_offload common;
 	/* knode values */
 	enum tc_clsu32_command command;
 	union {
@@ -533,13 +515,14 @@ enum tc_fl_command {
 };
 
 struct tc_cls_flower_offload {
+	struct tc_cls_common_offload common;
 	enum tc_fl_command command;
-	u32 prio;
 	unsigned long cookie;
 	struct flow_dissector *dissector;
 	struct fl_flow_key *mask;
 	struct fl_flow_key *key;
 	struct tcf_exts *exts;
+	bool egress_dev;
 };
 
 enum tc_matchall_command {
@@ -548,6 +531,7 @@ enum tc_matchall_command {
 };
 
 struct tc_cls_matchall_offload {
+	struct tc_cls_common_offload common;
 	enum tc_matchall_command command;
 	struct tcf_exts *exts;
 	unsigned long cookie;
@@ -561,6 +545,7 @@ enum tc_clsbpf_command {
 };
 
 struct tc_cls_bpf_offload {
+	struct tc_cls_common_offload common;
 	enum tc_clsbpf_command command;
 	struct tcf_exts *exts;
 	struct bpf_prog *prog;
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index d4679e7a5ed5..376cb78b6247 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -126,8 +126,8 @@ typedef union {
 	__u8 u8;
 	int error;
 	__be16 err;
-	sctp_state_t state;
-	sctp_event_timeout_t to;
+	enum sctp_state state;
+	enum sctp_event_timeout to;
 	struct sctp_chunk *chunk;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
@@ -135,7 +135,7 @@ typedef union {
 	struct sctp_init_chunk *init;
 	struct sctp_ulpevent *ulpevent;
 	struct sctp_packet *packet;
-	sctp_sackhdr_t *sackh;
+	struct sctp_sackhdr *sackh;
 	struct sctp_datamsg *msg;
 } sctp_arg_t;
 
@@ -167,8 +167,8 @@ SCTP_ARG_CONSTRUCTOR(U16,	__u16, u16)
 SCTP_ARG_CONSTRUCTOR(U8,	__u8, u8)
 SCTP_ARG_CONSTRUCTOR(ERROR,     int, error)
 SCTP_ARG_CONSTRUCTOR(PERR,      __be16, err)	/* protocol error */
-SCTP_ARG_CONSTRUCTOR(STATE,	sctp_state_t, state)
-SCTP_ARG_CONSTRUCTOR(TO,	sctp_event_timeout_t, to)
+SCTP_ARG_CONSTRUCTOR(STATE,	enum sctp_state, state)
+SCTP_ARG_CONSTRUCTOR(TO,	enum sctp_event_timeout, to)
 SCTP_ARG_CONSTRUCTOR(CHUNK,	struct sctp_chunk *, chunk)
 SCTP_ARG_CONSTRUCTOR(ASOC,	struct sctp_association *, asoc)
 SCTP_ARG_CONSTRUCTOR(TRANSPORT,	struct sctp_transport *, transport)
@@ -176,7 +176,7 @@ SCTP_ARG_CONSTRUCTOR(BA,	struct sctp_bind_addr *, bp)
 SCTP_ARG_CONSTRUCTOR(PEER_INIT,	struct sctp_init_chunk *, init)
 SCTP_ARG_CONSTRUCTOR(ULPEVENT,  struct sctp_ulpevent *, ulpevent)
 SCTP_ARG_CONSTRUCTOR(PACKET,	struct sctp_packet *, packet)
-SCTP_ARG_CONSTRUCTOR(SACKH,	sctp_sackhdr_t *, sackh)
+SCTP_ARG_CONSTRUCTOR(SACKH,	struct sctp_sackhdr *, sackh)
 SCTP_ARG_CONSTRUCTOR(DATAMSG,	struct sctp_datamsg *, msg)
 
 static inline sctp_arg_t SCTP_FORCE(void)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 9b18044c551e..deaafa9b09cb 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -42,7 +42,7 @@
 
 #include <linux/sctp.h>
 #include <linux/ipv6.h> /* For ipv6hdr. */
-#include <net/tcp_states.h>  /* For TCP states used in sctp_sock_state_t */
+#include <net/tcp_states.h>  /* For TCP states used in enum sctp_sock_state */
 
 /* Value used for stream negotiation. */
 enum { SCTP_MAX_STREAM = 0xffff };
@@ -71,20 +71,18 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM };
 					 SCTP_NUM_AUTH_CHUNK_TYPES)
 
 /* These are the different flavours of event.  */
-typedef enum {
-
+enum sctp_event {
 	SCTP_EVENT_T_CHUNK = 1,
 	SCTP_EVENT_T_TIMEOUT,
 	SCTP_EVENT_T_OTHER,
 	SCTP_EVENT_T_PRIMITIVE
-
-} sctp_event_t;
+};
 
 /* As a convenience for the state machine, we append SCTP_EVENT_* and
  * SCTP_ULP_* to the list of possible chunks.
  */
 
-typedef enum {
+enum sctp_event_timeout {
 	SCTP_EVENT_TIMEOUT_NONE = 0,
 	SCTP_EVENT_TIMEOUT_T1_COOKIE,
 	SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -96,21 +94,21 @@ typedef enum {
 	SCTP_EVENT_TIMEOUT_RECONF,
 	SCTP_EVENT_TIMEOUT_SACK,
 	SCTP_EVENT_TIMEOUT_AUTOCLOSE,
-} sctp_event_timeout_t;
+};
 
 #define SCTP_EVENT_TIMEOUT_MAX		SCTP_EVENT_TIMEOUT_AUTOCLOSE
 #define SCTP_NUM_TIMEOUT_TYPES		(SCTP_EVENT_TIMEOUT_MAX + 1)
 
-typedef enum {
+enum sctp_event_other {
 	SCTP_EVENT_NO_PENDING_TSN = 0,
 	SCTP_EVENT_ICMP_PROTO_UNREACH,
-} sctp_event_other_t;
+};
 
 #define SCTP_EVENT_OTHER_MAX		SCTP_EVENT_ICMP_PROTO_UNREACH
 #define SCTP_NUM_OTHER_TYPES		(SCTP_EVENT_OTHER_MAX + 1)
 
 /* These are primitive requests from the ULP.  */
-typedef enum {
+enum sctp_event_primitive {
 	SCTP_PRIMITIVE_ASSOCIATE = 0,
 	SCTP_PRIMITIVE_SHUTDOWN,
 	SCTP_PRIMITIVE_ABORT,
@@ -118,7 +116,7 @@ typedef enum {
 	SCTP_PRIMITIVE_REQUESTHEARTBEAT,
 	SCTP_PRIMITIVE_ASCONF,
 	SCTP_PRIMITIVE_RECONF,
-} sctp_event_primitive_t;
+};
 
 #define SCTP_EVENT_PRIMITIVE_MAX	SCTP_PRIMITIVE_RECONF
 #define SCTP_NUM_PRIMITIVE_TYPES	(SCTP_EVENT_PRIMITIVE_MAX + 1)
@@ -126,25 +124,25 @@ typedef enum {
 /* We define here a utility type for manipulating subtypes.
  * The subtype constructors all work like this:
  *
- * 	sctp_subtype_t foo = SCTP_ST_CHUNK(SCTP_CID_INIT);
+ *   union sctp_subtype foo = SCTP_ST_CHUNK(SCTP_CID_INIT);
  */
 
-typedef union {
+union sctp_subtype {
 	enum sctp_cid chunk;
-	sctp_event_timeout_t timeout;
-	sctp_event_other_t other;
-	sctp_event_primitive_t primitive;
-} sctp_subtype_t;
+	enum sctp_event_timeout timeout;
+	enum sctp_event_other other;
+	enum sctp_event_primitive primitive;
+};
 
 #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \
-static inline sctp_subtype_t	\
+static inline union sctp_subtype	\
 SCTP_ST_## _name (_type _arg)		\
-{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
+{ union sctp_subtype _retval; _retval._elt = _arg; return _retval; }
 
 SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
-SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	sctp_event_timeout_t,	timeout)
-SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		sctp_event_other_t,	other)
-SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
+SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	enum sctp_event_timeout, timeout)
+SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		enum sctp_event_other,	other)
+SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	enum sctp_event_primitive, primitive)
 
 
 #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA)
@@ -155,8 +153,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
 				- sizeof(struct sctp_data_chunk)))
 
 /* Internal error codes */
-typedef enum {
-
+enum sctp_ierror {
 	SCTP_IERROR_NO_ERROR	        = 0,
 	SCTP_IERROR_BASE		= 1000,
 	SCTP_IERROR_NO_COOKIE,
@@ -177,12 +174,12 @@ typedef enum {
 	SCTP_IERROR_PROTO_VIOLATION,
 	SCTP_IERROR_ERROR,
 	SCTP_IERROR_ABORT,
-} sctp_ierror_t;
+};
 
 
 
 /* SCTP state defines for internal state machine */
-typedef enum {
+enum sctp_state {
 
 	SCTP_STATE_CLOSED		= 0,
 	SCTP_STATE_COOKIE_WAIT		= 1,
@@ -193,7 +190,7 @@ typedef enum {
 	SCTP_STATE_SHUTDOWN_RECEIVED	= 6,
 	SCTP_STATE_SHUTDOWN_ACK_SENT	= 7,
 
-} sctp_state_t;
+};
 
 #define SCTP_STATE_MAX			SCTP_STATE_SHUTDOWN_ACK_SENT
 #define SCTP_STATE_NUM_STATES		(SCTP_STATE_MAX + 1)
@@ -214,19 +211,19 @@ typedef enum {
  * - A socket in SCTP_SS_ESTABLISHED state indicates that it has a single 
  *   association.
  */
-typedef enum {
+enum sctp_sock_state {
 	SCTP_SS_CLOSED         = TCP_CLOSE,
 	SCTP_SS_LISTENING      = TCP_LISTEN,
 	SCTP_SS_ESTABLISHING   = TCP_SYN_SENT,
 	SCTP_SS_ESTABLISHED    = TCP_ESTABLISHED,
 	SCTP_SS_CLOSING        = TCP_CLOSE_WAIT,
-} sctp_sock_state_t;
+};
 
 /* These functions map various type to printable names.  */
-const char *sctp_cname(const sctp_subtype_t);	/* chunk types */
-const char *sctp_oname(const sctp_subtype_t);	/* other events */
-const char *sctp_tname(const sctp_subtype_t);	/* timeouts */
-const char *sctp_pname(const sctp_subtype_t);	/* primitives */
+const char *sctp_cname(const union sctp_subtype id);	/* chunk types */
+const char *sctp_oname(const union sctp_subtype id);	/* other events */
+const char *sctp_tname(const union sctp_subtype id);	/* timeouts */
+const char *sctp_pname(const union sctp_subtype id);	/* primitives */
 
 /* This is a table of printable names of sctp_state_t's.  */
 extern const char *const sctp_state_tbl[];
@@ -312,19 +309,19 @@ enum { SCTP_MAX_GABS = 16 };
 /* These return values describe the success or failure of a number of
  * routines which form the lower interface to SCTP_outqueue.
  */
-typedef enum {
+enum sctp_xmit {
 	SCTP_XMIT_OK,
 	SCTP_XMIT_PMTU_FULL,
 	SCTP_XMIT_RWND_FULL,
 	SCTP_XMIT_DELAY,
-} sctp_xmit_t;
+};
 
 /* These are the commands for manipulating transports.  */
-typedef enum {
+enum sctp_transport_cmd {
 	SCTP_TRANSPORT_UP,
 	SCTP_TRANSPORT_DOWN,
 	SCTP_TRANSPORT_PF,
-} sctp_transport_cmd_t;
+};
 
 /* These are the address scopes defined mainly for IPv4 addresses
  * based on draft of SCTP IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>.
@@ -333,20 +330,22 @@ typedef enum {
  * At this point, the IPv6 scopes will be mapped to these internal scopes
  * as much as possible.
  */
-typedef enum {
+enum sctp_scope {
 	SCTP_SCOPE_GLOBAL,		/* IPv4 global addresses */
 	SCTP_SCOPE_PRIVATE,		/* IPv4 private addresses */
 	SCTP_SCOPE_LINK,		/* IPv4 link local address */
 	SCTP_SCOPE_LOOPBACK,		/* IPv4 loopback address */
 	SCTP_SCOPE_UNUSABLE,		/* IPv4 unusable addresses */
-} sctp_scope_t;
+};
 
-typedef enum {
+enum {
 	SCTP_SCOPE_POLICY_DISABLE,	/* Disable IPv4 address scoping */
 	SCTP_SCOPE_POLICY_ENABLE,	/* Enable IPv4 address scoping */
 	SCTP_SCOPE_POLICY_PRIVATE,	/* Follow draft but allow IPv4 private addresses */
 	SCTP_SCOPE_POLICY_LINK,		/* Follow draft but allow IPv4 link local addresses */
-} sctp_scope_policy_t;
+};
+
+#define SCTP_SCOPE_POLICY_MAX	SCTP_SCOPE_POLICY_LINK
 
 /* Based on IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>,
  * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24,
@@ -370,20 +369,20 @@ typedef enum {
 						   peer */
 
 /* Reasons to retransmit. */
-typedef enum {
+enum sctp_retransmit_reason {
 	SCTP_RTXR_T3_RTX,
 	SCTP_RTXR_FAST_RTX,
 	SCTP_RTXR_PMTUD,
 	SCTP_RTXR_T1_RTX,
-} sctp_retransmit_reason_t;
+};
 
 /* Reasons to lower cwnd. */
-typedef enum {
+enum sctp_lower_cwnd {
 	SCTP_LOWER_CWND_T3_RTX,
 	SCTP_LOWER_CWND_FAST_RTX,
 	SCTP_LOWER_CWND_ECNE,
 	SCTP_LOWER_CWND_INACTIVE,
-} sctp_lower_cwnd_t;
+};
 
 
 /* SCTP-AUTH Necessary constants */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 45fd4c6056b5..24ff7931d38c 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -94,8 +94,8 @@
 /*
  * sctp/protocol.c
  */
-int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *,
-			      sctp_scope_t, gfp_t gfp, int flags);
+int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr,
+			      enum sctp_scope, gfp_t gfp, int flags);
 struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
 int sctp_register_pf(struct sctp_pf *, sa_family_t);
 void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
@@ -479,13 +479,13 @@ for (pos.v = chunk->member;\
 _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 
 #define _sctp_walk_errors(err, chunk_hdr, end)\
-for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \
 	    sizeof(struct sctp_chunkhdr));\
-     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
+     ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\
       (void *)chunk_hdr + end) &&\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
-     ntohs(err->length) >= sizeof(sctp_errhdr_t); \
-     err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
+     ntohs(err->length) >= sizeof(struct sctp_errhdr); \
+     err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length))))
 
 #define sctp_walk_fwdtsn(pos, chunk)\
 _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
@@ -558,14 +558,15 @@ static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
 /* Is the association in this state? */
 #define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state))
 static inline int __sctp_state(const struct sctp_association *asoc,
-			       sctp_state_t state)
+			       enum sctp_state state)
 {
 	return asoc->state == state;
 }
 
 /* Is the socket in this state? */
 #define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state))
-static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state)
+static inline int __sctp_sstate(const struct sock *sk,
+				enum sctp_sock_state state)
 {
 	return sk->sk_state == state;
 }
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 860f378333b5..1e7651c3b158 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -73,7 +73,7 @@ typedef struct {
 typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
 					      const struct sctp_endpoint *,
 					      const struct sctp_association *,
-					      const sctp_subtype_t type,
+					      const union sctp_subtype type,
 					      void *arg,
 					      sctp_cmd_seq_t *);
 typedef void (sctp_timer_event_t) (unsigned long);
@@ -175,10 +175,11 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 
 /* Prototypes for utility support functions.  */
 __u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *,
-					    sctp_event_t,
-					    sctp_state_t,
-					    sctp_subtype_t);
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(
+					struct net *net,
+					enum sctp_event event_type,
+					enum sctp_state state,
+					union sctp_subtype event_subtype);
 int sctp_chunk_iif(const struct sctp_chunk *);
 struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
 					     struct sctp_chunk *,
@@ -312,12 +313,10 @@ struct sctp_chunk *sctp_process_strreset_resp(
 
 /* Prototypes for statetable processing. */
 
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       sctp_state_t state,
-               struct sctp_endpoint *,
-               struct sctp_association *asoc,
-               void *event_arg,
-	       gfp_t gfp);
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+	       union sctp_subtype subtype, enum sctp_state state,
+	       struct sctp_endpoint *ep, struct sctp_association *asoc,
+	       void *event_arg, gfp_t gfp);
 
 /* 2nd level prototypes */
 void sctp_generate_t3_rtx_event(unsigned long peer);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5ab29af8ca8a..fbe6e81b889b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -449,7 +449,7 @@ struct sctp_af {
 	int		(*addr_valid)	(union sctp_addr *,
 					 struct sctp_sock *,
 					 const struct sk_buff *);
-	sctp_scope_t	(*scope) (union sctp_addr *);
+	enum sctp_scope	(*scope)(union sctp_addr *);
 	void		(*inaddr_any)	(union sctp_addr *, __be16);
 	int		(*is_any)	(const union sctp_addr *);
 	int		(*available)	(union sctp_addr *,
@@ -697,10 +697,11 @@ struct sctp_packet {
 void sctp_packet_init(struct sctp_packet *, struct sctp_transport *,
 		      __u16 sport, __u16 dport);
 void sctp_packet_config(struct sctp_packet *, __u32 vtag, int);
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *,
-				       struct sctp_chunk *, int, gfp_t);
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *,
-                                     struct sctp_chunk *);
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+					  struct sctp_chunk *chunk,
+					  int one_packet, gfp_t gfp);
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+					struct sctp_chunk *chunk);
 int sctp_packet_transmit(struct sctp_packet *, gfp_t);
 void sctp_packet_free(struct sctp_packet *);
 
@@ -950,7 +951,8 @@ int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
-void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t);
+void sctp_transport_lower_cwnd(struct sctp_transport *t,
+			       enum sctp_lower_cwnd reason);
 void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
@@ -1053,8 +1055,8 @@ int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
 int sctp_outq_is_empty(const struct sctp_outq *);
 void sctp_outq_restart(struct sctp_outq *);
 
-void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
-		     sctp_retransmit_reason_t);
+void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
+		     enum sctp_retransmit_reason reason);
 void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
 void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
 void sctp_prsctp_prune(struct sctp_association *asoc,
@@ -1110,7 +1112,7 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port);
 void sctp_bind_addr_free(struct sctp_bind_addr *);
 int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
-			sctp_scope_t scope, gfp_t gfp,
+			enum sctp_scope scope, gfp_t gfp,
 			int flags);
 int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
@@ -1134,8 +1136,9 @@ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
 int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
 			   __u16 port, gfp_t gfp);
 
-sctp_scope_t sctp_scope(const union sctp_addr *);
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope);
+enum sctp_scope sctp_scope(const union sctp_addr *addr);
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+		  const enum sctp_scope scope);
 int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
 int sctp_is_ep_boundall(struct sock *sk);
 
@@ -1556,9 +1559,9 @@ struct sctp_association {
 		 * and authenticated chunk list.  All that is part of the
 		 * cookie and these are just pointers to those locations
 		 */
-		sctp_random_param_t *peer_random;
-		sctp_chunks_param_t *peer_chunks;
-		sctp_hmac_algo_param_t *peer_hmacs;
+		struct sctp_random_param *peer_random;
+		struct sctp_chunks_param *peer_chunks;
+		struct sctp_hmac_algo_param *peer_hmacs;
 	} peer;
 
 	/* State       : A state variable indicating what state the
@@ -1574,7 +1577,7 @@ struct sctp_association {
 	 *
 	 *		State takes values from SCTP_STATE_*.
 	 */
-	sctp_state_t state;
+	enum sctp_state state;
 
 	/* Overall     : The overall association error count.
 	 * Error Count : [Clear this any time I get something.]
@@ -1924,8 +1927,8 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base)
 
 
 struct sctp_association *
-sctp_association_new(const struct sctp_endpoint *, const struct sock *,
-		     sctp_scope_t scope, gfp_t gfp);
+sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk,
+		     enum sctp_scope scope, gfp_t gfp);
 void sctp_association_free(struct sctp_association *);
 void sctp_association_put(struct sctp_association *);
 void sctp_association_hold(struct sctp_association *);
@@ -1945,9 +1948,10 @@ void sctp_assoc_del_peer(struct sctp_association *asoc,
 			 const union sctp_addr *addr);
 void sctp_assoc_rm_peer(struct sctp_association *asoc,
 			 struct sctp_transport *peer);
-void sctp_assoc_control_transport(struct sctp_association *,
-				  struct sctp_transport *,
-				  sctp_transport_cmd_t, sctp_sn_error_t);
+void sctp_assoc_control_transport(struct sctp_association *asoc,
+				  struct sctp_transport *transport,
+				  enum sctp_transport_cmd command,
+				  sctp_sn_error_t error);
 struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
 struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
 					   struct net *,
@@ -1966,8 +1970,8 @@ void sctp_assoc_set_primary(struct sctp_association *,
 			    struct sctp_transport *);
 void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
 				    struct sctp_transport *);
-int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *,
-				     sctp_scope_t, gfp_t);
+int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
+				     enum sctp_scope scope, gfp_t gfp);
 int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *,
 					 struct sctp_cookie*,
 					 gfp_t gfp);
diff --git a/include/net/sock.h b/include/net/sock.h
index 7c0632c7e870..fe1a0bc25cd3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -294,6 +294,7 @@ struct sock_common {
   *	@sk_stamp: time stamp of last packet received
   *	@sk_tsflags: SO_TIMESTAMPING socket options
   *	@sk_tskey: counter to disambiguate concurrent tstamp requests
+  *	@sk_zckey: counter to order MSG_ZEROCOPY notifications
   *	@sk_socket: Identd and reporting IO signals
   *	@sk_user_data: RPC layer private data
   *	@sk_frag: cached page frag
@@ -462,6 +463,7 @@ struct sock {
 	u16			sk_tsflags;
 	u8			sk_shutdown;
 	u32			sk_tskey;
+	atomic_t		sk_zckey;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
 #ifdef CONFIG_SECURITY
@@ -1531,6 +1533,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority);
 void __sock_wfree(struct sk_buff *skb);
 void sock_wfree(struct sk_buff *skb);
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+			     gfp_t priority);
 void skb_orphan_partial(struct sk_buff *skb);
 void sock_rfree(struct sk_buff *skb);
 void sock_efree(struct sk_buff *skb);
@@ -1582,11 +1586,14 @@ int sock_no_shutdown(struct socket *, int);
 int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *);
 int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int);
 int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
 int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
 int sock_no_mmap(struct file *file, struct socket *sock,
 		 struct vm_area_struct *vma);
 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
 			 size_t size, int flags);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+				int offset, size_t size, int flags);
 
 /*
  * Functions to fill in entries in struct proto_ops when a protocol
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 0c28ad97c52f..4fe966a0ad92 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -18,26 +18,26 @@
 #define STRP_STATS_INCR(stat) ((stat)++)
 
 struct strp_stats {
-	unsigned long long rx_msgs;
-	unsigned long long rx_bytes;
-	unsigned int rx_mem_fail;
-	unsigned int rx_need_more_hdr;
-	unsigned int rx_msg_too_big;
-	unsigned int rx_msg_timeouts;
-	unsigned int rx_bad_hdr_len;
+	unsigned long long msgs;
+	unsigned long long bytes;
+	unsigned int mem_fail;
+	unsigned int need_more_hdr;
+	unsigned int msg_too_big;
+	unsigned int msg_timeouts;
+	unsigned int bad_hdr_len;
 };
 
 struct strp_aggr_stats {
-	unsigned long long rx_msgs;
-	unsigned long long rx_bytes;
-	unsigned int rx_mem_fail;
-	unsigned int rx_need_more_hdr;
-	unsigned int rx_msg_too_big;
-	unsigned int rx_msg_timeouts;
-	unsigned int rx_bad_hdr_len;
-	unsigned int rx_aborts;
-	unsigned int rx_interrupted;
-	unsigned int rx_unrecov_intr;
+	unsigned long long msgs;
+	unsigned long long bytes;
+	unsigned int mem_fail;
+	unsigned int need_more_hdr;
+	unsigned int msg_too_big;
+	unsigned int msg_timeouts;
+	unsigned int bad_hdr_len;
+	unsigned int aborts;
+	unsigned int interrupted;
+	unsigned int unrecov_intr;
 };
 
 struct strparser;
@@ -48,16 +48,18 @@ struct strp_callbacks {
 	void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
 	int (*read_sock_done)(struct strparser *strp, int err);
 	void (*abort_parser)(struct strparser *strp, int err);
+	void (*lock)(struct strparser *strp);
+	void (*unlock)(struct strparser *strp);
 };
 
-struct strp_rx_msg {
+struct strp_msg {
 	int full_len;
 	int offset;
 };
 
-static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
+static inline struct strp_msg *strp_msg(struct sk_buff *skb)
 {
-	return (struct strp_rx_msg *)((void *)skb->cb +
+	return (struct strp_msg *)((void *)skb->cb +
 		offsetof(struct qdisc_skb_cb, data));
 }
 
@@ -65,18 +67,18 @@ static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
 struct strparser {
 	struct sock *sk;
 
-	u32 rx_stopped : 1;
-	u32 rx_paused : 1;
-	u32 rx_aborted : 1;
-	u32 rx_interrupted : 1;
-	u32 rx_unrecov_intr : 1;
-
-	struct sk_buff **rx_skb_nextp;
-	struct timer_list rx_msg_timer;
-	struct sk_buff *rx_skb_head;
-	unsigned int rx_need_bytes;
-	struct delayed_work rx_delayed_work;
-	struct work_struct rx_work;
+	u32 stopped : 1;
+	u32 paused : 1;
+	u32 aborted : 1;
+	u32 interrupted : 1;
+	u32 unrecov_intr : 1;
+
+	struct sk_buff **skb_nextp;
+	struct timer_list msg_timer;
+	struct sk_buff *skb_head;
+	unsigned int need_bytes;
+	struct delayed_work delayed_work;
+	struct work_struct work;
 	struct strp_stats stats;
 	struct strp_callbacks cb;
 };
@@ -84,7 +86,7 @@ struct strparser {
 /* Must be called with lock held for attached socket */
 static inline void strp_pause(struct strparser *strp)
 {
-	strp->rx_paused = 1;
+	strp->paused = 1;
 }
 
 /* May be called without holding lock for attached socket */
@@ -97,37 +99,37 @@ static inline void save_strp_stats(struct strparser *strp,
 
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat +=		\
 				 strp->stats._stat)
-	SAVE_PSOCK_STATS(rx_msgs);
-	SAVE_PSOCK_STATS(rx_bytes);
-	SAVE_PSOCK_STATS(rx_mem_fail);
-	SAVE_PSOCK_STATS(rx_need_more_hdr);
-	SAVE_PSOCK_STATS(rx_msg_too_big);
-	SAVE_PSOCK_STATS(rx_msg_timeouts);
-	SAVE_PSOCK_STATS(rx_bad_hdr_len);
+	SAVE_PSOCK_STATS(msgs);
+	SAVE_PSOCK_STATS(bytes);
+	SAVE_PSOCK_STATS(mem_fail);
+	SAVE_PSOCK_STATS(need_more_hdr);
+	SAVE_PSOCK_STATS(msg_too_big);
+	SAVE_PSOCK_STATS(msg_timeouts);
+	SAVE_PSOCK_STATS(bad_hdr_len);
 #undef SAVE_PSOCK_STATS
 
-	if (strp->rx_aborted)
-		agg_stats->rx_aborts++;
-	if (strp->rx_interrupted)
-		agg_stats->rx_interrupted++;
-	if (strp->rx_unrecov_intr)
-		agg_stats->rx_unrecov_intr++;
+	if (strp->aborted)
+		agg_stats->aborts++;
+	if (strp->interrupted)
+		agg_stats->interrupted++;
+	if (strp->unrecov_intr)
+		agg_stats->unrecov_intr++;
 }
 
 static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
 					struct strp_aggr_stats *agg_stats)
 {
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
-	SAVE_PSOCK_STATS(rx_msgs);
-	SAVE_PSOCK_STATS(rx_bytes);
-	SAVE_PSOCK_STATS(rx_mem_fail);
-	SAVE_PSOCK_STATS(rx_need_more_hdr);
-	SAVE_PSOCK_STATS(rx_msg_too_big);
-	SAVE_PSOCK_STATS(rx_msg_timeouts);
-	SAVE_PSOCK_STATS(rx_bad_hdr_len);
-	SAVE_PSOCK_STATS(rx_aborts);
-	SAVE_PSOCK_STATS(rx_interrupted);
-	SAVE_PSOCK_STATS(rx_unrecov_intr);
+	SAVE_PSOCK_STATS(msgs);
+	SAVE_PSOCK_STATS(bytes);
+	SAVE_PSOCK_STATS(mem_fail);
+	SAVE_PSOCK_STATS(need_more_hdr);
+	SAVE_PSOCK_STATS(msg_too_big);
+	SAVE_PSOCK_STATS(msg_timeouts);
+	SAVE_PSOCK_STATS(bad_hdr_len);
+	SAVE_PSOCK_STATS(aborts);
+	SAVE_PSOCK_STATS(interrupted);
+	SAVE_PSOCK_STATS(unrecov_intr);
 #undef SAVE_PSOCK_STATS
 
 }
@@ -135,8 +137,11 @@ static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
 void strp_done(struct strparser *strp);
 void strp_stop(struct strparser *strp);
 void strp_check_rcv(struct strparser *strp);
-int strp_init(struct strparser *strp, struct sock *csk,
+int strp_init(struct strparser *strp, struct sock *sk,
 	      struct strp_callbacks *cb);
 void strp_data_ready(struct strparser *strp);
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo);
 
 #endif /* __NET_STRPARSER_H_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..5173fecde495 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #endif
 #define TCP_RTO_MAX	((unsigned)(120*HZ))
 #define TCP_RTO_MIN	((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN	(2U) /* Min timeout for TCP timers in jiffies */
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/
 #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now
 						 * used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
 					                 * for local resources.
 					                 */
-#define TCP_REO_TIMEOUT_MIN	(2000) /* Min RACK reordering timeout in usec */
-
 #define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
 #define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
 #define TCP_KEEPALIVE_INTVL	(75*HZ)
@@ -257,7 +256,6 @@ extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_frto;
-extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
@@ -352,8 +350,11 @@ int tcp_v4_rcv(struct sk_buff *skb);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		 size_t size, int flags);
 void tcp_release_cb(struct sock *sk);
@@ -363,7 +364,7 @@ void tcp_delack_timer_handler(struct sock *sk);
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th, unsigned int len);
+			 const struct tcphdr *th);
 void tcp_rcv_space_adjust(struct sock *sk);
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
 void tcp_twsk_destructor(struct sock *sk);
@@ -633,29 +634,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 }
 
-static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
-{
-	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
-			       ntohl(TCP_FLAG_ACK) |
-			       snd_wnd);
-}
-
-static inline void tcp_fast_path_on(struct tcp_sock *tp)
-{
-	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
-}
-
-static inline void tcp_fast_path_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
-	    tp->rcv_wnd &&
-	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
-	    !tp->urg_data)
-		tcp_fast_path_on(tp);
-}
-
 /* Compute the actual rto_min value */
 static inline u32 tcp_rto_min(struct sock *sk)
 {
@@ -905,9 +883,8 @@ enum tcp_ca_event {
 
 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
 enum tcp_ca_ack_event_flags {
-	CA_ACK_SLOWPATH		= (1 << 0),	/* In slow path processing */
-	CA_ACK_WIN_UPDATE	= (1 << 1),	/* ACK updated window */
-	CA_ACK_ECE		= (1 << 2),	/* ECE bit is set on ack */
+	CA_ACK_WIN_UPDATE	= (1 << 0),	/* ACK updated window */
+	CA_ACK_ECE		= (1 << 1),	/* ECE bit is set on ack */
 };
 
 /*
@@ -1245,17 +1222,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
 		__tcp_checksum_complete(skb);
 }
 
-/* Prequeue for VJ style copy to user, combined with checksumming. */
-
-static inline void tcp_prequeue_init(struct tcp_sock *tp)
-{
-	tp->ucopy.task = NULL;
-	tp->ucopy.len = 0;
-	tp->ucopy.memory = 0;
-	skb_queue_head_init(&tp->ucopy.prequeue);
-}
-
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 int tcp_filter(struct sock *sk, struct sk_buff *skb);
 
@@ -1919,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
 /*
  * Save and compile IPv4 options, return a pointer to it
  */
-static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
+static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
+							 struct sk_buff *skb)
 {
 	const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
 	struct ip_options_rcu *dopt = NULL;
@@ -1928,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
 		int opt_size = sizeof(*dopt) + opt->optlen;
 
 		dopt = kmalloc(opt_size, GFP_ATOMIC);
-		if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
+		if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
 			kfree(dopt);
 			dopt = NULL;
 		}
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 02c5be037451..10cce0dd4450 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -115,6 +115,8 @@ struct udp_tunnel_info {
 /* Notify network devices of offloadable types */
 void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 			     unsigned short type);
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+			     unsigned short type);
 void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type);
 void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type);
 
@@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev)
 	call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
 }
 
+static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
+}
+
 /* Transmit the skb using UDP encapsulation. */
 void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
 			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c0916ab18d32..afb4929d7232 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
+void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
@@ -563,7 +564,6 @@ struct xfrm_policy {
 	refcount_t		refcnt;
 	struct timer_list	timer;
 
-	struct flow_cache_object flo;
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
@@ -978,7 +978,6 @@ struct xfrm_dst {
 		struct rt6_info		rt6;
 	} u;
 	struct dst_entry *route;
-	struct flow_cache_object flo;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
 	u32 xfrm_genid;
@@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 	}
 }
 
-void xfrm_garbage_collect(struct net *net);
-void xfrm_garbage_collect_deferred(struct net *net);
-
 #else
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
@@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
 {
 	return 1;
 }
-static inline void xfrm_garbage_collect(struct net *net)
-{
-}
 #endif
 
 static __inline__
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 1b61357d3f57..7b1eb7b4be41 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -12,7 +12,8 @@
 	FN(ABORTED)		\
 	FN(DROP)		\
 	FN(PASS)		\
-	FN(TX)
+	FN(TX)			\
+	FN(REDIRECT)
 
 #define __XDP_ACT_TP_FN(x)	\
 	TRACE_DEFINE_ENUM(XDP_##x);
@@ -48,6 +49,34 @@ TRACE_EVENT(xdp_exception,
 		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
 );
 
+TRACE_EVENT(xdp_redirect,
+
+	TP_PROTO(const struct net_device *from,
+		 const struct net_device *to,
+		 const struct bpf_prog *xdp, u32 act),
+
+	TP_ARGS(from, to, xdp, act),
+
+	TP_STRUCT__entry(
+		__string(name_from, from->name)
+		__string(name_to, to->name)
+		__array(u8, prog_tag, 8)
+		__field(u32, act)
+	),
+
+	TP_fast_assign(
+		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
+		memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
+		__assign_str(name_from, from->name);
+		__assign_str(name_to, to->name);
+		__entry->act = act;
+	),
+
+	TP_printk("prog=%s from=%s to=%s action=%s",
+		  __print_hex_str(__entry->prog_tag, 8),
+		  __get_str(name_from), __get_str(name_to),
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
+);
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 9861be8da65e..e47c9e436221 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -104,4 +104,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e99e3e6f8b37..1d06be1569b1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -104,6 +104,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LPM_TRIE,
 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
 	BPF_MAP_TYPE_HASH_OF_MAPS,
+	BPF_MAP_TYPE_DEVMAP,
 };
 
 enum bpf_prog_type {
@@ -344,9 +345,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
+ *     @flags:
+ *	  cls_bpf:
+ *          bit 0 - if set, redirect to ingress instead of egress
+ *          other bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ * int bpf_redirect_map(map, key, flags)
+ *     redirect to endpoint in map
+ *     @map: pointer to dev map
+ *     @key: index in map to lookup
+ *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORT on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -591,7 +603,8 @@ union bpf_attr {
 	FN(get_socket_uid),		\
 	FN(set_hash),			\
 	FN(setsockopt),			\
-	FN(skb_adjust_room),
+	FN(skb_adjust_room),		\
+	FN(redirect_map),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -717,6 +730,7 @@ enum xdp_action {
 	XDP_DROP,
 	XDP_PASS,
 	XDP_TX,
+	XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 07bdce1f444a..78fdf52d6b2f 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -18,10 +18,13 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP	2
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
+#define SO_EE_ORIGIN_ZEROCOPY	5
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
+#define SO_EE_CODE_ZEROCOPY_COPIED	1
+
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
  *
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7d4a594d5d58..9c041dae8e2c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1238,6 +1238,47 @@ struct ethtool_per_queue_op {
 	char	data[];
 };
 
+/**
+ * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
+ * @active_fec: FEC mode which is active on porte
+ * @fec: Bitmask of supported/configured FEC modes
+ * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ */
+struct ethtool_fecparam {
+	__u32   cmd;
+	/* bitmask of FEC modes */
+	__u32   active_fec;
+	__u32   fec;
+	__u32   reserved;
+};
+
+/**
+ * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
+ * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
+ * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
+ * @ETHTOOL_FEC_OFF: No FEC Mode
+ * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ */
+enum ethtool_fec_config_bits {
+	ETHTOOL_FEC_NONE_BIT,
+	ETHTOOL_FEC_AUTO_BIT,
+	ETHTOOL_FEC_OFF_BIT,
+	ETHTOOL_FEC_RS_BIT,
+	ETHTOOL_FEC_BASER_BIT,
+};
+
+#define ETHTOOL_FEC_NONE		(1 << ETHTOOL_FEC_NONE_BIT)
+#define ETHTOOL_FEC_AUTO		(1 << ETHTOOL_FEC_AUTO_BIT)
+#define ETHTOOL_FEC_OFF			(1 << ETHTOOL_FEC_OFF_BIT)
+#define ETHTOOL_FEC_RS			(1 << ETHTOOL_FEC_RS_BIT)
+#define ETHTOOL_FEC_BASER		(1 << ETHTOOL_FEC_BASER_BIT)
+
 /* CMDs currently supported */
 #define ETHTOOL_GSET		0x00000001 /* DEPRECATED, Get settings.
 					    * Please use ETHTOOL_GLINKSETTINGS
@@ -1330,6 +1371,8 @@ struct ethtool_per_queue_op {
 #define ETHTOOL_SLINKSETTINGS	0x0000004d /* Set ethtool_link_settings */
 #define ETHTOOL_PHY_GTUNABLE	0x0000004e /* Get PHY tunable configuration */
 #define ETHTOOL_PHY_STUNABLE	0x0000004f /* Set PHY tunable configuration */
+#define ETHTOOL_GFECPARAM	0x00000050 /* Get FEC settings */
+#define ETHTOOL_SFECPARAM	0x00000051 /* Set FEC settings */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -1387,6 +1430,9 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_2500baseT_Full_BIT	= 47,
 	ETHTOOL_LINK_MODE_5000baseT_Full_BIT	= 48,
 
+	ETHTOOL_LINK_MODE_FEC_NONE_BIT	= 49,
+	ETHTOOL_LINK_MODE_FEC_RS_BIT	= 50,
+	ETHTOOL_LINK_MODE_FEC_BASER_BIT	= 51,
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
 	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1395,7 +1441,7 @@ enum ethtool_link_mode_bit_indices {
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+	  = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index d496c02e14bc..33e2a5732bd1 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,6 +35,7 @@
 #define RTF_PREF(pref)	((pref) << 27)
 #define RTF_PREF_MASK	0x18000000
 
+#define RTF_OFFLOAD	0x20000000	/* offloaded route		*/
 #define RTF_PCPU	0x40000000	/* read-only: can not be set by user */
 #define RTF_LOCAL	0x80000000
 
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f86127a46cfc..f4fc9c9e123d 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -226,5 +226,22 @@ struct nlattr {
 #define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
 #define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
 
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ *  value = 0x0, and selector = 0x1
+ *  implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ *  value = 0x2, and selector = 0x2
+ *  implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+	__u32 value;
+	__u32 selector;
+};
 
 #endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d148505010a7..dab7dad9e01a 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -683,10 +683,29 @@ struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	TCA_ROOT_TIME_DELTA, /* in msecs */
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/include/linux/rxrpc.h b/include/uapi/linux/rxrpc.h
index 7343f71783dc..9656aad8f8f7 100644
--- a/include/linux/rxrpc.h
+++ b/include/uapi/linux/rxrpc.h
@@ -1,17 +1,18 @@
-/* AF_RXRPC parameters
+/* Types and definitions for AF_RXRPC.
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
+ * modify it under the terms of the GNU General Public Licence
  * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * 2 of the Licence, or (at your option) any later version.
  */
 
-#ifndef _LINUX_RXRPC_H
-#define _LINUX_RXRPC_H
+#ifndef _UAPI_LINUX_RXRPC_H
+#define _UAPI_LINUX_RXRPC_H
 
+#include <linux/types.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 
@@ -76,4 +77,48 @@ enum rxrpc_cmsg_type {
 #define RXRPC_SECURITY_RXGK	4	/* gssapi-based */
 #define RXRPC_SECURITY_RXK5	5	/* kerberos 5 */
 
-#endif /* _LINUX_RXRPC_H */
+/*
+ * RxRPC-level abort codes
+ */
+#define RX_CALL_DEAD		-1	/* call/conn has been inactive and is shut down */
+#define RX_INVALID_OPERATION	-2	/* invalid operation requested / attempted */
+#define RX_CALL_TIMEOUT		-3	/* call timeout exceeded */
+#define RX_EOF			-4	/* unexpected end of data on read op */
+#define RX_PROTOCOL_ERROR	-5	/* low-level protocol error */
+#define RX_USER_ABORT		-6	/* generic user abort */
+#define RX_ADDRINUSE		-7	/* UDP port in use */
+#define RX_DEBUGI_BADTYPE	-8	/* bad debugging packet type */
+
+/*
+ * (un)marshalling abort codes (rxgen)
+ */
+#define RXGEN_CC_MARSHAL	-450
+#define RXGEN_CC_UNMARSHAL	-451
+#define RXGEN_SS_MARSHAL	-452
+#define RXGEN_SS_UNMARSHAL	-453
+#define RXGEN_DECODE		-454
+#define RXGEN_OPCODE		-455
+#define RXGEN_SS_XDRFREE	-456
+#define RXGEN_CC_XDRFREE	-457
+
+/*
+ * Rx kerberos security abort codes
+ * - unfortunately we have no generalised security abort codes to say things
+ *   like "unsupported security", so we have to use these instead and hope the
+ *   other side understands
+ */
+#define RXKADINCONSISTENCY	19270400	/* security module structure inconsistent */
+#define RXKADPACKETSHORT	19270401	/* packet too short for security challenge */
+#define RXKADLEVELFAIL		19270402	/* security level negotiation failed */
+#define RXKADTICKETLEN		19270403	/* ticket length too short or too long */
+#define RXKADOUTOFSEQUENCE	19270404	/* packet had bad sequence number */
+#define RXKADNOAUTH		19270405	/* caller not authorised */
+#define RXKADBADKEY		19270406	/* illegal key: bad parity or weak */
+#define RXKADBADTICKET		19270407	/* security object was passed a bad ticket */
+#define RXKADUNKNOWNKEY		19270408	/* ticket contained unknown key version number */
+#define RXKADEXPIRED		19270409	/* authentication expired */
+#define RXKADSEALEDINCON	19270410	/* sealed data inconsistent */
+#define RXKADDATALEN		19270411	/* user data too long */
+#define RXKADILLEGALLEVEL	19270412	/* caller not authorised to use encrypted conns */
+
+#endif /* _UAPI_LINUX_RXRPC_H */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index d85693295798..b3f346fb9fe3 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -184,14 +184,7 @@ enum
 	LINUX_MIB_DELAYEDACKLOST,		/* DelayedACKLost */
 	LINUX_MIB_LISTENOVERFLOWS,		/* ListenOverflows */
 	LINUX_MIB_LISTENDROPS,			/* ListenDrops */
-	LINUX_MIB_TCPPREQUEUED,			/* TCPPrequeued */
-	LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG,	/* TCPDirectCopyFromBacklog */
-	LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE,	/* TCPDirectCopyFromPrequeue */
-	LINUX_MIB_TCPPREQUEUEDROPPED,		/* TCPPrequeueDropped */
-	LINUX_MIB_TCPHPHITS,			/* TCPHPHits */
-	LINUX_MIB_TCPHPHITSTOUSER,		/* TCPHPHitsToUser */
 	LINUX_MIB_TCPPUREACKS,			/* TCPPureAcks */
-	LINUX_MIB_TCPHPACKS,			/* TCPHPAcks */
 	LINUX_MIB_TCPRENORECOVERY,		/* TCPRenoRecovery */
 	LINUX_MIB_TCPSACKRECOVERY,		/* TCPSackRecovery */
 	LINUX_MIB_TCPSACKRENEGING,		/* TCPSACKReneging */
@@ -208,14 +201,12 @@ enum
 	LINUX_MIB_TCPSACKFAILURES,		/* TCPSackFailures */
 	LINUX_MIB_TCPLOSSFAILURES,		/* TCPLossFailures */
 	LINUX_MIB_TCPFASTRETRANS,		/* TCPFastRetrans */
-	LINUX_MIB_TCPFORWARDRETRANS,		/* TCPForwardRetrans */
 	LINUX_MIB_TCPSLOWSTARTRETRANS,		/* TCPSlowStartRetrans */
 	LINUX_MIB_TCPTIMEOUTS,			/* TCPTimeouts */
 	LINUX_MIB_TCPLOSSPROBES,		/* TCPLossProbes */
 	LINUX_MIB_TCPLOSSPROBERECOVERY,		/* TCPLossProbeRecovery */
 	LINUX_MIB_TCPRENORECOVERYFAIL,		/* TCPRenoRecoveryFail */
 	LINUX_MIB_TCPSACKRECOVERYFAIL,		/* TCPSackRecoveryFail */
-	LINUX_MIB_TCPSCHEDULERFAILED,		/* TCPSchedulerFailed */
 	LINUX_MIB_TCPRCVCOLLAPSED,		/* TCPRcvCollapsed */
 	LINUX_MIB_TCPDSACKOLDSENT,		/* TCPDSACKOldSent */
 	LINUX_MIB_TCPDSACKOFOSENT,		/* TCPDSACKOfoSent */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index a5507c977497..030e594bab45 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -231,6 +231,14 @@ enum {
 	TCP_NLA_SNDBUF_LIMITED,	/* Time (usec) limited by send buffer */
 	TCP_NLA_DATA_SEGS_OUT,	/* Data pkts sent including retransmission */
 	TCP_NLA_TOTAL_RETRANS,	/* Data pkts retransmitted */
+	TCP_NLA_PACING_RATE,    /* Pacing rate in bytes per second */
+	TCP_NLA_DELIVERY_RATE,  /* Delivery rate in bytes per second */
+	TCP_NLA_SND_CWND,       /* Sending congestion window */
+	TCP_NLA_REORDERING,     /* Reordering metric */
+	TCP_NLA_MIN_RTT,        /* minimum RTT */
+	TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
+	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1e5e658f2db..48e92705be59 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,6 +2,9 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+ifeq ($(CONFIG_NET),y)
+obj-$(CONFIG_BPF_SYSCALL) += devmap.o
+endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
new file mode 100644
index 000000000000..d439ee0eadb1
--- /dev/null
+++ b/kernel/bpf/devmap.c
@@ -0,0 +1,432 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/* Devmaps primary use is as a backend map for XDP BPF helper call
+ * bpf_redirect_map(). Because XDP is mostly concerned with performance we
+ * spent some effort to ensure the datapath with redirect maps does not use
+ * any locking. This is a quick note on the details.
+ *
+ * We have three possible paths to get into the devmap control plane bpf
+ * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall
+ * will invoke an update, delete, or lookup operation. To ensure updates and
+ * deletes appear atomic from the datapath side xchg() is used to modify the
+ * netdev_map array. Then because the datapath does a lookup into the netdev_map
+ * array (read-only) from an RCU critical section we use call_rcu() to wait for
+ * an rcu grace period before free'ing the old data structures. This ensures the
+ * datapath always has a valid copy. However, the datapath does a "flush"
+ * operation that pushes any pending packets in the driver outside the RCU
+ * critical section. Each bpf_dtab_netdev tracks these pending operations using
+ * an atomic per-cpu bitmap. The bpf_dtab_netdev object will not be destroyed
+ * until all bits are cleared indicating outstanding flush operations have
+ * completed.
+ *
+ * BPF syscalls may race with BPF program calls on any of the update, delete
+ * or lookup operations. As noted above the xchg() operation also keep the
+ * netdev_map consistent in this case. From the devmap side BPF programs
+ * calling into these operations are the same as multiple user space threads
+ * making system calls.
+ *
+ * Finally, any of the above may race with a netdev_unregister notifier. The
+ * unregister notifier must search for net devices in the map structure that
+ * contain a reference to the net device and remove them. This is a two step
+ * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b)
+ * check to see if the ifindex is the same as the net_device being removed.
+ * Unfortunately, the xchg() operations do not protect against this. To avoid
+ * potentially removing incorrect objects the dev_map_list_mutex protects
+ * conflicting netdev unregister and BPF syscall operations. Updates and
+ * deletes from a BPF program (done in rcu critical section) are blocked
+ * because of this mutex.
+ */
+#include <linux/bpf.h>
+#include <linux/jhash.h>
+#include <linux/filter.h>
+#include <linux/rculist_nulls.h>
+#include "percpu_freelist.h"
+#include "bpf_lru_list.h"
+#include "map_in_map.h"
+
+struct bpf_dtab_netdev {
+	struct net_device *dev;
+	int key;
+	struct rcu_head rcu;
+	struct bpf_dtab *dtab;
+};
+
+struct bpf_dtab {
+	struct bpf_map map;
+	struct bpf_dtab_netdev **netdev_map;
+	unsigned long int __percpu *flush_needed;
+	struct list_head list;
+};
+
+static DEFINE_MUTEX(dev_map_list_mutex);
+static LIST_HEAD(dev_map_list);
+
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_dtab *dtab;
+	u64 cost;
+	int err;
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 || attr->map_flags)
+		return ERR_PTR(-EINVAL);
+
+	/* if value_size is bigger, the user space won't be able to
+	 * access the elements.
+	 */
+	if (attr->value_size > KMALLOC_MAX_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	dtab = kzalloc(sizeof(*dtab), GFP_USER);
+	if (!dtab)
+		return ERR_PTR(-ENOMEM);
+
+	/* mandatory map attributes */
+	dtab->map.map_type = attr->map_type;
+	dtab->map.key_size = attr->key_size;
+	dtab->map.value_size = attr->value_size;
+	dtab->map.max_entries = attr->max_entries;
+	dtab->map.map_flags = attr->map_flags;
+
+	err = -ENOMEM;
+
+	/* make sure page count doesn't overflow */
+	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+	cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+	if (cost >= U32_MAX - PAGE_SIZE)
+		goto free_dtab;
+
+	dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* if map size is larger than memlock limit, reject it early */
+	err = bpf_map_precharge_memlock(dtab->map.pages);
+	if (err)
+		goto free_dtab;
+
+	err = -ENOMEM;
+	/* A per cpu bitfield with a bit per possible net device */
+	dtab->flush_needed = __alloc_percpu(
+				BITS_TO_LONGS(attr->max_entries) *
+				sizeof(unsigned long),
+				__alignof__(unsigned long));
+	if (!dtab->flush_needed)
+		goto free_dtab;
+
+	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+					      sizeof(struct bpf_dtab_netdev *));
+	if (!dtab->netdev_map)
+		goto free_dtab;
+
+	mutex_lock(&dev_map_list_mutex);
+	list_add_tail(&dtab->list, &dev_map_list);
+	mutex_unlock(&dev_map_list_mutex);
+	return &dtab->map;
+
+free_dtab:
+	free_percpu(dtab->flush_needed);
+	kfree(dtab);
+	return ERR_PTR(err);
+}
+
+static void dev_map_free(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	int i, cpu;
+
+	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete. The rcu critical section only guarantees
+	 * no further reads against netdev_map. It does __not__ ensure pending
+	 * flush operations (if any) are complete.
+	 */
+	synchronize_rcu();
+
+	/* To ensure all pending flush operations have completed wait for flush
+	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+	 * Because the above synchronize_rcu() ensures the map is disconnected
+	 * from the program we can assume no new bits will be set.
+	 */
+	for_each_online_cpu(cpu) {
+		unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+		while (!bitmap_empty(bitmap, dtab->map.max_entries))
+			cpu_relax();
+	}
+
+	/* Although we should no longer have datapath or bpf syscall operations
+	 * at this point we we can still race with netdev notifier, hence the
+	 * lock.
+	 */
+	mutex_lock(&dev_map_list_mutex);
+	for (i = 0; i < dtab->map.max_entries; i++) {
+		struct bpf_dtab_netdev *dev;
+
+		dev = dtab->netdev_map[i];
+		if (!dev)
+			continue;
+
+		dev_put(dev->dev);
+		kfree(dev);
+	}
+
+	/* At this point bpf program is detached and all pending operations
+	 * _must_ be complete
+	 */
+	list_del(&dtab->list);
+	mutex_unlock(&dev_map_list_mutex);
+	free_percpu(dtab->flush_needed);
+	bpf_map_area_free(dtab->netdev_map);
+	kfree(dtab);
+}
+
+static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	u32 index = key ? *(u32 *)key : U32_MAX;
+	u32 *next = (u32 *)next_key;
+
+	if (index >= dtab->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == dtab->map.max_entries - 1)
+		return -ENOENT;
+
+	*next = index + 1;
+	return 0;
+}
+
+void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+	__set_bit(key, bitmap);
+}
+
+struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *dev;
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	dev = READ_ONCE(dtab->netdev_map[key]);
+	return dev ? dev->dev : NULL;
+}
+
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+	u32 bit;
+
+	for_each_set_bit(bit, bitmap, map->max_entries) {
+		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+		struct net_device *netdev;
+
+		/* This is possible if the dev entry is removed by user space
+		 * between xdp redirect and flush op.
+		 */
+		if (unlikely(!dev))
+			continue;
+
+		netdev = dev->dev;
+
+		__clear_bit(bit, bitmap);
+		if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
+			continue;
+
+		netdev->netdev_ops->ndo_xdp_flush(netdev);
+	}
+}
+
+/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
+ * update happens in parallel here a dev_put wont happen until after reading the
+ * ifindex.
+ */
+static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *dev;
+	u32 i = *(u32 *)key;
+
+	if (i >= map->max_entries)
+		return NULL;
+
+	dev = READ_ONCE(dtab->netdev_map[i]);
+	return dev ? &dev->dev->ifindex : NULL;
+}
+
+static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+{
+	if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
+		struct net_device *fl = old_dev->dev;
+		unsigned long *bitmap;
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
+			__clear_bit(old_dev->key, bitmap);
+
+			fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+		}
+	}
+}
+
+static void __dev_map_entry_free(struct rcu_head *rcu)
+{
+	struct bpf_dtab_netdev *old_dev;
+
+	old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	dev_map_flush_old(old_dev);
+	dev_put(old_dev->dev);
+	kfree(old_dev);
+}
+
+static int dev_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *old_dev;
+	int k = *(u32 *)key;
+
+	if (k >= map->max_entries)
+		return -EINVAL;
+
+	/* Use synchronize_rcu() here to ensure any rcu critical sections
+	 * have completed, but this does not guarantee a flush has happened
+	 * yet. Because driver side rcu_read_lock/unlock only protects the
+	 * running XDP program. However, for pending flush operations the
+	 * dev and ctx are stored in another per cpu map. And additionally,
+	 * the driver tear down ensures all soft irqs are complete before
+	 * removing the net device in the case of dev_put equals zero.
+	 */
+	mutex_lock(&dev_map_list_mutex);
+	old_dev = xchg(&dtab->netdev_map[k], NULL);
+	if (old_dev)
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+	mutex_unlock(&dev_map_list_mutex);
+	return 0;
+}
+
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+				u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct net *net = current->nsproxy->net_ns;
+	struct bpf_dtab_netdev *dev, *old_dev;
+	u32 i = *(u32 *)key;
+	u32 ifindex = *(u32 *)value;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		return -EINVAL;
+
+	if (unlikely(i >= dtab->map.max_entries))
+		return -E2BIG;
+
+	if (unlikely(map_flags == BPF_NOEXIST))
+		return -EEXIST;
+
+	if (!ifindex) {
+		dev = NULL;
+	} else {
+		dev = kmalloc(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN);
+		if (!dev)
+			return -ENOMEM;
+
+		dev->dev = dev_get_by_index(net, ifindex);
+		if (!dev->dev) {
+			kfree(dev);
+			return -EINVAL;
+		}
+
+		dev->key = i;
+		dev->dtab = dtab;
+	}
+
+	/* Use call_rcu() here to ensure rcu critical sections have completed
+	 * Remembering the driver side flush operation will happen before the
+	 * net device is removed.
+	 */
+	mutex_lock(&dev_map_list_mutex);
+	old_dev = xchg(&dtab->netdev_map[i], dev);
+	if (old_dev)
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+	mutex_unlock(&dev_map_list_mutex);
+
+	return 0;
+}
+
+const struct bpf_map_ops dev_map_ops = {
+	.map_alloc = dev_map_alloc,
+	.map_free = dev_map_free,
+	.map_get_next_key = dev_map_get_next_key,
+	.map_lookup_elem = dev_map_lookup_elem,
+	.map_update_elem = dev_map_update_elem,
+	.map_delete_elem = dev_map_delete_elem,
+};
+
+static int dev_map_notification(struct notifier_block *notifier,
+				ulong event, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct bpf_dtab *dtab;
+	int i;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		mutex_lock(&dev_map_list_mutex);
+		list_for_each_entry(dtab, &dev_map_list, list) {
+			for (i = 0; i < dtab->map.max_entries; i++) {
+				struct bpf_dtab_netdev *dev;
+
+				dev = dtab->netdev_map[i];
+				if (!dev ||
+				    dev->dev->ifindex != netdev->ifindex)
+					continue;
+				dev = xchg(&dtab->netdev_map[i], NULL);
+				if (dev)
+					call_rcu(&dev->rcu,
+						 __dev_map_entry_free);
+			}
+		}
+		mutex_unlock(&dev_map_list_mutex);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block dev_map_notifier = {
+	.notifier_call = dev_map_notification,
+};
+
+static int __init dev_map_init(void)
+{
+	register_netdevice_notifier(&dev_map_notifier);
+	return 0;
+}
+
+subsys_initcall(dev_map_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 664d93972373..f6e8b3887eab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1283,6 +1283,14 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
+	/* devmap returns a pointer to a live net_device ifindex that we cannot
+	 * allow to be modified from bpf side. So do not allow lookup elements
+	 * for now.
+	 */
+	case BPF_MAP_TYPE_DEVMAP:
+		if (func_id != BPF_FUNC_redirect_map)
+			goto error;
+		break;
 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 	case BPF_MAP_TYPE_HASH_OF_MAPS:
 		if (func_id != BPF_FUNC_map_lookup_elem)
@@ -1311,6 +1319,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
 		break;
+	case BPF_FUNC_redirect_map:
+		if (map->map_type != BPF_MAP_TYPE_DEVMAP)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fb52435be42d..ee79b7a3c6b0 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -27,6 +27,30 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 	[NLA_S64]	= sizeof(s64),
 };
 
+static int validate_nla_bitfield32(const struct nlattr *nla,
+				   u32 *valid_flags_allowed)
+{
+	const struct nla_bitfield32 *bf = nla_data(nla);
+	u32 *valid_flags_mask = valid_flags_allowed;
+
+	if (!valid_flags_allowed)
+		return -EINVAL;
+
+	/*disallow invalid bit selector */
+	if (bf->selector & ~*valid_flags_mask)
+		return -EINVAL;
+
+	/*disallow invalid bit values */
+	if (bf->value & ~*valid_flags_mask)
+		return -EINVAL;
+
+	/*disallow valid bit values that are not selected*/
+	if (bf->value & ~bf->selector)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy)
 {
@@ -46,6 +70,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 			return -ERANGE;
 		break;
 
+	case NLA_BITFIELD32:
+		if (attrlen != sizeof(struct nla_bitfield32))
+			return -ERANGE;
+
+		return validate_nla_bitfield32(nla, pt->validation_data);
+
 	case NLA_NUL_STRING:
 		if (pt->len)
 			minlen = min_t(int, attrlen, pt->len + 1);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a3501173e200..83ba5483455a 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -729,11 +729,9 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
 				    const unsigned char *packet_buff,
 				    int packet_len, bool direct_link)
 {
-	unsigned char *skb_buff;
 	unsigned long new_direct_link_flag;
 
-	skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff,
-				packet_len);
+	skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len);
 	forw_packet_aggr->packet_len += packet_len;
 	forw_packet_aggr->num_packets++;
 
@@ -1281,7 +1279,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	batadv_ogm_packet->tq = combined_tq;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+		   "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
 		   orig_node->orig, orig_neigh_node->orig, total_count,
 		   neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
 		   batadv_ogm_packet->tq, if_incoming->net_dev->name,
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1e3dc374bfde..8be61734fc43 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -137,7 +137,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 	struct batadv_priv *bat_priv;
 	struct batadv_ogm2_packet *ogm_packet;
 	struct sk_buff *skb, *skb_tmp;
-	unsigned char *ogm_buff, *pkt_buff;
+	unsigned char *ogm_buff;
 	int ogm_buff_len;
 	u16 tvlv_len = 0;
 	int ret;
@@ -166,7 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 		goto reschedule;
 
 	skb_reserve(skb, ETH_HLEN);
-	pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len);
+	skb_put_data(skb, ogm_buff, ogm_buff_len);
 
 	ogm_packet = (struct batadv_ogm2_packet *)skb->data;
 	ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -200,7 +200,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 				type = "unknown";
 			}
 
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n",
 				   hard_iface->net_dev->name, type);
 
 			batadv_hardif_put(hard_iface);
@@ -683,18 +683,18 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 	ogm_throughput = ntohl(ogm_packet->throughput);
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n",
+		   "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n",
 		   ethhdr->h_source, if_incoming->net_dev->name,
 		   if_incoming->net_dev->dev_addr, ogm_packet->orig,
 		   ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl,
 		   ogm_packet->version, ntohs(ogm_packet->tvlv_len));
 
-	/* If the troughput metric is 0, immediately drop the packet. No need to
-	 * create orig_node / neigh_node for an unusable route.
+	/* If the throughput metric is 0, immediately drop the packet. No need
+	 * to create orig_node / neigh_node for an unusable route.
 	 */
 	if (ogm_throughput == 0) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "Drop packet: originator packet with troughput metric of 0\n");
+			   "Drop packet: originator packet with throughput metric of 0\n");
 		return;
 	}
 
@@ -762,7 +762,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 				type = "unknown";
 			}
 
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n",
 				   ogm_packet->orig, hard_iface->net_dev->name,
 				   type);
 
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6930d6b50f99..b6cfa78e9381 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -834,7 +834,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
 			last_seen_msecs = last_seen_msecs % 60000;
 			last_seen_secs = last_seen_msecs / 1000;
 
-			seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
+			seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n",
 				   &dat_entry->ip, dat_entry->mac_addr,
 				   batadv_print_vid(dat_entry->vid),
 				   last_seen_mins, last_seen_secs);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2be8f1f46529..05cc7637c064 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.2"
+#define BATADV_SOURCE_VERSION "2017.3"
 #endif
 
 /* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d239a9d72ac3..054a65e6eb68 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -911,7 +911,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 				type = "unknown";
 			}
 
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n",
 				   bcast_packet->orig,
 				   hard_iface->net_dev->name, type);
 
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index ab3b654b05cc..4e2576fc0c59 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
 			   struct lowpan_peer *peer)
 {
 	const u8 *saddr;
-	struct lowpan_btle_dev *dev;
-
-	dev = lowpan_btle_dev(netdev);
 
 	saddr = peer->lladdr;
 
@@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev)
 
 static void ifdown(struct net_device *netdev)
 {
-	int err;
-
 	rtnl_lock();
-	err = dev_close(netdev);
-	if (err < 0)
-		BT_INFO("iface %s cannot be closed (%d)", netdev->name, err);
+	dev_close(netdev);
 	rtnl_unlock();
 }
 
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479e9658..56d771a887b6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,9 +9,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+			fib_notifier.o
 
-obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee5647bd91b3..2f3277945d35 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -573,27 +573,12 @@ fault:
 }
 EXPORT_SYMBOL(skb_copy_datagram_from_iter);
 
-/**
- *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
- *	@skb: buffer to copy
- *	@from: the source to copy from
- *
- *	The function will first copy up to headlen, and then pin the userspace
- *	pages and build frags through them.
- *
- *	Returns 0, -EFAULT or -EMSGSIZE.
- */
-int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+			    struct iov_iter *from, size_t length)
 {
-	int len = iov_iter_count(from);
-	int copy = min_t(int, skb_headlen(skb), len);
-	int frag = 0;
+	int frag = skb_shinfo(skb)->nr_frags;
 
-	/* copy up to skb headlen */
-	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
-		return -EFAULT;
-
-	while (iov_iter_count(from)) {
+	while (length && iov_iter_count(from)) {
 		struct page *pages[MAX_SKB_FRAGS];
 		size_t start;
 		ssize_t copied;
@@ -603,18 +588,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 		if (frag == MAX_SKB_FRAGS)
 			return -EMSGSIZE;
 
-		copied = iov_iter_get_pages(from, pages, ~0U,
+		copied = iov_iter_get_pages(from, pages, length,
 					    MAX_SKB_FRAGS - frag, &start);
 		if (copied < 0)
 			return -EFAULT;
 
 		iov_iter_advance(from, copied);
+		length -= copied;
 
 		truesize = PAGE_ALIGN(copied + start);
 		skb->data_len += copied;
 		skb->len += copied;
 		skb->truesize += truesize;
-		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+		if (sk && sk->sk_type == SOCK_STREAM) {
+			sk->sk_wmem_queued += truesize;
+			sk_mem_charge(sk, truesize);
+		} else {
+			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+		}
 		while (copied) {
 			int size = min_t(int, copied, PAGE_SIZE - start);
 			skb_fill_page_desc(skb, frag++, pages[n], start, size);
@@ -625,6 +616,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(__zerocopy_sg_from_iter);
+
+/**
+ *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
+ *	@skb: buffer to copy
+ *	@from: the source to copy from
+ *
+ *	The function will first copy up to headlen, and then pin the userspace
+ *	pages and build frags through them.
+ *
+ *	Returns 0, -EFAULT or -EMSGSIZE.
+ */
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+{
+	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
+
+	/* copy up to skb headlen */
+	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
+		return -EFAULT;
+
+	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+}
 EXPORT_SYMBOL(zerocopy_sg_from_iter);
 
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8515f8fe0460..1d75499add72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -144,6 +144,7 @@
 #include <linux/netfilter_ingress.h>
 #include <linux/crash_dump.h>
 #include <linux/sctp.h>
+#include <net/udp_tunnel.h>
 
 #include "net-sysfs.h"
 
@@ -1413,7 +1414,7 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close_many(struct list_head *head)
+static void __dev_close_many(struct list_head *head)
 {
 	struct net_device *dev;
 
@@ -1455,23 +1456,18 @@ static int __dev_close_many(struct list_head *head)
 		dev->flags &= ~IFF_UP;
 		netpoll_poll_enable(dev);
 	}
-
-	return 0;
 }
 
-static int __dev_close(struct net_device *dev)
+static void __dev_close(struct net_device *dev)
 {
-	int retval;
 	LIST_HEAD(single);
 
 	list_add(&dev->close_list, &single);
-	retval = __dev_close_many(&single);
+	__dev_close_many(&single);
 	list_del(&single);
-
-	return retval;
 }
 
-int dev_close_many(struct list_head *head, bool unlink)
+void dev_close_many(struct list_head *head, bool unlink)
 {
 	struct net_device *dev, *tmp;
 
@@ -1488,8 +1484,6 @@ int dev_close_many(struct list_head *head, bool unlink)
 		if (unlink)
 			list_del_init(&dev->close_list);
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL(dev_close_many);
 
@@ -1502,7 +1496,7 @@ EXPORT_SYMBOL(dev_close_many);
  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
  *	chain.
  */
-int dev_close(struct net_device *dev)
+void dev_close(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
@@ -1511,7 +1505,6 @@ int dev_close(struct net_device *dev)
 		dev_close_many(&single, true);
 		list_del(&single);
 	}
-	return 0;
 }
 EXPORT_SYMBOL(dev_close);
 
@@ -1860,7 +1853,7 @@ static inline int deliver_skb(struct sk_buff *skb,
 			      struct packet_type *pt_prev,
 			      struct net_device *orig_dev)
 {
-	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		return -ENOMEM;
 	refcount_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -3865,6 +3858,121 @@ drop:
 	return NET_RX_DROP;
 }
 
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct bpf_prog *xdp_prog)
+{
+	struct xdp_buff xdp;
+	u32 act = XDP_DROP;
+	void *orig_data;
+	int hlen, off;
+	u32 mac_len;
+
+	/* Reinjected packets coming from act_mirred or similar should
+	 * not get XDP generic processing.
+	 */
+	if (skb_cloned(skb))
+		return XDP_PASS;
+
+	if (skb_linearize(skb))
+		goto do_drop;
+
+	/* The XDP program wants to see the packet starting at the MAC
+	 * header.
+	 */
+	mac_len = skb->data - skb_mac_header(skb);
+	hlen = skb_headlen(skb) + mac_len;
+	xdp.data = skb->data - mac_len;
+	xdp.data_end = xdp.data + hlen;
+	xdp.data_hard_start = skb->data - skb_headroom(skb);
+	orig_data = xdp.data;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+	off = xdp.data - orig_data;
+	if (off > 0)
+		__skb_pull(skb, off);
+	else if (off < 0)
+		__skb_push(skb, -off);
+
+	switch (act) {
+	case XDP_REDIRECT:
+	case XDP_TX:
+		__skb_push(skb, mac_len);
+		/* fall through */
+	case XDP_PASS:
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(skb->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+	do_drop:
+		kfree_skb(skb);
+		break;
+	}
+
+	return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+	struct net_device *dev = skb->dev;
+	struct netdev_queue *txq;
+	bool free_skb = true;
+	int cpu, rc;
+
+	txq = netdev_pick_tx(dev, skb, NULL);
+	cpu = smp_processor_id();
+	HARD_TX_LOCK(dev, txq, cpu);
+	if (!netif_xmit_stopped(txq)) {
+		rc = netdev_start_xmit(skb, dev, txq, 0);
+		if (dev_xmit_complete(rc))
+			free_skb = false;
+	}
+	HARD_TX_UNLOCK(dev, txq);
+	if (free_skb) {
+		trace_xdp_exception(dev, xdp_prog, XDP_TX);
+		kfree_skb(skb);
+	}
+}
+
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int do_xdp_generic(struct sk_buff *skb)
+{
+	struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+	if (xdp_prog) {
+		u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+		int err;
+
+		if (act != XDP_PASS) {
+			switch (act) {
+			case XDP_REDIRECT:
+				err = xdp_do_generic_redirect(skb->dev, skb);
+				if (err)
+					goto out_redir;
+			/* fallthru to submit skb */
+			case XDP_TX:
+				generic_xdp_tx(skb, xdp_prog);
+				break;
+			}
+			return XDP_DROP;
+		}
+	}
+	return XDP_PASS;
+out_redir:
+	trace_xdp_exception(skb->dev, xdp_prog, XDP_REDIRECT);
+	kfree_skb(skb);
+	return XDP_DROP;
+}
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -3872,6 +3980,18 @@ static int netif_rx_internal(struct sk_buff *skb)
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	trace_netif_rx(skb);
+
+	if (static_key_false(&generic_xdp_needed)) {
+		int ret = do_xdp_generic(skb);
+
+		/* Consider XDP consuming the packet a success from
+		 * the netdev point of view we do not want to count
+		 * this as an error.
+		 */
+		if (ret != XDP_PASS)
+			return NET_RX_SUCCESS;
+	}
+
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4292,7 +4412,7 @@ skip_classify:
 	}
 
 	if (pt_prev) {
-		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 			goto drop;
 		else
 			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -4338,8 +4458,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
-static struct static_key generic_xdp_needed __read_mostly;
-
 static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
 {
 	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4373,89 +4491,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
 	return ret;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-				     struct bpf_prog *xdp_prog)
-{
-	struct xdp_buff xdp;
-	u32 act = XDP_DROP;
-	void *orig_data;
-	int hlen, off;
-	u32 mac_len;
-
-	/* Reinjected packets coming from act_mirred or similar should
-	 * not get XDP generic processing.
-	 */
-	if (skb_cloned(skb))
-		return XDP_PASS;
-
-	if (skb_linearize(skb))
-		goto do_drop;
-
-	/* The XDP program wants to see the packet starting at the MAC
-	 * header.
-	 */
-	mac_len = skb->data - skb_mac_header(skb);
-	hlen = skb_headlen(skb) + mac_len;
-	xdp.data = skb->data - mac_len;
-	xdp.data_end = xdp.data + hlen;
-	xdp.data_hard_start = skb->data - skb_headroom(skb);
-	orig_data = xdp.data;
-
-	act = bpf_prog_run_xdp(xdp_prog, &xdp);
-
-	off = xdp.data - orig_data;
-	if (off > 0)
-		__skb_pull(skb, off);
-	else if (off < 0)
-		__skb_push(skb, -off);
-
-	switch (act) {
-	case XDP_TX:
-		__skb_push(skb, mac_len);
-		/* fall through */
-	case XDP_PASS:
-		break;
-
-	default:
-		bpf_warn_invalid_xdp_action(act);
-		/* fall through */
-	case XDP_ABORTED:
-		trace_xdp_exception(skb->dev, xdp_prog, act);
-		/* fall through */
-	case XDP_DROP:
-	do_drop:
-		kfree_skb(skb);
-		break;
-	}
-
-	return act;
-}
-
-/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
- */
-static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
-{
-	struct net_device *dev = skb->dev;
-	struct netdev_queue *txq;
-	bool free_skb = true;
-	int cpu, rc;
-
-	txq = netdev_pick_tx(dev, skb, NULL);
-	cpu = smp_processor_id();
-	HARD_TX_LOCK(dev, txq, cpu);
-	if (!netif_xmit_stopped(txq)) {
-		rc = netdev_start_xmit(skb, dev, txq, 0);
-		if (dev_xmit_complete(rc))
-			free_skb = false;
-	}
-	HARD_TX_UNLOCK(dev, txq);
-	if (free_skb) {
-		trace_xdp_exception(dev, xdp_prog, XDP_TX);
-		kfree_skb(skb);
-	}
-}
-
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -4468,17 +4503,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	rcu_read_lock();
 
 	if (static_key_false(&generic_xdp_needed)) {
-		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+		int ret = do_xdp_generic(skb);
 
-		if (xdp_prog) {
-			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
-
-			if (act != XDP_PASS) {
-				rcu_read_unlock();
-				if (act == XDP_TX)
-					generic_xdp_tx(skb, xdp_prog);
-				return NET_RX_DROP;
-			}
+		if (ret != XDP_PASS) {
+			rcu_read_unlock();
+			return NET_RX_DROP;
 		}
 	}
 
@@ -6689,8 +6718,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
 	 */
 
 	ret = 0;
-	if ((old_flags ^ flags) & IFF_UP)
-		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
+	if ((old_flags ^ flags) & IFF_UP) {
+		if (old_flags & IFF_UP)
+			__dev_close(dev);
+		else
+			ret = __dev_open(dev);
+	}
 
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@ -7235,24 +7268,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		features &= ~NETIF_F_GSO;
 	}
 
-	/* UFO needs SG and checksumming */
-	if (features & NETIF_F_UFO) {
-		/* maybe split UFO into V4 and V6? */
-		if (!(features & NETIF_F_HW_CSUM) &&
-		    ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
-		     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
-			netdev_dbg(dev,
-				"Dropping NETIF_F_UFO since no checksum offload features.\n");
-			features &= ~NETIF_F_UFO;
-		}
-
-		if (!(features & NETIF_F_SG)) {
-			netdev_dbg(dev,
-				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
-			features &= ~NETIF_F_UFO;
-		}
-	}
-
 	/* GSO partial features require GSO partial be set */
 	if ((features & dev->gso_partial_features) &&
 	    !(features & NETIF_F_GSO_PARTIAL)) {
@@ -7313,8 +7328,27 @@ sync_lower:
 	netdev_for_each_lower_dev(dev, lower, iter)
 		netdev_sync_lower_features(dev, lower, features);
 
-	if (!err)
+	if (!err) {
+		netdev_features_t diff = features ^ dev->features;
+
+		if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
+			/* udp_tunnel_{get,drop}_rx_info both need
+			 * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
+			 * device, or they won't do anything.
+			 * Thus we need to update dev->features
+			 * *before* calling udp_tunnel_get_rx_info,
+			 * but *after* calling udp_tunnel_drop_rx_info.
+			 */
+			if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
+				dev->features = features;
+				udp_tunnel_get_rx_info(dev);
+			} else {
+				udp_tunnel_drop_rx_info(dev);
+			}
+		}
+
 		dev->features = features;
+	}
 
 	return err < 0 ? 0 : 1;
 }
@@ -7516,6 +7550,12 @@ int register_netdevice(struct net_device *dev)
 	 */
 	dev->hw_features |= NETIF_F_SOFT_FEATURES;
 	dev->features |= NETIF_F_SOFT_FEATURES;
+
+	if (dev->netdev_ops->ndo_udp_tunnel_add) {
+		dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+		dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+	}
+
 	dev->wanted_features = dev->features & dev->hw_features;
 
 	if (!(dev->flags & IFF_LOOPBACK))
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 674b6c9cec18..6a582ae4c5d9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_LRO_BIT] =              "rx-lro",
 
 	[NETIF_F_TSO_BIT] =              "tx-tcp-segmentation",
-	[NETIF_F_UFO_BIT] =              "tx-udp-fragmentation",
 	[NETIF_F_GSO_ROBUST_BIT] =       "tx-gso-robust",
 	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
 	[NETIF_F_TSO_MANGLEID_BIT] =	 "tx-tcp-mangleid-segmentation",
@@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_HW_TC_BIT] =		 "hw-tc-offload",
 	[NETIF_F_HW_ESP_BIT] =		 "esp-hw-offload",
 	[NETIF_F_HW_ESP_TX_CSUM_BIT] =	 "esp-tx-csum-hw-offload",
+	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
 };
 
 static const char
@@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
 	case ETHTOOL_GTSO:
 	case ETHTOOL_STSO:
 		return NETIF_F_ALL_TSO;
-	case ETHTOOL_GUFO:
-	case ETHTOOL_SUFO:
-		return NETIF_F_UFO;
 	case ETHTOOL_GGSO:
 	case ETHTOOL_SGSO:
 		return NETIF_F_GSO;
@@ -2515,6 +2512,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
+static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+
+	if (!dev->ethtool_ops->get_fecparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_fecparam(dev, &fecparam);
+
+	if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_fecparam fecparam;
+
+	if (!dev->ethtool_ops->set_fecparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_fecparam(dev, &fecparam);
+}
+
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2555,7 +2579,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GPHYSTATS:
 	case ETHTOOL_GTSO:
 	case ETHTOOL_GPERMADDR:
-	case ETHTOOL_GUFO:
 	case ETHTOOL_GGSO:
 	case ETHTOOL_GGRO:
 	case ETHTOOL_GFLAGS:
@@ -2574,6 +2597,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GTUNABLE:
 	case ETHTOOL_PHY_GTUNABLE:
 	case ETHTOOL_GLINKSETTINGS:
+	case ETHTOOL_GFECPARAM:
 		break;
 	default:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2723,7 +2747,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCSUM:
 	case ETHTOOL_GSG:
 	case ETHTOOL_GTSO:
-	case ETHTOOL_GUFO:
 	case ETHTOOL_GGSO:
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
@@ -2732,7 +2755,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXCSUM:
 	case ETHTOOL_SSG:
 	case ETHTOOL_STSO:
-	case ETHTOOL_SUFO:
 	case ETHTOOL_SGSO:
 	case ETHTOOL_SGRO:
 		rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
@@ -2785,6 +2807,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_PHY_STUNABLE:
 		rc = set_phy_tunable(dev, useraddr);
 		break;
+	case ETHTOOL_GFECPARAM:
+		rc = ethtool_get_fecparam(dev, useraddr);
+		break;
+	case ETHTOOL_SFECPARAM:
+		rc = ethtool_set_fecparam(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 000000000000..292aab83702f
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,164 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+		      enum fib_event_type event_type,
+		      struct fib_notifier_info *info)
+{
+	info->net = net;
+	return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->net = net;
+	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+	struct fib_notifier_ops *ops;
+	unsigned int fib_seq = 0;
+	struct net *net;
+
+	rtnl_lock();
+	for_each_net(net) {
+		list_for_each_entry(ops, &net->fib_notifier_ops, list)
+			fib_seq += ops->fib_seq_read(net);
+	}
+	rtnl_unlock();
+
+	return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+	struct fib_notifier_ops *ops;
+
+	list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+		int err = ops->fib_dump(net, nb);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+				   void (*cb)(struct notifier_block *nb),
+				   unsigned int fib_seq)
+{
+	atomic_notifier_chain_register(&fib_chain, nb);
+	if (fib_seq == fib_seq_sum())
+		return true;
+	atomic_notifier_chain_unregister(&fib_chain, nb);
+	if (cb)
+		cb(nb);
+	return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb))
+{
+	int retries = 0;
+	int err;
+
+	do {
+		unsigned int fib_seq = fib_seq_sum();
+		struct net *net;
+
+		rcu_read_lock();
+		for_each_net_rcu(net) {
+			err = fib_net_dump(net, nb);
+			if (err)
+				goto err_fib_net_dump;
+		}
+		rcu_read_unlock();
+
+		if (fib_dump_is_consistent(nb, cb, fib_seq))
+			return 0;
+	} while (++retries < FIB_DUMP_MAX_RETRIES);
+
+	return -EBUSY;
+
+err_fib_net_dump:
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+				       struct net *net)
+{
+	struct fib_notifier_ops *o;
+
+	list_for_each_entry(o, &net->fib_notifier_ops, list)
+		if (ops->family == o->family)
+			return -EEXIST;
+	list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+	return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+	struct fib_notifier_ops *ops;
+	int err;
+
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return ERR_PTR(-ENOMEM);
+
+	err = __fib_notifier_ops_register(ops, net);
+	if (err)
+		goto err_register;
+
+	return ops;
+
+err_register:
+	kfree(ops);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+	list_del_rcu(&ops->list);
+	kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->fib_notifier_ops);
+	return 0;
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+	.init = fib_notifier_net_init,
+};
+
+static int __init fib_notifier_init(void)
+{
+	return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bcd2afa..fc0b65093417 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,67 @@ out:
 }
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+				  enum fib_event_type event_type,
+				  struct fib_rule *rule, int family)
+{
+	struct fib_rule_notifier_info info = {
+		.info.family = family,
+		.rule = rule,
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+				   enum fib_event_type event_type,
+				   struct fib_rule *rule,
+				   struct fib_rules_ops *ops)
+{
+	struct fib_rule_notifier_info info = {
+		.info.family = ops->family,
+		.rule = rule,
+	};
+
+	ops->fib_rules_seq++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+	struct fib_rules_ops *ops;
+	struct fib_rule *rule;
+
+	ops = lookup_rules_ops(net, family);
+	if (!ops)
+		return -EAFNOSUPPORT;
+	list_for_each_entry_rcu(rule, &ops->rules_list, list)
+		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+				       family);
+	rules_ops_put(ops);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+	unsigned int fib_rules_seq;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+
+	ops = lookup_rules_ops(net, family);
+	if (!ops)
+		return 0;
+	fib_rules_seq = ops->fib_rules_seq;
+	rules_ops_put(ops);
+
+	return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
 			    struct fib_rules_ops *ops)
 {
@@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (rule->tun_id)
 		ip_tunnel_need_metadata();
 
+	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
@@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 			}
 		}
 
+		call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).portid);
 		fib_rule_put(rule);
diff --git a/net/core/filter.c b/net/core/filter.c
index f44fc22fd45a..78d00933dbe7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -55,6 +55,7 @@
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
 #include <net/tcp.h>
+#include <linux/bpf_trace.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -1778,6 +1779,8 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 struct redirect_info {
 	u32 ifindex;
 	u32 flags;
+	struct bpf_map *map;
+	struct bpf_map *map_to_flush;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1791,6 +1794,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
+	ri->map = NULL;
 
 	return TC_ACT_REDIRECT;
 }
@@ -1818,6 +1822,29 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+	if (unlikely(flags))
+		return XDP_ABORTED;
+
+	ri->ifindex = ifindex;
+	ri->flags = flags;
+	ri->map = map;
+
+	return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_map_proto = {
+	.func           = bpf_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -2024,8 +2051,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 		return ret;
 
 	if (skb_is_gso(skb)) {
-		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
-		 * be changed into SKB_GSO_TCPV6.
+		/* SKB_GSO_TCPV4 needs to be changed into
+		 * SKB_GSO_TCPV6.
 		 */
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
 			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
@@ -2060,8 +2087,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 		return ret;
 
 	if (skb_is_gso(skb)) {
-		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
-		 * be changed into SKB_GSO_TCPV4.
+		/* SKB_GSO_TCPV6 needs to be changed into
+		 * SKB_GSO_TCPV4.
 		 */
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
 			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
@@ -2412,6 +2439,142 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static int __bpf_tx_xdp(struct net_device *dev,
+			struct bpf_map *map,
+			struct xdp_buff *xdp,
+			u32 index)
+{
+	int err;
+
+	if (!dev->netdev_ops->ndo_xdp_xmit) {
+		bpf_warn_invalid_xdp_redirect(dev->ifindex);
+		return -EOPNOTSUPP;
+	}
+
+	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+	if (err)
+		return err;
+
+	if (map)
+		__dev_map_insert_ctx(map, index);
+	else
+		dev->netdev_ops->ndo_xdp_flush(dev);
+
+	return err;
+}
+
+void xdp_do_flush_map(void)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_map *map = ri->map_to_flush;
+
+	ri->map = NULL;
+	ri->map_to_flush = NULL;
+
+	if (map)
+		__dev_map_flush(map);
+}
+EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+
+int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
+			struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_map *map = ri->map;
+	u32 index = ri->ifindex;
+	struct net_device *fwd;
+	int err = -EINVAL;
+
+	ri->ifindex = 0;
+	ri->map = NULL;
+
+	fwd = __dev_map_lookup_elem(map, index);
+	if (!fwd)
+		goto out;
+
+	if (ri->map_to_flush && (ri->map_to_flush != map))
+		xdp_do_flush_map();
+
+	err = __bpf_tx_xdp(fwd, map, xdp, index);
+	if (likely(!err))
+		ri->map_to_flush = map;
+
+out:
+	trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT);
+	return err;
+}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+		    struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct net_device *fwd;
+	u32 index = ri->ifindex;
+
+	if (ri->map)
+		return xdp_do_redirect_map(dev, xdp, xdp_prog);
+
+	fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	ri->ifindex = 0;
+	ri->map = NULL;
+	if (unlikely(!fwd)) {
+		bpf_warn_invalid_xdp_redirect(index);
+		return -EINVAL;
+	}
+
+	trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT);
+
+	return __bpf_tx_xdp(fwd, NULL, xdp, 0);
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect);
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	unsigned int len;
+	u32 index = ri->ifindex;
+
+	dev = dev_get_by_index_rcu(dev_net(dev), index);
+	ri->ifindex = 0;
+	if (unlikely(!dev)) {
+		bpf_warn_invalid_xdp_redirect(index);
+		goto err;
+	}
+
+	if (unlikely(!(dev->flags & IFF_UP)))
+		goto err;
+
+	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
+	if (skb->len > len)
+		goto err;
+
+	skb->dev = dev;
+	return 0;
+err:
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
+
+BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+	if (unlikely(flags))
+		return XDP_ABORTED;
+
+	ri->ifindex = ifindex;
+	ri->flags = flags;
+	return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_proto = {
+	.func           = bpf_xdp_redirect,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_ANYTHING,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
@@ -3011,6 +3174,10 @@ xdp_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_xdp_adjust_head:
 		return &bpf_xdp_adjust_head_proto;
+	case BPF_FUNC_redirect:
+		return &bpf_xdp_redirect_proto;
+	case BPF_FUNC_redirect_map:
+		return &bpf_redirect_map_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -3310,6 +3477,11 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+void bpf_warn_invalid_xdp_redirect(u32 ifindex)
+{
+	WARN_ONCE(1, "Illegal XDP redirect to unsupported device ifindex(%i)\n", ifindex);
+}
+
 static bool __is_valid_sock_ops_access(int off, int size)
 {
 	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644
index f7f5d1932a27..000000000000
--- a/net/core/flow.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
-	union {
-		struct hlist_node	hlist;
-		struct list_head	gc_list;
-	} u;
-	struct net			*net;
-	u16				family;
-	u8				dir;
-	u32				genid;
-	struct flowi			key;
-	struct flow_cache_object	*object;
-};
-
-struct flow_flush_info {
-	struct flow_cache		*cache;
-	atomic_t			cpuleft;
-	struct completion		completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache)	(1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
-	struct flow_cache *fc = (void *) arg;
-	int i;
-
-	for_each_possible_cpu(i)
-		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
-		return 0;
-	if (fle->object && !fle->object->ops->check(fle->object))
-		return 0;
-	return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (fle->object)
-		fle->object->ops->delete(fle->object);
-	kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
-	struct list_head gc_list;
-	struct flow_cache_entry *fce, *n;
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_gc_work);
-
-	INIT_LIST_HEAD(&gc_list);
-	spin_lock_bh(&xfrm->flow_cache_gc_lock);
-	list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
-	spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
-	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
-		flow_entry_kill(fce, xfrm);
-		atomic_dec(&xfrm->flow_cache_gc_count);
-	}
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-				     unsigned int deleted,
-				     struct list_head *gc_list,
-				     struct netns_xfrm *xfrm)
-{
-	if (deleted) {
-		atomic_add(deleted, &xfrm->flow_cache_gc_count);
-		fcp->hash_count -= deleted;
-		spin_lock_bh(&xfrm->flow_cache_gc_lock);
-		list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
-		spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-		schedule_work(&xfrm->flow_cache_gc_work);
-	}
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
-				struct flow_cache_percpu *fcp,
-				unsigned int shrink_to)
-{
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		unsigned int saved = 0;
-
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (saved < shrink_to &&
-			    flow_entry_valid(fle, xfrm)) {
-				saved++;
-			} else {
-				deleted++;
-				hlist_del(&fle->u.hlist);
-				list_add_tail(&fle->u.gc_list, &gc_list);
-			}
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
-	__flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
-	fcp->hash_rnd_recalc = 0;
-	__flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
-			  struct flow_cache_percpu *fcp,
-			  const struct flowi *key,
-			  unsigned int keysize)
-{
-	const u32 *k = (const u32 *) key;
-	const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
-	return jhash2(k, length, fcp->hash_rnd)
-		& (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp.  But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
-			    unsigned int keysize)
-{
-	const flow_compare_t *k1, *k1_lim, *k2;
-
-	k1 = (const flow_compare_t *) key1;
-	k1_lim = k1 + keysize;
-
-	k2 = (const flow_compare_t *) key2;
-
-	do {
-		if (*k1++ != *k2++)
-			return 1;
-	} while (k1 < k1_lim);
-
-	return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
-		  flow_resolve_t resolver, void *ctx)
-{
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle, *tfle;
-	struct flow_cache_object *flo;
-	unsigned int keysize;
-	unsigned int hash;
-
-	local_bh_disable();
-	fcp = this_cpu_ptr(fc->percpu);
-
-	fle = NULL;
-	flo = NULL;
-
-	keysize = flow_key_size(family);
-	if (!keysize)
-		goto nocache;
-
-	/* Packet really early in init?  Making flow_cache_init a
-	 * pre-smp initcall would solve this.  --RR */
-	if (!fcp->hash_table)
-		goto nocache;
-
-	if (fcp->hash_rnd_recalc)
-		flow_new_hash_rnd(fc, fcp);
-
-	hash = flow_hash_code(fc, fcp, key, keysize);
-	hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
-		if (tfle->net == net &&
-		    tfle->family == family &&
-		    tfle->dir == dir &&
-		    flow_key_compare(key, &tfle->key, keysize) == 0) {
-			fle = tfle;
-			break;
-		}
-	}
-
-	if (unlikely(!fle)) {
-		if (fcp->hash_count > fc->high_watermark)
-			flow_cache_shrink(fc, fcp);
-
-		if (atomic_read(&net->xfrm.flow_cache_gc_count) >
-		    2 * num_online_cpus() * fc->high_watermark) {
-			flo = ERR_PTR(-ENOBUFS);
-			goto ret_object;
-		}
-
-		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
-		if (fle) {
-			fle->net = net;
-			fle->family = family;
-			fle->dir = dir;
-			memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
-			fle->object = NULL;
-			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
-			fcp->hash_count++;
-		}
-	} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
-		flo = fle->object;
-		if (!flo)
-			goto ret_object;
-		flo = flo->ops->get(flo);
-		if (flo)
-			goto ret_object;
-	} else if (fle->object) {
-	        flo = fle->object;
-	        flo->ops->delete(flo);
-	        fle->object = NULL;
-	}
-
-nocache:
-	flo = NULL;
-	if (fle) {
-		flo = fle->object;
-		fle->object = NULL;
-	}
-	flo = resolver(net, key, family, dir, flo, ctx);
-	if (fle) {
-		fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
-		if (!IS_ERR(flo))
-			fle->object = flo;
-		else
-			fle->genid--;
-	} else {
-		if (!IS_ERR_OR_NULL(flo))
-			flo->ops->delete(flo);
-	}
-ret_object:
-	local_bh_enable();
-	return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
-	struct flow_flush_info *info = (void *)data;
-	struct flow_cache *fc = info->cache;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	fcp = this_cpu_ptr(fc->percpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (flow_entry_valid(fle, xfrm))
-				continue;
-
-			deleted++;
-			hlist_del(&fle->u.hlist);
-			list_add_tail(&fle->u.gc_list, &gc_list);
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
-	if (atomic_dec_and_test(&info->cpuleft))
-		complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing.  Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp;
-	unsigned int i;
-
-	fcp = per_cpu_ptr(fc->percpu, cpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++)
-		if (!hlist_empty(&fcp->hash_table[i]))
-			return 0;
-	return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
-	struct flow_flush_info *info = data;
-	struct tasklet_struct *tasklet;
-
-	tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
-	tasklet->data = (unsigned long)info;
-	tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
-	struct flow_flush_info info;
-	cpumask_var_t mask;
-	int i, self;
-
-	/* Track which cpus need flushing to avoid disturbing all cores. */
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
-		return;
-	cpumask_clear(mask);
-
-	/* Don't want cpus going down or up during this. */
-	get_online_cpus();
-	mutex_lock(&net->xfrm.flow_flush_sem);
-	info.cache = &net->xfrm.flow_cache_global;
-	for_each_online_cpu(i)
-		if (!flow_cache_percpu_empty(info.cache, i))
-			cpumask_set_cpu(i, mask);
-	atomic_set(&info.cpuleft, cpumask_weight(mask));
-	if (atomic_read(&info.cpuleft) == 0)
-		goto done;
-
-	init_completion(&info.completion);
-
-	local_bh_disable();
-	self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
-	on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
-	if (self)
-		flow_cache_flush_tasklet((unsigned long)&info);
-	local_bh_enable();
-
-	wait_for_completion(&info.completion);
-
-done:
-	mutex_unlock(&net->xfrm.flow_flush_sem);
-	put_online_cpus();
-	free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_flush_work);
-	struct net *net = container_of(xfrm, struct net, xfrm);
-
-	flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
-	schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-	unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
-	if (!fcp->hash_table) {
-		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
-		if (!fcp->hash_table) {
-			pr_err("NET: failed to allocate flow cache sz %u\n", sz);
-			return -ENOMEM;
-		}
-		fcp->hash_rnd_recalc = 1;
-		fcp->hash_count = 0;
-		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
-	}
-	return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
-	return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
-	__flow_cache_shrink(fc, fcp, 0);
-	return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	if (!flow_cachep)
-		flow_cachep = kmem_cache_create("flow_cache",
-						sizeof(struct flow_cache_entry),
-						0, SLAB_PANIC, NULL);
-	spin_lock_init(&net->xfrm.flow_cache_gc_lock);
-	INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
-	INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
-	INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
-	mutex_init(&net->xfrm.flow_flush_sem);
-	atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
-	fc->hash_shift = 10;
-	fc->low_watermark = 2 * flow_cache_hash_size(fc);
-	fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
-	fc->percpu = alloc_percpu(struct flow_cache_percpu);
-	if (!fc->percpu)
-		return -ENOMEM;
-
-	if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
-		goto err;
-
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	return 0;
-
-err:
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	del_timer_sync(&fc->rnd_timer);
-
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
-	int ret;
-
-	ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
-				      "net/flow:prepare",
-				      flow_cache_cpu_up_prep,
-				      flow_cache_cpu_dead);
-	WARN_ON(ret < 0);
-}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index fc5fc4594c90..0cc672aba1f0 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -998,51 +998,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
 }
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
-{
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
-	       sizeof(keys.addrs.v6addrs.src));
-	memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
-	       sizeof(keys.addrs.v6addrs.dst));
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys.ports.src = fl6->fl6_sport;
-	keys.ports.dst = fl6->fl6_dport;
-	keys.keyid.keyid = fl6->fl6_gre_key;
-	keys.tags.flow_label = (__force u32)fl6->flowlabel;
-	keys.basic.ip_proto = fl6->flowi6_proto;
-
-	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
-			  flow_keys_have_l4(&keys));
-
-	return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi6);
-
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	keys.addrs.v4addrs.src = fl4->saddr;
-	keys.addrs.v4addrs.dst = fl4->daddr;
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-	keys.ports.src = fl4->fl4_sport;
-	keys.ports.dst = fl4->fl4_dport;
-	keys.keyid.keyid = fl4->fl4_gre_key;
-	keys.basic.ip_proto = fl4->flowi4_proto;
-
-	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
-			  flow_keys_have_l4(&keys));
-
-	return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi4);
-
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 		   const struct flow_keys *keys, int hlen)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f990eb8b30a9..42b62c716a33 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -158,31 +158,6 @@ out:
  *
  */
 
-struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
-{
-	struct sk_buff *skb;
-
-	/* Get the HEAD */
-	skb = kmem_cache_alloc_node(skbuff_head_cache,
-				    gfp_mask & ~__GFP_DMA, node);
-	if (!skb)
-		goto out;
-
-	/*
-	 * Only clear those fields we need to clear, not those that we will
-	 * actually initialise below. Hence, don't put any more fields after
-	 * the tail pointer in struct sk_buff!
-	 */
-	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->head = NULL;
-	skb->truesize = sizeof(struct sk_buff);
-	refcount_set(&skb->users, 1);
-
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-out:
-	return skb;
-}
-
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -592,21 +567,10 @@ static void skb_release_data(struct sk_buff *skb)
 	for (i = 0; i < shinfo->nr_frags; i++)
 		__skb_frag_unref(&shinfo->frags[i]);
 
-	/*
-	 * If skb buf is from userspace, we need to notify the caller
-	 * the lower device DMA has done;
-	 */
-	if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		struct ubuf_info *uarg;
-
-		uarg = shinfo->destructor_arg;
-		if (uarg->callback)
-			uarg->callback(uarg, true);
-	}
-
 	if (shinfo->frag_list)
 		kfree_skb_list(shinfo->frag_list);
 
+	skb_zcopy_clear(skb, true);
 	skb_free_head(skb);
 }
 
@@ -720,14 +684,7 @@ EXPORT_SYMBOL(kfree_skb_list);
  */
 void skb_tx_error(struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		struct ubuf_info *uarg;
-
-		uarg = skb_shinfo(skb)->destructor_arg;
-		if (uarg->callback)
-			uarg->callback(uarg, false);
-		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
-	}
+	skb_zcopy_clear(skb, true);
 }
 EXPORT_SYMBOL(skb_tx_error);
 
@@ -762,8 +719,7 @@ void consume_stateless_skb(struct sk_buff *skb)
 		return;
 
 	trace_consume_skb(skb);
-	if (likely(skb->head))
-		skb_release_data(skb);
+	skb_release_data(skb);
 	kfree_skbmem(skb);
 }
 
@@ -941,6 +897,273 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+{
+	unsigned long max_pg, num_pg, new_pg, old_pg;
+	struct user_struct *user;
+
+	if (capable(CAP_IPC_LOCK) || !size)
+		return 0;
+
+	num_pg = (size >> PAGE_SHIFT) + 2;	/* worst case */
+	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	user = mmp->user ? : current_user();
+
+	do {
+		old_pg = atomic_long_read(&user->locked_vm);
+		new_pg = old_pg + num_pg;
+		if (new_pg > max_pg)
+			return -ENOBUFS;
+	} while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
+		 old_pg);
+
+	if (!mmp->user) {
+		mmp->user = get_uid(user);
+		mmp->num_pg = num_pg;
+	} else {
+		mmp->num_pg += num_pg;
+	}
+
+	return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+{
+	if (mmp->user) {
+		atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
+		free_uid(mmp->user);
+	}
+}
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
+{
+	struct ubuf_info *uarg;
+	struct sk_buff *skb;
+
+	WARN_ON_ONCE(!in_task());
+
+	if (!sock_flag(sk, SOCK_ZEROCOPY))
+		return NULL;
+
+	skb = sock_omalloc(sk, 0, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
+	uarg = (void *)skb->cb;
+	uarg->mmp.user = NULL;
+
+	if (mm_account_pinned_pages(&uarg->mmp, size)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	uarg->callback = sock_zerocopy_callback;
+	uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
+	uarg->len = 1;
+	uarg->bytelen = size;
+	uarg->zerocopy = 1;
+	atomic_set(&uarg->refcnt, 0);
+	sock_hold(sk);
+
+	return uarg;
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
+
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+{
+	return container_of((void *)uarg, struct sk_buff, cb);
+}
+
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+					struct ubuf_info *uarg)
+{
+	if (uarg) {
+		const u32 byte_limit = 1 << 19;		/* limit to a few TSO */
+		u32 bytelen, next;
+
+		/* realloc only when socket is locked (TCP, UDP cork),
+		 * so uarg->len and sk_zckey access is serialized
+		 */
+		if (!sock_owned_by_user(sk)) {
+			WARN_ON_ONCE(1);
+			return NULL;
+		}
+
+		bytelen = uarg->bytelen + size;
+		if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
+			/* TCP can create new skb to attach new uarg */
+			if (sk->sk_type == SOCK_STREAM)
+				goto new_alloc;
+			return NULL;
+		}
+
+		next = (u32)atomic_read(&sk->sk_zckey);
+		if ((u32)(uarg->id + uarg->len) == next) {
+			if (mm_account_pinned_pages(&uarg->mmp, size))
+				return NULL;
+			uarg->len++;
+			uarg->bytelen = bytelen;
+			atomic_set(&sk->sk_zckey, ++next);
+			return uarg;
+		}
+	}
+
+new_alloc:
+	return sock_zerocopy_alloc(sk, size);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
+
+static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
+{
+	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+	u32 old_lo, old_hi;
+	u64 sum_len;
+
+	old_lo = serr->ee.ee_info;
+	old_hi = serr->ee.ee_data;
+	sum_len = old_hi - old_lo + 1ULL + len;
+
+	if (sum_len >= (1ULL << 32))
+		return false;
+
+	if (lo != old_hi + 1)
+		return false;
+
+	serr->ee.ee_data += len;
+	return true;
+}
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
+{
+	struct sk_buff *tail, *skb = skb_from_uarg(uarg);
+	struct sock_exterr_skb *serr;
+	struct sock *sk = skb->sk;
+	struct sk_buff_head *q;
+	unsigned long flags;
+	u32 lo, hi;
+	u16 len;
+
+	/* if !len, there was only 1 call, and it was aborted
+	 * so do not queue a completion notification
+	 */
+	if (!uarg->len || sock_flag(sk, SOCK_DEAD))
+		goto release;
+
+	len = uarg->len;
+	lo = uarg->id;
+	hi = uarg->id + len - 1;
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = 0;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+	serr->ee.ee_data = hi;
+	serr->ee.ee_info = lo;
+	if (!success)
+		serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+
+	q = &sk->sk_error_queue;
+	spin_lock_irqsave(&q->lock, flags);
+	tail = skb_peek_tail(q);
+	if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
+	    !skb_zerocopy_notify_extend(tail, lo, len)) {
+		__skb_queue_tail(q, skb);
+		skb = NULL;
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+
+	sk->sk_error_report(sk);
+
+release:
+	consume_skb(skb);
+	sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
+
+void sock_zerocopy_put(struct ubuf_info *uarg)
+{
+	if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
+		mm_unaccount_pinned_pages(&uarg->mmp);
+
+		if (uarg->callback)
+			uarg->callback(uarg, uarg->zerocopy);
+		else
+			consume_skb(skb_from_uarg(uarg));
+	}
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put);
+
+void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+{
+	if (uarg) {
+		struct sock *sk = skb_from_uarg(uarg)->sk;
+
+		atomic_dec(&sk->sk_zckey);
+		uarg->len--;
+
+		/* sock_zerocopy_put expects a ref. Most sockets take one per
+		 * skb, which is zero on abort. tcp_sendmsg holds one extra, to
+		 * avoid an skb send inside the main loop triggering uarg free.
+		 */
+		if (sk->sk_type != SOCK_STREAM)
+			atomic_inc(&uarg->refcnt);
+
+		sock_zerocopy_put(uarg);
+	}
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
+
+extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+				   struct iov_iter *from, size_t length);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+			     struct msghdr *msg, int len,
+			     struct ubuf_info *uarg)
+{
+	struct ubuf_info *orig_uarg = skb_zcopy(skb);
+	struct iov_iter orig_iter = msg->msg_iter;
+	int err, orig_len = skb->len;
+
+	/* An skb can only point to one uarg. This edge case happens when
+	 * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
+	 */
+	if (orig_uarg && uarg != orig_uarg)
+		return -EEXIST;
+
+	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+	if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+		/* Streams do not free skb on error. Reset to prev state. */
+		msg->msg_iter = orig_iter;
+		___pskb_trim(skb, orig_len);
+		return err;
+	}
+
+	skb_zcopy_set(skb, uarg);
+	return skb->len - orig_len;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
+
+static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
+			      gfp_t gfp_mask)
+{
+	if (skb_zcopy(orig)) {
+		if (skb_zcopy(nskb)) {
+			/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
+			if (!gfp_mask) {
+				WARN_ON_ONCE(1);
+				return -ENOMEM;
+			}
+			if (skb_uarg(nskb) == skb_uarg(orig))
+				return 0;
+			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
+				return -EIO;
+		}
+		skb_zcopy_set(nskb, skb_uarg(orig));
+	}
+	return 0;
+}
+
 /**
  *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
  *	@skb: the skb to modify
@@ -958,15 +1181,19 @@ EXPORT_SYMBOL_GPL(skb_morph);
  */
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	int i;
 	int num_frags = skb_shinfo(skb)->nr_frags;
 	struct page *page, *head = NULL;
-	struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+	int i, new_frags;
+	u32 d_off;
 
-	for (i = 0; i < num_frags; i++) {
-		u8 *vaddr;
-		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+	if (!num_frags)
+		return 0;
 
+	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
+		return -EINVAL;
+
+	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < new_frags; i++) {
 		page = alloc_page(gfp_mask);
 		if (!page) {
 			while (head) {
@@ -976,28 +1203,51 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 			}
 			return -ENOMEM;
 		}
-		vaddr = kmap_atomic(skb_frag_page(f));
-		memcpy(page_address(page),
-		       vaddr + f->page_offset, skb_frag_size(f));
-		kunmap_atomic(vaddr);
 		set_page_private(page, (unsigned long)head);
 		head = page;
 	}
 
+	page = head;
+	d_off = 0;
+	for (i = 0; i < num_frags; i++) {
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+		u32 p_off, p_len, copied;
+		struct page *p;
+		u8 *vaddr;
+
+		skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+				      p, p_off, p_len, copied) {
+			u32 copy, done = 0;
+			vaddr = kmap_atomic(p);
+
+			while (done < p_len) {
+				if (d_off == PAGE_SIZE) {
+					d_off = 0;
+					page = (struct page *)page_private(page);
+				}
+				copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
+				memcpy(page_address(page) + d_off,
+				       vaddr + p_off + done, copy);
+				done += copy;
+				d_off += copy;
+			}
+			kunmap_atomic(vaddr);
+		}
+	}
+
 	/* skb frags release userspace buffers */
 	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
 
-	uarg->callback(uarg, false);
-
 	/* skb frags point to kernel buffers */
-	for (i = num_frags - 1; i >= 0; i--) {
-		__skb_fill_page_desc(skb, i, head, 0,
-				     skb_shinfo(skb)->frags[i].size);
+	for (i = 0; i < new_frags - 1; i++) {
+		__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
 		head = (struct page *)page_private(head);
 	}
+	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+	skb_shinfo(skb)->nr_frags = new_frags;
 
-	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+	skb_zcopy_clear(skb, false);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(skb_copy_ubufs);
@@ -1158,7 +1408,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
 	if (skb_shinfo(skb)->nr_frags) {
 		int i;
 
-		if (skb_orphan_frags(skb, gfp_mask)) {
+		if (skb_orphan_frags(skb, gfp_mask) ||
+		    skb_zerocopy_clone(n, skb, gfp_mask)) {
 			kfree_skb(n);
 			n = NULL;
 			goto out;
@@ -1235,9 +1486,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	 * be since all we did is relocate the values
 	 */
 	if (skb_cloned(skb)) {
-		/* copy this zero copy skb frags */
 		if (skb_orphan_frags(skb, gfp_mask))
 			goto nofrags;
+		if (skb_zcopy(skb))
+			atomic_inc(&skb_uarg(skb)->refcnt);
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 			skb_frag_ref(skb, i);
 
@@ -1719,6 +1971,8 @@ pull_pages:
 			if (eat) {
 				skb_shinfo(skb)->frags[k].page_offset += eat;
 				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+				if (!i)
+					goto end;
 				eat = 0;
 			}
 			k++;
@@ -1726,9 +1980,13 @@ pull_pages:
 	}
 	skb_shinfo(skb)->nr_frags = k;
 
+end:
 	skb->tail     += delta;
 	skb->data_len -= delta;
 
+	if (!skb->data_len)
+		skb_zcopy_clear(skb, false);
+
 	return skb_tail_pointer(skb);
 }
 EXPORT_SYMBOL(__pskb_pull_tail);
@@ -1776,16 +2034,20 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 
 		end = start + skb_frag_size(f);
 		if ((copy = end - offset) > 0) {
+			u32 p_off, p_len, copied;
+			struct page *p;
 			u8 *vaddr;
 
 			if (copy > len)
 				copy = len;
 
-			vaddr = kmap_atomic(skb_frag_page(f));
-			memcpy(to,
-			       vaddr + f->page_offset + offset - start,
-			       copy);
-			kunmap_atomic(vaddr);
+			skb_frag_foreach_page(f,
+					      f->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				memcpy(to + copied, vaddr + p_off, p_len);
+				kunmap_atomic(vaddr);
+			}
 
 			if ((len -= copy) == 0)
 				return 0;
@@ -2005,6 +2267,107 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 }
 EXPORT_SYMBOL_GPL(skb_splice_bits);
 
+/* Send skb data on a socket. Socket must be locked. */
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+			 int len)
+{
+	unsigned int orig_len = len;
+	struct sk_buff *head = skb;
+	unsigned short fragidx;
+	int slen, ret;
+
+do_frag_list:
+
+	/* Deal with head data */
+	while (offset < skb_headlen(skb) && len) {
+		struct kvec kv;
+		struct msghdr msg;
+
+		slen = min_t(int, len, skb_headlen(skb) - offset);
+		kv.iov_base = skb->data + offset;
+		kv.iov_len = len;
+		memset(&msg, 0, sizeof(msg));
+
+		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
+		if (ret <= 0)
+			goto error;
+
+		offset += ret;
+		len -= ret;
+	}
+
+	/* All the data was skb head? */
+	if (!len)
+		goto out;
+
+	/* Make offset relative to start of frags */
+	offset -= skb_headlen(skb);
+
+	/* Find where we are in frag list */
+	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+
+		if (offset < frag->size)
+			break;
+
+		offset -= frag->size;
+	}
+
+	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+
+		slen = min_t(size_t, len, frag->size - offset);
+
+		while (slen) {
+			ret = kernel_sendpage_locked(sk, frag->page.p,
+						     frag->page_offset + offset,
+						     slen, MSG_DONTWAIT);
+			if (ret <= 0)
+				goto error;
+
+			len -= ret;
+			offset += ret;
+			slen -= ret;
+		}
+
+		offset = 0;
+	}
+
+	if (len) {
+		/* Process any frag lists */
+
+		if (skb == head) {
+			if (skb_has_frag_list(skb)) {
+				skb = skb_shinfo(skb)->frag_list;
+				goto do_frag_list;
+			}
+		} else if (skb->next) {
+			skb = skb->next;
+			goto do_frag_list;
+		}
+	}
+
+out:
+	return orig_len - len;
+
+error:
+	return orig_len == len ? ret : orig_len - len;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+
+/* Send skb data on a socket. */
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
+{
+	int ret = 0;
+
+	lock_sock(sk);
+	ret = skb_send_sock_locked(sk, skb, offset, len);
+	release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock);
+
 /**
  *	skb_store_bits - store bits from kernel buffer to skb
  *	@skb: destination buffer
@@ -2044,15 +2407,20 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 
 		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
+			u32 p_off, p_len, copied;
+			struct page *p;
 			u8 *vaddr;
 
 			if (copy > len)
 				copy = len;
 
-			vaddr = kmap_atomic(skb_frag_page(frag));
-			memcpy(vaddr + frag->page_offset + offset - start,
-			       from, copy);
-			kunmap_atomic(vaddr);
+			skb_frag_foreach_page(frag,
+					      frag->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				memcpy(vaddr + p_off, from + copied, p_len);
+				kunmap_atomic(vaddr);
+			}
 
 			if ((len -= copy) == 0)
 				return 0;
@@ -2117,20 +2485,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 
 		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
+			u32 p_off, p_len, copied;
+			struct page *p;
 			__wsum csum2;
 			u8 *vaddr;
 
 			if (copy > len)
 				copy = len;
-			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = ops->update(vaddr + frag->page_offset +
-					    offset - start, copy, 0);
-			kunmap_atomic(vaddr);
-			csum = ops->combine(csum, csum2, pos, copy);
+
+			skb_frag_foreach_page(frag,
+					      frag->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				csum2 = ops->update(vaddr + p_off, p_len, 0);
+				kunmap_atomic(vaddr);
+				csum = ops->combine(csum, csum2, pos, p_len);
+				pos += p_len;
+			}
+
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
-			pos    += copy;
 		}
 		start = end;
 	}
@@ -2203,24 +2578,31 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 
 		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
 		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			u32 p_off, p_len, copied;
+			struct page *p;
 			__wsum csum2;
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 			if (copy > len)
 				copy = len;
-			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = csum_partial_copy_nocheck(vaddr +
-							  frag->page_offset +
-							  offset - start, to,
-							  copy, 0);
-			kunmap_atomic(vaddr);
-			csum = csum_block_add(csum, csum2, pos);
+
+			skb_frag_foreach_page(frag,
+					      frag->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
+								  to + copied,
+								  p_len, 0);
+				kunmap_atomic(vaddr);
+				csum = csum_block_add(csum, csum2, pos);
+				pos += p_len;
+			}
+
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
 			to     += copy;
-			pos    += copy;
 		}
 		start = end;
 	}
@@ -2360,6 +2742,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
 		skb_tx_error(from);
 		return -ENOMEM;
 	}
+	skb_zerocopy_clone(to, from, GFP_ATOMIC);
 
 	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
 		if (!len)
@@ -2657,6 +3040,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 
 	skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
 				      SKBTX_SHARED_FRAG;
+	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2700,6 +3084,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 
 	if (skb_headlen(skb))
 		return 0;
+	if (skb_zcopy(tgt) || skb_zcopy(skb))
+		return 0;
 
 	todo = shiftlen;
 	from = 0;
@@ -3273,6 +3659,8 @@ normal:
 
 		skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
 					      SKBTX_SHARED_FRAG;
+		if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
+			goto err;
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
@@ -4396,6 +4784,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 
 	if (skb_has_frag_list(to) || skb_has_frag_list(from))
 		return false;
+	if (skb_zcopy(to) || skb_zcopy(from))
+		return false;
 
 	if (skb_headlen(from) != 0) {
 		struct page *page;
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404c73eb..9ea988d25b0a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1055,6 +1055,20 @@ set_rcvbuf:
 		if (val == 1)
 			dst_negative_advice(sk);
 		break;
+
+	case SO_ZEROCOPY:
+		if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+			ret = -ENOTSUPP;
+		else if (sk->sk_protocol != IPPROTO_TCP)
+			ret = -ENOTSUPP;
+		else if (sk->sk_state != TCP_CLOSE)
+			ret = -EBUSY;
+		else if (val < 0 || val > 1)
+			ret = -EINVAL;
+		else
+			sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1383,6 +1397,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val64 = sock_gen_cookie(sk);
 		break;
 
+	case SO_ZEROCOPY:
+		v.val = sock_flag(sk, SOCK_ZEROCOPY);
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1670,6 +1688,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		atomic_set(&newsk->sk_drops, 0);
 		newsk->sk_send_head	= NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+		atomic_set(&newsk->sk_zckey, 0);
 
 		sock_reset_flag(newsk, SOCK_DONE);
 
@@ -1923,6 +1942,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 }
 EXPORT_SYMBOL(sock_wmalloc);
 
+static void sock_ofree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+			     gfp_t priority)
+{
+	struct sk_buff *skb;
+
+	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+	    sysctl_optmem_max)
+		return NULL;
+
+	skb = alloc_skb(size, priority);
+	if (!skb)
+		return NULL;
+
+	atomic_add(skb->truesize, &sk->sk_omem_alloc);
+	skb->sk = sk;
+	skb->destructor = sock_ofree;
+	return skb;
+}
+
 /*
  * Allocate a memory block from the socket's option memory buffer.
  */
@@ -2500,6 +2546,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 }
 EXPORT_SYMBOL(sock_no_sendmsg);
 
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
 		    int flags)
 {
@@ -2528,6 +2580,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
 }
 EXPORT_SYMBOL(sock_no_sendpage);
 
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+				int offset, size_t size, int flags)
+{
+	ssize_t res;
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov;
+	char *kaddr = kmap(page);
+
+	iov.iov_base = kaddr + offset;
+	iov.iov_len = size;
+	res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+	kunmap(page);
+	return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
 /*
  *	Default Socket Callbacks
  */
@@ -2673,6 +2741,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 
 	sk->sk_stamp = SK_DEFAULT_STAMP;
+	atomic_set(&sk->sk_zckey, 0);
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 416ac4ef9ba9..a91e520e735f 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -190,6 +190,8 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct sk_buff *nskb = NULL;
+	struct pcpu_sw_netstats *s;
+	struct dsa_slave_priv *p;
 
 	if (unlikely(dst == NULL)) {
 		kfree_skb(skb);
@@ -207,12 +209,16 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	skb = nskb;
+	p = netdev_priv(skb->dev);
 	skb_push(skb, ETH_HLEN);
 	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, skb->dev);
 
-	skb->dev->stats.rx_packets++;
-	skb->dev->stats.rx_bytes += skb->len;
+	s = this_cpu_ptr(p->stats64);
+	u64_stats_update_begin(&s->syncp);
+	s->rx_packets++;
+	s->rx_bytes += skb->len;
+	u64_stats_update_end(&s->syncp);
 
 	netif_receive_skb(skb);
 
@@ -220,6 +226,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 }
 
 #ifdef CONFIG_PM_SLEEP
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+	return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
+}
+
 int dsa_switch_suspend(struct dsa_switch *ds)
 {
 	int i, ret = 0;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 55982cc39b24..306cff229def 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -77,6 +77,8 @@ struct dsa_slave_priv {
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
 					struct net_device *dev);
 
+	struct pcpu_sw_netstats	*stats64;
+
 	/* DSA port data, such as switch, port index, etc. */
 	struct dsa_port		*dp;
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9507bd38cf04..c6b5de2fe413 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -352,10 +352,14 @@ static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct pcpu_sw_netstats *s;
 	struct sk_buff *nskb;
 
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
+	s = this_cpu_ptr(p->stats64);
+	u64_stats_update_begin(&s->syncp);
+	s->tx_packets++;
+	s->tx_bytes += skb->len;
+	u64_stats_update_end(&s->syncp);
 
 	/* Transmit function may have to reallocate the original SKB,
 	 * in which case it must have freed it. Only free it here on error.
@@ -594,11 +598,26 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->dp->ds;
-
-	data[0] = dev->stats.tx_packets;
-	data[1] = dev->stats.tx_bytes;
-	data[2] = dev->stats.rx_packets;
-	data[3] = dev->stats.rx_bytes;
+	struct pcpu_sw_netstats *s;
+	unsigned int start;
+	int i;
+
+	for_each_possible_cpu(i) {
+		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+		s = per_cpu_ptr(p->stats64, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&s->syncp);
+			tx_packets = s->tx_packets;
+			tx_bytes = s->tx_bytes;
+			rx_packets = s->rx_packets;
+			rx_bytes = s->rx_bytes;
+		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
+		data[0] += tx_packets;
+		data[1] += tx_bytes;
+		data[2] += rx_packets;
+		data[3] += rx_bytes;
+	}
 	if (ds->ops->get_ethtool_stats)
 		ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4);
 }
@@ -648,17 +667,24 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	struct dsa_switch *ds = p->dp->ds;
 	int ret;
 
-	if (!ds->ops->set_eee)
+	/* Port's PHY and MAC both need to be EEE capable */
+	if (!p->phy)
+		return -ENODEV;
+
+	if (!ds->ops->set_mac_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e);
+	ret = ds->ops->set_mac_eee(ds, p->dp->index, e);
 	if (ret)
 		return ret;
 
-	if (p->phy)
-		ret = phy_ethtool_set_eee(p->phy, e);
+	if (e->eee_enabled) {
+		ret = phy_init_eee(p->phy, 0);
+		if (ret)
+			return ret;
+	}
 
-	return ret;
+	return phy_ethtool_set_eee(p->phy, e);
 }
 
 static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -667,17 +693,18 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
 	struct dsa_switch *ds = p->dp->ds;
 	int ret;
 
-	if (!ds->ops->get_eee)
+	/* Port's PHY and MAC both need to be EEE capable */
+	if (!p->phy)
+		return -ENODEV;
+
+	if (!ds->ops->get_mac_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->get_eee(ds, p->dp->index, e);
+	ret = ds->ops->get_mac_eee(ds, p->dp->index, e);
 	if (ret)
 		return ret;
 
-	if (p->phy)
-		ret = phy_ethtool_get_eee(p->phy, e);
-
-	return ret;
+	return phy_ethtool_get_eee(p->phy, e);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -747,12 +774,12 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
 }
 
 static int dsa_slave_add_cls_matchall(struct net_device *dev,
-				      __be16 protocol,
 				      struct tc_cls_matchall_offload *cls,
 				      bool ingress)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_mall_tc_entry *mall_tc_entry;
+	__be16 protocol = cls->common.protocol;
 	struct dsa_switch *ds = p->dp->ds;
 	struct net *net = dev_net(dev);
 	struct dsa_slave_priv *to_p;
@@ -765,7 +792,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
 	if (!ds->ops->port_mirror_add)
 		return err;
 
-	if (!tc_single_action(cls->exts))
+	if (!tcf_exts_has_one_action(cls->exts))
 		return err;
 
 	tcf_exts_to_list(cls->exts, &actions);
@@ -836,31 +863,64 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
 	kfree(mall_tc_entry);
 }
 
-static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
-			      u32 chain_index, __be16 protocol,
-			      struct tc_to_netdev *tc)
+static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
+					   struct tc_cls_matchall_offload *cls)
 {
-	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+	bool ingress = TC_H_MAJ(cls->common.handle) == TC_H_MAJ(TC_H_INGRESS);
 
-	if (chain_index)
+	if (cls->common.chain_index)
 		return -EOPNOTSUPP;
 
-	switch (tc->type) {
-	case TC_SETUP_MATCHALL:
-		switch (tc->cls_mall->command) {
-		case TC_CLSMATCHALL_REPLACE:
-			return dsa_slave_add_cls_matchall(dev, protocol,
-							  tc->cls_mall,
-							  ingress);
-		case TC_CLSMATCHALL_DESTROY:
-			dsa_slave_del_cls_matchall(dev, tc->cls_mall);
-			return 0;
-		}
+	switch (cls->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return dsa_slave_add_cls_matchall(dev, cls, ingress);
+	case TC_CLSMATCHALL_DESTROY:
+		dsa_slave_del_cls_matchall(dev, cls);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			      void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return dsa_slave_setup_tc_cls_matchall(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
+static void dsa_slave_get_stats64(struct net_device *dev,
+				  struct rtnl_link_stats64 *stats)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct pcpu_sw_netstats *s;
+	unsigned int start;
+	int i;
+
+	netdev_stats_to_stats64(stats, &dev->stats);
+	for_each_possible_cpu(i) {
+		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+		s = per_cpu_ptr(p->stats64, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&s->syncp);
+			tx_packets = s->tx_packets;
+			tx_bytes = s->tx_bytes;
+			rx_packets = s->rx_packets;
+			rx_bytes = s->rx_bytes;
+		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
+
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes += tx_bytes;
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes += rx_bytes;
+	}
+}
+
 void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
 {
 	ops->get_sset_count = dsa_cpu_port_get_sset_count;
@@ -936,6 +996,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 	.ndo_get_phys_port_name	= dsa_slave_get_phys_port_name,
 	.ndo_setup_tc		= dsa_slave_setup_tc,
+	.ndo_get_stats64	= dsa_slave_get_stats64,
 };
 
 static const struct switchdev_ops dsa_slave_switchdev_ops = {
@@ -1171,6 +1232,11 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	slave_dev->vlan_features = master->vlan_features;
 
 	p = netdev_priv(slave_dev);
+	p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!p->stats64) {
+		free_netdev(slave_dev);
+		return -ENOMEM;
+	}
 	p->dp = &ds->ports[port];
 	INIT_LIST_HEAD(&p->mall_tc_list);
 	p->xmit = dst->tag_ops->xmit;
@@ -1185,6 +1251,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 		netdev_err(master, "error %d registering interface %s\n",
 			   ret, slave_dev->name);
 		ds->ports[port].netdev = NULL;
+		free_percpu(p->stats64);
 		free_netdev(slave_dev);
 		return ret;
 	}
@@ -1195,6 +1262,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	if (ret) {
 		netdev_err(master, "error %d setting up slave phy\n", ret);
 		unregister_netdev(slave_dev);
+		free_percpu(p->stats64);
 		free_netdev(slave_dev);
 		return ret;
 	}
@@ -1217,6 +1285,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
 			of_phy_deregister_fixed_link(port_dn);
 	}
 	unregister_netdev(slave_dev);
+	free_percpu(p->stats64);
 	free_netdev(slave_dev);
 }
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76c2077c3f5b..683ffafb6ff9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -944,6 +944,8 @@ const struct proto_ops inet_stream_ops = {
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
+	.sendmsg_locked    = tcp_sendmsg_locked,
+	.sendpage_locked   = tcp_sendpage_locked,
 	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -1219,10 +1221,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features)
 {
-	bool udpfrag = false, fixedid = false, gso_partial, encap;
+	bool fixedid = false, gso_partial, encap;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	const struct net_offload *ops;
-	unsigned int offset = 0;
 	struct iphdr *iph;
 	int proto, tot_len;
 	int nhoff;
@@ -1257,7 +1258,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
 	if (!skb->encapsulation || encap) {
-		udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
 		fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
 
 		/* fixed ID is invalid if DF bit is not set */
@@ -1277,13 +1277,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	skb = segs;
 	do {
 		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
-		if (udpfrag) {
-			iph->frag_off = htons(offset >> 3);
-			if (skb->next)
-				iph->frag_off |= htons(IP_MF);
-			offset += skb->len - nhoff - ihl;
-			tot_len = skb->len - nhoff;
-		} else if (skb_is_gso(skb)) {
+		if (skb_is_gso(skb)) {
 			if (!fixedid) {
 				iph->id = htons(id);
 				id += skb_shinfo(skb)->gso_segs;
@@ -1771,6 +1765,11 @@ static const struct net_offload ipip_offload = {
 	},
 };
 
+static int __init ipip_offload_init(void)
+{
+	return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
+}
+
 static int __init ipv4_offload_init(void)
 {
 	/*
@@ -1780,9 +1779,10 @@ static int __init ipv4_offload_init(void)
 		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
 	if (tcpv4_offload_init() < 0)
 		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+	if (ipip_offload_init() < 0)
+		pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
 
 	dev_add_offload(&ip_packet_offload);
-	inet_add_offload(&ipip_offload, IPPROTO_IPIP);
 	return 0;
 }
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a159a3c..2cba559f14df 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net)
 	int err;
 	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
 
-	net->ipv4.fib_seq = 0;
+	err = fib4_notifier_init(net);
+	if (err)
+		return err;
 
 	/* Avoid false sharing : Use at least a full cache line */
 	size = max_t(size_t, size, L1_CACHE_BYTES);
 
 	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
-	if (!net->ipv4.fib_table_hash)
-		return -ENOMEM;
+	if (!net->ipv4.fib_table_hash) {
+		err = -ENOMEM;
+		goto err_table_hash_alloc;
+	}
 
 	err = fib4_rules_init(net);
 	if (err < 0)
-		goto fail;
+		goto err_rules_init;
 	return 0;
 
-fail:
+err_rules_init:
 	kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+	fib4_notifier_exit(net);
 	return err;
 }
 
@@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net)
 #endif
 	rtnl_unlock();
 	kfree(net->ipv4.fib_table_hash);
+	fib4_notifier_exit(net);
 }
 
 static int __net_init fib_net_init(struct net *net)
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d975947..5d7afb145562 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,71 @@
 #include <linux/rtnetlink.h>
 #include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
 #include <linux/kernel.h>
 #include <net/net_namespace.h>
+#include <net/fib_notifier.h>
 #include <net/netns/ipv4.h>
 #include <net/ip_fib.h>
 
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
-		      enum fib_event_type event_type,
-		      struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
 {
-	info->net = net;
-	return nb->notifier_call(nb, event_type, info);
+	info->family = AF_INET;
+	return call_fib_notifier(nb, net, event_type, info);
 }
 
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
-		       struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info)
 {
+	ASSERT_RTNL();
+
+	info->family = AF_INET;
 	net->ipv4.fib_seq++;
-	info->net = net;
-	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+	return call_fib_notifiers(net, event_type, info);
 }
 
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
 {
-	unsigned int fib_seq = 0;
-	struct net *net;
-
-	rtnl_lock();
-	for_each_net(net)
-		fib_seq += net->ipv4.fib_seq;
-	rtnl_unlock();
+	ASSERT_RTNL();
 
-	return fib_seq;
+	return net->ipv4.fib_seq + fib4_rules_seq_read(net);
 }
 
-static bool fib_dump_is_consistent(struct notifier_block *nb,
-				   void (*cb)(struct notifier_block *nb),
-				   unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
 {
-	atomic_notifier_chain_register(&fib_chain, nb);
-	if (fib_seq == fib_seq_sum())
-		return true;
-	atomic_notifier_chain_unregister(&fib_chain, nb);
-	if (cb)
-		cb(nb);
-	return false;
+	int err;
+
+	err = fib4_rules_dump(net, nb);
+	if (err)
+		return err;
+
+	fib_notify(net, nb);
+
+	return 0;
 }
 
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
-			  void (*cb)(struct notifier_block *nb))
-{
-	int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+	.family		= AF_INET,
+	.fib_seq_read	= fib4_seq_read,
+	.fib_dump	= fib4_dump,
+};
 
-	do {
-		unsigned int fib_seq = fib_seq_sum();
-		struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
 
-		/* Mutex semantics guarantee that every change done to
-		 * FIB tries before we read the change sequence counter
-		 * is now visible to us.
-		 */
-		rcu_read_lock();
-		for_each_net_rcu(net) {
-			fib_rules_notify(net, nb);
-			fib_notify(net, nb);
-		}
-		rcu_read_unlock();
+	net->ipv4.fib_seq = 0;
 
-		if (fib_dump_is_consistent(nb, cb, fib_seq))
-			return 0;
-	} while (++retries < FIB_DUMP_MAX_RETRIES);
+	ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv4.notifier_ops = ops;
 
-	return -EBUSY;
+	return 0;
 }
-EXPORT_SYMBOL(register_fib_notifier);
 
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
 {
-	return atomic_notifier_chain_unregister(&fib_chain, nb);
+	fib_notifier_ops_unregister(net->ipv4.notifier_ops);
 }
-EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf977eb2..35d646a62ad4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib4_rule_default);
 
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, AF_INET);
+}
+
 int __fib_lookup(struct net *net, struct flowi4 *flp,
 		 struct fib_result *res, unsigned int flags)
 {
@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
 	return NULL;
 }
 
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
-				  enum fib_event_type event_type,
-				  struct fib_rule *rule)
-{
-	struct fib_rule_notifier_info info = {
-		.rule = rule,
-	};
-
-	return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
-				   enum fib_event_type event_type,
-				   struct fib_rule *rule)
-{
-	struct fib_rule_notifier_info info = {
-		.rule = rule,
-	};
-
-	return call_fib_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-	struct fib_rules_ops *ops = net->ipv4.rules_ops;
-	struct fib_rule *rule;
-
-	list_for_each_entry_rcu(rule, &ops->rules_list, list)
-		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
 static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 	[FRA_FLOW]	= { .type = NLA_U32 },
@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
 
 	err = 0;
 errout:
@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
 errout:
 	return err;
 }
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b8d18171cca3..632b454ce77c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
 #include <net/netlink.h>
 #include <net/nexthop.h>
 #include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
 
 #include "fib_lookup.h"
 
@@ -1342,6 +1343,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
 				rtm->rtm_flags |= RTNH_F_DEAD;
 		}
+		if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
+			rtm->rtm_flags |= RTNH_F_OFFLOAD;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (fi->fib_nh[0].nh_tclassid &&
 		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1449,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
 		if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		    fib_nh->nh_flags & RTNH_F_LINKDOWN)
 			break;
-		return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
-					  &info.info);
+		return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+					   &info.info);
 	case FIB_EVENT_NH_DEL:
 		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
 		    (fib_nh->nh_flags & RTNH_F_DEAD))
-			return call_fib_notifiers(dev_net(fib_nh->nh_dev),
-						  event_type, &info.info);
+			return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+						   event_type, &info.info);
 	default:
 		break;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c69dda6..1a6ffb0dab9c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,6 +81,7 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/fib_notifier.h>
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
@@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
 		.type = type,
 		.tb_id = tb_id,
 	};
-	return call_fib_notifier(nb, net, event_type, &info.info);
+	return call_fib4_notifier(nb, net, event_type, &info.info);
 }
 
 static int call_fib_entry_notifiers(struct net *net,
@@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net,
 		.type = type,
 		.tb_id = tb_id,
 	};
-	return call_fib_notifiers(net, event_type, &info.info);
+	return call_fib4_notifiers(net, event_type, &info.info);
 }
 
 #define MAX_STAT_DEPTH 32
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99170b1..416bb304a281 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	__be16 protocol = skb->protocol;
 	u16 mac_len = skb->mac_len;
 	int gre_offset, outer_hlen;
-	bool need_csum, ufo, gso_partial;
+	bool need_csum, gso_partial;
 
 	if (!skb->encapsulation)
 		goto out;
@@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
 	skb->encap_hdr_csum = need_csum;
 
-	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
 	features &= skb->dev->hw_enc_features;
 
-	/* The only checksum offload we care about from here on out is the
-	 * outer one so strip the existing checksum feature flags based
-	 * on the fact that we will be computing our checksum in software.
-	 */
-	if (ufo) {
-		features &= ~NETIF_F_CSUM_MASK;
-		if (!need_csum)
-			features |= NETIF_F_HW_CSUM;
-	}
-
 	/* segment inner packet. */
 	segs = skb_mac_gso_segment(skb, features);
 	if (IS_ERR_OR_NULL(segs)) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c2be26b98b5f..681e33998e03 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	int type = icmp_param->data.icmph.type;
 	int code = icmp_param->data.icmph.code;
 
-	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
+	if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
 		return;
 
 	/* Needed by both icmp_global_allow and icmp_xmit_lock */
@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
+	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
 		goto out_unlock;
 
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c5a117cc6619..337ad41bb80a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -33,7 +33,7 @@
  *  also be removed if the pool is overloaded i.e. if the total amount of
  *  entries is greater-or-equal than the threshold.
  *
- *  Node pool is organised as an AVL tree.
+ *  Node pool is organised as an RB tree.
  *  Such an implementation has been chosen not just for fun.  It's a way to
  *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
  *  amount of long living nodes in a single hash slot would significantly delay
@@ -45,7 +45,7 @@
  *      AND reference count being 0.
  *  3.  Global variable peer_total is modified under the pool lock.
  *  4.  struct inet_peer fields modification:
- *		avl_left, avl_right, avl_parent, avl_height: pool lock
+ *		rb_node: pool lock
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
@@ -53,30 +53,15 @@
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
-static LIST_HEAD(gc_list);
-static const int gc_delay = 60 * HZ;
-static struct delayed_work gc_work;
-static DEFINE_SPINLOCK(gc_lock);
-
-#define node_height(x) x->avl_height
-
-#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
-#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
-static const struct inet_peer peer_fake_node = {
-	.avl_left	= peer_avl_empty_rcu,
-	.avl_right	= peer_avl_empty_rcu,
-	.avl_height	= 0
-};
-
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
-	bp->root = peer_avl_empty_rcu;
+	bp->rb_root = RB_ROOT;
 	seqlock_init(&bp->lock);
 	bp->total = 0;
 }
 EXPORT_SYMBOL_GPL(inet_peer_base_init);
 
-#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+#define PEER_MAX_GC 32
 
 /* Exported for sysctl_net_ipv4.  */
 int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
@@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
 
-static void inetpeer_gc_worker(struct work_struct *work)
-{
-	struct inet_peer *p, *n, *c;
-	struct list_head list;
-
-	spin_lock_bh(&gc_lock);
-	list_replace_init(&gc_list, &list);
-	spin_unlock_bh(&gc_lock);
-
-	if (list_empty(&list))
-		return;
-
-	list_for_each_entry_safe(p, n, &list, gc_list) {
-
-		if (need_resched())
-			cond_resched();
-
-		c = rcu_dereference_protected(p->avl_left, 1);
-		if (c != peer_avl_empty) {
-			list_add_tail(&c->gc_list, &list);
-			p->avl_left = peer_avl_empty_rcu;
-		}
-
-		c = rcu_dereference_protected(p->avl_right, 1);
-		if (c != peer_avl_empty) {
-			list_add_tail(&c->gc_list, &list);
-			p->avl_right = peer_avl_empty_rcu;
-		}
-
-		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
-
-		if (refcount_read(&p->refcnt) == 1) {
-			list_del(&p->gc_list);
-			kmem_cache_free(peer_cachep, p);
-		}
-	}
-
-	if (list_empty(&list))
-		return;
-
-	spin_lock_bh(&gc_lock);
-	list_splice(&list, &gc_list);
-	spin_unlock_bh(&gc_lock);
-
-	schedule_delayed_work(&gc_work, gc_delay);
-}
-
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -153,225 +91,62 @@ void __init inet_initpeers(void)
 			sizeof(struct inet_peer),
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
-
-	INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
 }
 
-#define rcu_deref_locked(X, BASE)				\
-	rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
-
-/*
- * Called with local BH disabled and the pool lock held.
- */
-#define lookup(_daddr, _stack, _base)				\
-({								\
-	struct inet_peer *u;					\
-	struct inet_peer __rcu **v;				\
-								\
-	stackptr = _stack;					\
-	*stackptr++ = &_base->root;				\
-	for (u = rcu_deref_locked(_base->root, _base);		\
-	     u != peer_avl_empty;) {				\
-		int cmp = inetpeer_addr_cmp(_daddr, &u->daddr);	\
-		if (cmp == 0)					\
-			break;					\
-		if (cmp == -1)					\
-			v = &u->avl_left;			\
-		else						\
-			v = &u->avl_right;			\
-		*stackptr++ = v;				\
-		u = rcu_deref_locked(*v, _base);		\
-	}							\
-	u;							\
-})
-
-/*
- * Called with rcu_read_lock()
- * Because we hold no lock against a writer, its quite possible we fall
- * in an endless loop.
- * But every pointer we follow is guaranteed to be valid thanks to RCU.
- * We exit from this function if number of links exceeds PEER_MAXDEPTH
- */
-static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-				    struct inet_peer_base *base)
+/* Called with rcu_read_lock() or base->lock held */
+static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
+				struct inet_peer_base *base,
+				unsigned int seq,
+				struct inet_peer *gc_stack[],
+				unsigned int *gc_cnt,
+				struct rb_node **parent_p,
+				struct rb_node ***pp_p)
 {
-	struct inet_peer *u = rcu_dereference(base->root);
-	int count = 0;
+	struct rb_node **pp, *parent;
+	struct inet_peer *p;
+
+	pp = &base->rb_root.rb_node;
+	parent = NULL;
+	while (*pp) {
+		int cmp;
 
-	while (u != peer_avl_empty) {
-		int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
+		parent = rcu_dereference_raw(*pp);
+		p = rb_entry(parent, struct inet_peer, rb_node);
+		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
 		if (cmp == 0) {
-			/* Before taking a reference, check if this entry was
-			 * deleted (refcnt=0)
-			 */
-			if (!refcount_inc_not_zero(&u->refcnt)) {
-				u = NULL;
-			}
-			return u;
+			if (!refcount_inc_not_zero(&p->refcnt))
+				break;
+			return p;
+		}
+		if (gc_stack) {
+			if (*gc_cnt < PEER_MAX_GC)
+				gc_stack[(*gc_cnt)++] = p;
+		} else if (unlikely(read_seqretry(&base->lock, seq))) {
+			break;
 		}
 		if (cmp == -1)
-			u = rcu_dereference(u->avl_left);
+			pp = &(*pp)->rb_left;
 		else
-			u = rcu_dereference(u->avl_right);
-		if (unlikely(++count == PEER_MAXDEPTH))
-			break;
+			pp = &(*pp)->rb_right;
 	}
+	*parent_p = parent;
+	*pp_p = pp;
 	return NULL;
 }
 
-/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start, base)				\
-({								\
-	struct inet_peer *u;					\
-	struct inet_peer __rcu **v;				\
-	*stackptr++ = &start->avl_left;				\
-	v = &start->avl_left;					\
-	for (u = rcu_deref_locked(*v, base);			\
-	     u->avl_right != peer_avl_empty_rcu;) {		\
-		v = &u->avl_right;				\
-		*stackptr++ = v;				\
-		u = rcu_deref_locked(*v, base);			\
-	}							\
-	u;							\
-})
-
-/* Called with local BH disabled and the pool lock held.
- * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.
- */
-static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
-			       struct inet_peer __rcu ***stackend,
-			       struct inet_peer_base *base)
-{
-	struct inet_peer __rcu **nodep;
-	struct inet_peer *node, *l, *r;
-	int lh, rh;
-
-	while (stackend > stack) {
-		nodep = *--stackend;
-		node = rcu_deref_locked(*nodep, base);
-		l = rcu_deref_locked(node->avl_left, base);
-		r = rcu_deref_locked(node->avl_right, base);
-		lh = node_height(l);
-		rh = node_height(r);
-		if (lh > rh + 1) { /* l: RH+2 */
-			struct inet_peer *ll, *lr, *lrl, *lrr;
-			int lrh;
-			ll = rcu_deref_locked(l->avl_left, base);
-			lr = rcu_deref_locked(l->avl_right, base);
-			lrh = node_height(lr);
-			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
-				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
-				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
-				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
-				RCU_INIT_POINTER(l->avl_left, ll);       /* ll: RH+1 */
-				RCU_INIT_POINTER(l->avl_right, node);	/* node: RH+1 or RH+2 */
-				l->avl_height = node->avl_height + 1;
-				RCU_INIT_POINTER(*nodep, l);
-			} else { /* ll: RH, lr: RH+1 */
-				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
-				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
-				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
-				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
-				node->avl_height = rh + 1; /* node: RH+1 */
-				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH */
-				RCU_INIT_POINTER(l->avl_right, lrl);	/* lrl: RH or RH-1 */
-				l->avl_height = rh + 1;	/* l: RH+1 */
-				RCU_INIT_POINTER(lr->avl_left, l);	/* l: RH+1 */
-				RCU_INIT_POINTER(lr->avl_right, node);	/* node: RH+1 */
-				lr->avl_height = rh + 2;
-				RCU_INIT_POINTER(*nodep, lr);
-			}
-		} else if (rh > lh + 1) { /* r: LH+2 */
-			struct inet_peer *rr, *rl, *rlr, *rll;
-			int rlh;
-			rr = rcu_deref_locked(r->avl_right, base);
-			rl = rcu_deref_locked(r->avl_left, base);
-			rlh = node_height(rl);
-			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
-				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
-				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
-				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
-				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH+1 */
-				RCU_INIT_POINTER(r->avl_left, node);	/* node: LH+1 or LH+2 */
-				r->avl_height = node->avl_height + 1;
-				RCU_INIT_POINTER(*nodep, r);
-			} else { /* rr: RH, rl: RH+1 */
-				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
-				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
-				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
-				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
-				node->avl_height = lh + 1; /* node: LH+1 */
-				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH */
-				RCU_INIT_POINTER(r->avl_left, rlr);	/* rlr: LH or LH-1 */
-				r->avl_height = lh + 1;	/* r: LH+1 */
-				RCU_INIT_POINTER(rl->avl_right, r);	/* r: LH+1 */
-				RCU_INIT_POINTER(rl->avl_left, node);	/* node: LH+1 */
-				rl->avl_height = lh + 2;
-				RCU_INIT_POINTER(*nodep, rl);
-			}
-		} else {
-			node->avl_height = (lh > rh ? lh : rh) + 1;
-		}
-	}
-}
-
-/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n, base)					\
-do {								\
-	n->avl_height = 1;					\
-	n->avl_left = peer_avl_empty_rcu;			\
-	n->avl_right = peer_avl_empty_rcu;			\
-	/* lockless readers can catch us now */			\
-	rcu_assign_pointer(**--stackptr, n);			\
-	peer_avl_rebalance(stack, stackptr, base);		\
-} while (0)
-
 static void inetpeer_free_rcu(struct rcu_head *head)
 {
 	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
 }
 
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
-			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
-{
-	struct inet_peer __rcu ***stackptr, ***delp;
-
-	if (lookup(&p->daddr, stack, base) != p)
-		BUG();
-	delp = stackptr - 1; /* *delp[0] == p */
-	if (p->avl_left == peer_avl_empty_rcu) {
-		*delp[0] = p->avl_right;
-		--stackptr;
-	} else {
-		/* look for a node to insert instead of p */
-		struct inet_peer *t;
-		t = lookup_rightempty(p, base);
-		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
-		**--stackptr = t->avl_left;
-		/* t is removed, t->daddr > x->daddr for any
-		 * x in p->avl_left subtree.
-		 * Put t in the old place of p. */
-		RCU_INIT_POINTER(*delp[0], t);
-		t->avl_left = p->avl_left;
-		t->avl_right = p->avl_right;
-		t->avl_height = p->avl_height;
-		BUG_ON(delp[1] != &p->avl_left);
-		delp[1] = &t->avl_left; /* was &p->avl_left */
-	}
-	peer_avl_rebalance(stack, stackptr, base);
-	base->total--;
-	call_rcu(&p->rcu, inetpeer_free_rcu);
-}
-
 /* perform garbage collect on all items stacked during a lookup */
-static int inet_peer_gc(struct inet_peer_base *base,
-			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
-			struct inet_peer __rcu ***stackptr)
+static void inet_peer_gc(struct inet_peer_base *base,
+			 struct inet_peer *gc_stack[],
+			 unsigned int gc_cnt)
 {
-	struct inet_peer *p, *gchead = NULL;
+	struct inet_peer *p;
 	__u32 delta, ttl;
-	int cnt = 0;
+	int i;
 
 	if (base->total >= inet_peer_threshold)
 		ttl = 0; /* be aggressive */
@@ -379,43 +154,38 @@ static int inet_peer_gc(struct inet_peer_base *base,
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
 					base->total / inet_peer_threshold * HZ;
-	stackptr--; /* last stack slot is peer_avl_empty */
-	while (stackptr > stack) {
-		stackptr--;
-		p = rcu_deref_locked(**stackptr, base);
-		if (refcount_read(&p->refcnt) == 1) {
-			smp_rmb();
-			delta = (__u32)jiffies - p->dtime;
-			if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
-				p->gc_next = gchead;
-				gchead = p;
-			}
-		}
+	for (i = 0; i < gc_cnt; i++) {
+		p = gc_stack[i];
+		delta = (__u32)jiffies - p->dtime;
+		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
+			gc_stack[i] = NULL;
 	}
-	while ((p = gchead) != NULL) {
-		gchead = p->gc_next;
-		cnt++;
-		unlink_from_pool(p, base, stack);
+	for (i = 0; i < gc_cnt; i++) {
+		p = gc_stack[i];
+		if (p) {
+			rb_erase(&p->rb_node, &base->rb_root);
+			base->total--;
+			call_rcu(&p->rcu, inetpeer_free_rcu);
+		}
 	}
-	return cnt;
 }
 
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
 			       int create)
 {
-	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer *p;
-	unsigned int sequence;
-	int invalidated, gccnt = 0;
+	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
+	struct rb_node **pp, *parent;
+	unsigned int gc_cnt, seq;
+	int invalidated;
 
 	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock();
-	sequence = read_seqbegin(&base->lock);
-	p = lookup_rcu(daddr, base);
-	invalidated = read_seqretry(&base->lock, sequence);
+	seq = read_seqbegin(&base->lock);
+	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
+	invalidated = read_seqretry(&base->lock, seq);
 	rcu_read_unlock();
 
 	if (p)
@@ -428,36 +198,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
+	parent = NULL;
 	write_seqlock_bh(&base->lock);
-relookup:
-	p = lookup(daddr, stack, base);
-	if (p != peer_avl_empty) {
-		refcount_inc(&p->refcnt);
-		write_sequnlock_bh(&base->lock);
-		return p;
-	}
-	if (!gccnt) {
-		gccnt = inet_peer_gc(base, stack, stackptr);
-		if (gccnt && create)
-			goto relookup;
-	}
-	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
-	if (p) {
-		p->daddr = *daddr;
-		refcount_set(&p->refcnt, 2);
-		atomic_set(&p->rid, 0);
-		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
-		p->rate_tokens = 0;
-		/* 60*HZ is arbitrary, but chosen enough high so that the first
-		 * calculation of tokens is at its maximum.
-		 */
-		p->rate_last = jiffies - 60*HZ;
-		INIT_LIST_HEAD(&p->gc_list);
 
-		/* Link the node. */
-		link_to_pool(p, base);
-		base->total++;
+	gc_cnt = 0;
+	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
+	if (!p && create) {
+		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+		if (p) {
+			p->daddr = *daddr;
+			refcount_set(&p->refcnt, 2);
+			atomic_set(&p->rid, 0);
+			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+			p->rate_tokens = 0;
+			/* 60*HZ is arbitrary, but chosen enough high so that the first
+			 * calculation of tokens is at its maximum.
+			 */
+			p->rate_last = jiffies - 60*HZ;
+
+			rb_link_node(&p->rb_node, parent, pp);
+			rb_insert_color(&p->rb_node, &base->rb_root);
+			base->total++;
+		}
 	}
+	if (gc_cnt)
+		inet_peer_gc(base, gc_stack, gc_cnt);
 	write_sequnlock_bh(&base->lock);
 
 	return p;
@@ -467,8 +232,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
 void inet_putpeer(struct inet_peer *p)
 {
 	p->dtime = (__u32)jiffies;
-	smp_mb__before_atomic();
-	refcount_dec(&p->refcnt);
+
+	if (refcount_dec_and_test(&p->refcnt))
+		call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
 
@@ -513,30 +279,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
-static void inetpeer_inval_rcu(struct rcu_head *head)
-{
-	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
-
-	spin_lock_bh(&gc_lock);
-	list_add_tail(&p->gc_list, &gc_list);
-	spin_unlock_bh(&gc_lock);
-
-	schedule_delayed_work(&gc_work, gc_delay);
-}
-
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
-	struct inet_peer *root;
-
-	write_seqlock_bh(&base->lock);
+	struct inet_peer *p, *n;
 
-	root = rcu_deref_locked(base->root, base);
-	if (root != peer_avl_empty) {
-		base->root = peer_avl_empty_rcu;
-		base->total = 0;
-		call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
+	rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) {
+		inet_putpeer(p);
+		cond_resched();
 	}
 
-	write_sequnlock_bh(&base->lock);
+	base->rb_root = RB_ROOT;
+	base->total = 0;
 }
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 93157f2f4758..525ae88d1e58 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -86,8 +86,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
  * NOTE: dopt cannot point to skb.
  */
 
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
-		      const struct ip_options *sopt)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+		      struct sk_buff *skb, const struct ip_options *sopt)
 {
 	unsigned char *sptr, *dptr;
 	int soffset, doffset;
@@ -140,7 +140,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
 						__be32 addr;
 
 						memcpy(&addr, dptr+soffset-1, 4);
-						if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
+						if (inet_addr_type(net, addr) != RTN_UNICAST) {
 							dopt->ts_needtime = 1;
 							soffset += 8;
 						}
@@ -174,9 +174,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
 				doffset -= 4;
 		}
 		if (doffset > 3) {
-			__be32 daddr = fib_compute_spec_dst(skb);
-
-			memcpy(&start[doffset-1], &daddr, 4);
 			dopt->faddr = faddr;
 			dptr[0] = start[0];
 			dptr[1] = doffset+3;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 50c74cd890bc..73b0b15245b6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy)
 	return csum;
 }
 
-static inline int ip_ufo_append_data(struct sock *sk,
-			struct sk_buff_head *queue,
-			int getfrag(void *from, char *to, int offset, int len,
-			       int odd, struct sk_buff *skb),
-			void *from, int length, int hh_len, int fragheaderlen,
-			int transhdrlen, int maxfraglen, unsigned int flags)
-{
-	struct sk_buff *skb;
-	int err;
-
-	/* There is support for UDP fragmentation offload by network
-	 * device, so create one single skb packet containing complete
-	 * udp datagram
-	 */
-	skb = skb_peek_tail(queue);
-	if (!skb) {
-		skb = sock_alloc_send_skb(sk,
-			hh_len + fragheaderlen + transhdrlen + 20,
-			(flags & MSG_DONTWAIT), &err);
-
-		if (!skb)
-			return err;
-
-		/* reserve space for Hardware header */
-		skb_reserve(skb, hh_len);
-
-		/* create space for UDP/IP header */
-		skb_put(skb, fragheaderlen + transhdrlen);
-
-		/* initialize network header pointer */
-		skb_reset_network_header(skb);
-
-		/* initialize protocol header pointer */
-		skb->transport_header = skb->network_header + fragheaderlen;
-
-		skb->csum = 0;
-
-		if (flags & MSG_CONFIRM)
-			skb_set_dst_pending_confirm(skb, 1);
-
-		__skb_queue_tail(queue, skb);
-	} else if (skb_is_gso(skb)) {
-		goto append;
-	}
-
-	skb->ip_summed = CHECKSUM_PARTIAL;
-	/* specify the length of each IP datagram fragment */
-	skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
-	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-
-append:
-	return skb_append_datato_frags(sk, skb, getfrag, from,
-				       (length - transhdrlen));
-}
-
 static int __ip_append_data(struct sock *sk,
 			    struct flowi4 *fl4,
 			    struct sk_buff_head *queue,
@@ -965,18 +910,6 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
-	     (skb && skb_is_gso(skb))) &&
-	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
-		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
-					 hh_len, fragheaderlen, transhdrlen,
-					 maxfraglen, flags);
-		if (err)
-			goto error;
-		return 0;
-	}
 
 	/* So, what's going on in the loop below?
 	 *
@@ -1287,15 +1220,6 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 	if (!skb)
 		return -EINVAL;
 
-	if ((size + skb->len > mtu) &&
-	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO)) {
-		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			return -EOPNOTSUPP;
-
-		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
-		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-	}
 	cork->length += size;
 
 	while (size > 0) {
@@ -1601,7 +1525,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	int err;
 	int oif;
 
-	if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
+	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
 		return;
 
 	ipc.addr = daddr;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ecc4b4a2413e..dd68a9ed5e40 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -80,7 +80,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 }
 
 
-static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
+static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
+				 struct sk_buff *skb)
 {
 	unsigned char optbuf[sizeof(struct ip_options) + 40];
 	struct ip_options *opt = (struct ip_options *)optbuf;
@@ -88,7 +89,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
 	if (IPCB(skb)->opt.optlen == 0)
 		return;
 
-	if (ip_options_echo(opt, skb)) {
+	if (ip_options_echo(net, opt, skb)) {
 		msg->msg_flags |= MSG_CTRUNC;
 		return;
 	}
@@ -204,7 +205,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 	}
 
 	if (flags & IP_CMSG_RETOPTS) {
-		ip_cmsg_recv_retopts(msg, skb);
+		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
 
 		flags &= ~IP_CMSG_RETOPTS;
 		if (!flags)
@@ -1227,14 +1228,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
 	}
-	/* We need to keep the dst for __ip_options_echo()
-	 * We could restrict the test to opt.ts_needtime || opt.srr,
-	 * but the following is good enough as IP options are not often used.
-	 */
-	if (unlikely(IPCB(skb)->opt.optlen))
-		skb_dst_force(skb);
-	else
-		skb_dst_drop(skb);
+	skb_dst_drop(skb);
 }
 
 int ip_setsockopt(struct sock *sk, int level,
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0192c255e508..5ed63d250950 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
-static bool is_vti_tunnel(const struct net_device *dev)
-{
-	return dev->netdev_ops == &vti_netdev_ops;
-}
-
-static int vti_device_event(struct notifier_block *unused,
-			    unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct ip_tunnel *tunnel = netdev_priv(dev);
-
-	if (!is_vti_tunnel(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_DOWN:
-		if (!net_eq(tunnel->net, dev_net(dev)))
-			xfrm_garbage_collect(tunnel->net);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block vti_notifier_block __read_mostly = {
-	.notifier_call = vti_device_event,
-};
-
 static int __init vti_init(void)
 {
 	const char *msg;
@@ -618,8 +591,6 @@ static int __init vti_init(void)
 
 	pr_info("IPv4 over IPsec tunneling driver\n");
 
-	register_netdevice_notifier(&vti_notifier_block);
-
 	msg = "tunnel device";
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
@@ -652,7 +623,6 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
-	unregister_netdevice_notifier(&vti_notifier_block);
 	pr_err("vti init: failed to register %s\n", msg);
 	return err;
 }
@@ -664,7 +634,6 @@ static void __exit vti_fini(void)
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
-	unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 43eb6567b3a0..b6d3fe03feb3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -206,14 +206,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
 	SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
 	SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
-	SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED),
-	SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG),
-	SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE),
-	SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED),
-	SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
-	SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER),
 	SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
-	SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
 	SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
 	SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
 	SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
@@ -230,14 +223,12 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
 	SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES),
 	SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS),
-	SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
 	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
 	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
 	SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
 	SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
 	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
-	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
 	SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
 	SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
 	SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 03ad8778c395..b1bb1b3a1082 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
-	ireq->opt = tcp_v4_save_options(skb);
+	ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9bf809726066..0d3c038d7b04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,6 +45,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 
+/* obsolete */
+static int sysctl_tcp_low_latency __read_mostly;
+
 /* Update system visible IP port range */
 static void set_local_port_range(struct net *net, int range[2])
 {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..71b25567e787 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -388,6 +388,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
 	return period;
 }
 
+static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
+{
+	u32 rate = READ_ONCE(tp->rate_delivered);
+	u32 intv = READ_ONCE(tp->rate_interval_us);
+	u64 rate64 = 0;
+
+	if (rate && intv) {
+		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
+		do_div(rate64, intv);
+	}
+	return rate64;
+}
+
 /* Address-family independent initialization for a tcp_sock.
  *
  * NOTE: A lot of things set to zero explicitly by call to
@@ -400,7 +413,6 @@ void tcp_init_sock(struct sock *sk)
 
 	tp->out_of_order_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
-	tcp_prequeue_init(tp);
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
@@ -1034,23 +1046,29 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 
-int tcp_sendpage(struct sock *sk, struct page *page, int offset,
-		 size_t size, int flags)
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags)
 {
-	ssize_t res;
-
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
 	    !sk_check_csum_caps(sk))
 		return sock_no_sendpage(sk->sk_socket, page, offset, size,
 					flags);
 
-	lock_sock(sk);
-
 	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
 
-	res = do_tcp_sendpages(sk, page, offset, size, flags);
+	return do_tcp_sendpages(sk, page, offset, size, flags);
+}
+
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+		 size_t size, int flags)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_sendpage_locked(sk, page, offset, size, flags);
 	release_sock(sk);
-	return res;
+
+	return ret;
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
@@ -1144,9 +1162,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	return err;
 }
 
-int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct ubuf_info *uarg = NULL;
 	struct sk_buff *skb;
 	struct sockcm_cookie sockc;
 	int flags, err, copied = 0;
@@ -1155,9 +1174,27 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	bool sg;
 	long timeo;
 
-	lock_sock(sk);
-
 	flags = msg->msg_flags;
+
+	if (flags & MSG_ZEROCOPY && size) {
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			err = -EINVAL;
+			goto out_err;
+		}
+
+		skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
+		if (!uarg) {
+			err = -ENOBUFS;
+			goto out_err;
+		}
+
+		/* skb may be freed in main loop, keep extra ref on uarg */
+		sock_zerocopy_get(uarg);
+		if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+			uarg->zerocopy = 0;
+	}
+
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
 		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
@@ -1281,7 +1318,7 @@ new_segment:
 			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
 			if (err)
 				goto do_fault;
-		} else {
+		} else if (!uarg || !uarg->zerocopy) {
 			bool merge = true;
 			int i = skb_shinfo(skb)->nr_frags;
 			struct page_frag *pfrag = sk_page_frag(sk);
@@ -1319,6 +1356,13 @@ new_segment:
 				page_ref_inc(pfrag->page);
 			}
 			pfrag->offset += copy;
+		} else {
+			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
+			if (err == -EMSGSIZE || err == -EEXIST)
+				goto new_segment;
+			if (err < 0)
+				goto do_error;
+			copy = err;
 		}
 
 		if (!copied)
@@ -1365,7 +1409,7 @@ out:
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush:
-	release_sock(sk);
+	sock_zerocopy_put(uarg);
 	return copied + copied_syn;
 
 do_fault:
@@ -1382,6 +1426,7 @@ do_error:
 	if (copied + copied_syn)
 		goto out;
 out_err:
+	sock_zerocopy_put_abort(uarg);
 	err = sk_stream_error(sk, flags, err);
 	/* make sure we wake any epoll edge trigger waiter */
 	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -1389,9 +1434,19 @@ out_err:
 		sk->sk_write_space(sk);
 		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
-	release_sock(sk);
 	return err;
 }
+
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_sendmsg_locked(sk, msg, size);
+	release_sock(sk);
+
+	return ret;
+}
 EXPORT_SYMBOL(tcp_sendmsg);
 
 /*
@@ -1525,20 +1580,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		tcp_send_ack(sk);
 }
 
-static void tcp_prequeue_process(struct sock *sk)
-{
-	struct sk_buff *skb;
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
-	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-		sk_backlog_rcv(sk, skb);
-
-	/* Clear memory counter. */
-	tp->ucopy.memory = 0;
-}
-
 static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
@@ -1671,7 +1712,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	int err;
 	int target;		/* Read at least this many bytes */
 	long timeo;
-	struct task_struct *user_recv = NULL;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
 
@@ -1806,51 +1846,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
 		tcp_cleanup_rbuf(sk, copied);
 
-		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
-			/* Install new reader */
-			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
-				user_recv = current;
-				tp->ucopy.task = user_recv;
-				tp->ucopy.msg = msg;
-			}
-
-			tp->ucopy.len = len;
-
-			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
-				!(flags & (MSG_PEEK | MSG_TRUNC)));
-
-			/* Ugly... If prequeue is not empty, we have to
-			 * process it before releasing socket, otherwise
-			 * order will be broken at second iteration.
-			 * More elegant solution is required!!!
-			 *
-			 * Look: we have the following (pseudo)queues:
-			 *
-			 * 1. packets in flight
-			 * 2. backlog
-			 * 3. prequeue
-			 * 4. receive_queue
-			 *
-			 * Each queue can be processed only if the next ones
-			 * are empty. At this point we have empty receive_queue.
-			 * But prequeue _can_ be not empty after 2nd iteration,
-			 * when we jumped to start of loop because backlog
-			 * processing added something to receive_queue.
-			 * We cannot release_sock(), because backlog contains
-			 * packets arrived _after_ prequeued ones.
-			 *
-			 * Shortly, algorithm is clear --- to process all
-			 * the queues in order. We could make it more directly,
-			 * requeueing packets from backlog to prequeue, if
-			 * is not empty. It is more elegant, but eats cycles,
-			 * unfortunately.
-			 */
-			if (!skb_queue_empty(&tp->ucopy.prequeue))
-				goto do_prequeue;
-
-			/* __ Set realtime policy in scheduler __ */
-		}
-
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1859,31 +1854,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			sk_wait_data(sk, &timeo, last);
 		}
 
-		if (user_recv) {
-			int chunk;
-
-			/* __ Restore normal policy in scheduler __ */
-
-			chunk = len - tp->ucopy.len;
-			if (chunk != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-
-			if (tp->rcv_nxt == tp->copied_seq &&
-			    !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
-				tcp_prequeue_process(sk);
-
-				chunk = len - tp->ucopy.len;
-				if (chunk != 0) {
-					NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-					len -= chunk;
-					copied += chunk;
-				}
-			}
-		}
 		if ((flags & MSG_PEEK) &&
 		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
 			net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@ -1934,10 +1904,8 @@ do_prequeue:
 		tcp_rcv_space_adjust(sk);
 
 skip_copy:
-		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
+		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
 			tp->urg_data = 0;
-			tcp_fast_path_check(sk);
-		}
 		if (used + offset < skb->len)
 			continue;
 
@@ -1955,25 +1923,6 @@ skip_copy:
 		break;
 	} while (len > 0);
 
-	if (user_recv) {
-		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-			int chunk;
-
-			tp->ucopy.len = copied > 0 ? len : 0;
-
-			tcp_prequeue_process(sk);
-
-			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-		}
-
-		tp->ucopy.task = NULL;
-		tp->ucopy.len = 0;
-	}
-
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
@@ -2823,7 +2772,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 now, intv;
+	u32 now;
 	u64 rate64;
 	bool slow;
 	u32 rate;
@@ -2922,13 +2871,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_data_segs_out = tp->data_segs_out;
 
 	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
-	rate = READ_ONCE(tp->rate_delivered);
-	intv = READ_ONCE(tp->rate_interval_us);
-	if (rate && intv) {
-		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
-		do_div(rate64, intv);
+	rate64 = tcp_compute_delivery_rate(tp);
+	if (rate64)
 		info->tcpi_delivery_rate = rate64;
-	}
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2938,8 +2883,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *stats;
 	struct tcp_info info;
+	u64 rate64;
+	u32 rate;
 
-	stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
+			  3 * nla_total_size(sizeof(u32)) +
+			  2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
 
@@ -2954,6 +2903,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 			  tp->data_segs_out, TCP_NLA_PAD);
 	nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
 			  tp->total_retrans, TCP_NLA_PAD);
+
+	rate = READ_ONCE(sk->sk_pacing_rate);
+	rate64 = rate != ~0U ? rate : ~0ULL;
+	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
+
+	rate64 = tcp_compute_delivery_rate(tp);
+	nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
+
+	nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+	nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
+	nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
+
+	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
+	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
 	return stats;
 }
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 609965f0e298..fc3614377413 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -49,7 +49,6 @@ MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wma
 struct bictcp {
 	u32	cnt;		/* increase cwnd by 1 after ACKs */
 	u32	last_max_cwnd;	/* last maximum snd_cwnd */
-	u32	loss_cwnd;	/* congestion window at last loss */
 	u32	last_cwnd;	/* the last snd_cwnd */
 	u32	last_time;	/* time when updated last_cwnd */
 	u32	epoch_start;	/* beginning of an epoch */
@@ -72,7 +71,6 @@ static void bictcp_init(struct sock *sk)
 	struct bictcp *ca = inet_csk_ca(sk);
 
 	bictcp_reset(ca);
-	ca->loss_cwnd = 0;
 
 	if (initial_ssthresh)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
@@ -172,22 +170,12 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 	else
 		ca->last_max_cwnd = tp->snd_cwnd;
 
-	ca->loss_cwnd = tp->snd_cwnd;
-
 	if (tp->snd_cwnd <= low_window)
 		return max(tp->snd_cwnd >> 1U, 2U);
 	else
 		return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	const struct bictcp *ca = inet_csk_ca(sk);
-
-	return max(tp->snd_cwnd, ca->loss_cwnd);
-}
-
 static void bictcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Loss)
@@ -214,7 +202,7 @@ static struct tcp_congestion_ops bictcp __read_mostly = {
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
 	.set_state	= bictcp_state,
-	.undo_cwnd	= bictcp_undo_cwnd,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.pkts_acked     = bictcp_acked,
 	.owner		= THIS_MODULE,
 	.name		= "bic",
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 50a0f3e51d5b..66ac69f7bd19 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -85,7 +85,6 @@ struct cdg {
 	u8  state;
 	u8  delack;
 	u32 rtt_seq;
-	u32 undo_cwnd;
 	u32 shadow_wnd;
 	u16 backoff_cnt;
 	u16 sample_cnt;
@@ -330,8 +329,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
 	struct cdg *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	ca->undo_cwnd = tp->snd_cwnd;
-
 	if (ca->state == CDG_BACKOFF)
 		return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
 
@@ -344,13 +341,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
 	return max(2U, tp->snd_cwnd >> 1);
 }
 
-static u32 tcp_cdg_undo_cwnd(struct sock *sk)
-{
-	struct cdg *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd);
-}
-
 static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
 {
 	struct cdg *ca = inet_csk_ca(sk);
@@ -403,7 +393,7 @@ struct tcp_congestion_ops tcp_cdg __read_mostly = {
 	.cong_avoid = tcp_cdg_cong_avoid,
 	.cwnd_event = tcp_cdg_cwnd_event,
 	.pkts_acked = tcp_cdg_acked,
-	.undo_cwnd = tcp_cdg_undo_cwnd,
+	.undo_cwnd = tcp_reno_undo_cwnd,
 	.ssthresh = tcp_cdg_ssthresh,
 	.release = tcp_cdg_release,
 	.init = tcp_cdg_init,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..c2b174469645 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -456,7 +456,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+	return max(tp->snd_cwnd, tp->prior_cwnd);
 }
 EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 57ae5b5ae643..78bfadfcf342 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -83,7 +83,6 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse
 struct bictcp {
 	u32	cnt;		/* increase cwnd by 1 after ACKs */
 	u32	last_max_cwnd;	/* last maximum snd_cwnd */
-	u32	loss_cwnd;	/* congestion window at last loss */
 	u32	last_cwnd;	/* the last snd_cwnd */
 	u32	last_time;	/* time when updated last_cwnd */
 	u32	bic_origin_point;/* origin point of bic function */
@@ -142,7 +141,6 @@ static void bictcp_init(struct sock *sk)
 	struct bictcp *ca = inet_csk_ca(sk);
 
 	bictcp_reset(ca);
-	ca->loss_cwnd = 0;
 
 	if (hystart)
 		bictcp_hystart_reset(sk);
@@ -366,18 +364,9 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 	else
 		ca->last_max_cwnd = tp->snd_cwnd;
 
-	ca->loss_cwnd = tp->snd_cwnd;
-
 	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
-	struct bictcp *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static void bictcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Loss) {
@@ -470,7 +459,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
 	.set_state	= bictcp_state,
-	.undo_cwnd	= bictcp_undo_cwnd,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cwnd_event	= bictcp_cwnd_event,
 	.pkts_acked     = bictcp_acked,
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6d9879e93648..d1c33c91eadc 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,7 +94,6 @@ static const struct hstcp_aimd_val {
 
 struct hstcp {
 	u32	ai;
-	u32	loss_cwnd;
 };
 
 static void hstcp_init(struct sock *sk)
@@ -153,22 +152,14 @@ static u32 hstcp_ssthresh(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	ca->loss_cwnd = tp->snd_cwnd;
 	/* Do multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
 }
 
-static u32 hstcp_cwnd_undo(struct sock *sk)
-{
-	const struct hstcp *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
-	.undo_cwnd	= hstcp_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= hstcp_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 3eb78cde6ff0..082d479462fa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -66,7 +66,6 @@ static inline void htcp_reset(struct htcp *ca)
 
 static u32 htcp_cwnd_undo(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
 	if (ca->undo_last_cong) {
@@ -76,7 +75,7 @@ static u32 htcp_cwnd_undo(struct sock *sk)
 		ca->undo_last_cong = 0;
 	}
 
-	return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
+	return tcp_reno_undo_cwnd(sk);
 }
 
 static inline void measure_rtt(struct sock *sk, u32 srtt)
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 60352ff4f5a8..7c843578f233 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,7 +48,6 @@ struct illinois {
 	u32	end_seq;	/* right edge of current RTT */
 	u32	alpha;		/* Additive increase */
 	u32	beta;		/* Muliplicative decrease */
-	u32	loss_cwnd;	/* cwnd on loss */
 	u16	acked;		/* # packets acked by current ACK */
 	u8	rtt_above;	/* average rtt has gone above threshold */
 	u8	rtt_low;	/* # of rtts measurements below threshold */
@@ -297,18 +296,10 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
 
-	ca->loss_cwnd = tp->snd_cwnd;
 	/* Multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
 }
 
-static u32 tcp_illinois_cwnd_undo(struct sock *sk)
-{
-	const struct illinois *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 /* Extract info for Tcp socket info provided via netlink. */
 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 				union tcp_cc_info *info)
@@ -336,7 +327,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
-	.undo_cwnd	= tcp_illinois_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
 	.get_info	= tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..842ed75ccb25 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -103,7 +103,6 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
-#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
@@ -1951,6 +1950,7 @@ void tcp_enter_loss(struct sock *sk)
 	    !after(tp->high_seq, tp->snd_una) ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
+		tp->prior_cwnd = tp->snd_cwnd;
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
 		tcp_init_undo(tp);
@@ -3367,12 +3367,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		if (tp->snd_wnd != nwin) {
 			tp->snd_wnd = nwin;
 
-			/* Note, it is the only place, where
-			 * fast path is recovered for sending TCP.
-			 */
-			tp->pred_flags = 0;
-			tcp_fast_path_check(sk);
-
 			if (tcp_send_head(sk))
 				tcp_slow_start_after_idle_check(sk);
 
@@ -3554,6 +3548,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 lost = tp->lost;
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+	u32 ack_ev_flags = 0;
 
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
@@ -3597,42 +3592,26 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
-		/* Window is constant, pure forward advance.
-		 * No more checks are required.
-		 * Note, we use the fact that SND.UNA>=SND.WL2.
-		 */
-		tcp_update_wl(tp, ack_seq);
-		tcp_snd_una_update(tp, ack);
-		flag |= FLAG_WIN_UPDATE;
-
-		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
-	} else {
-		u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
-		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
-			flag |= FLAG_DATA;
-		else
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+	if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+		flag |= FLAG_DATA;
+	else
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
 
-		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
+	flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
-		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-							&sack_state);
+	if (TCP_SKB_CB(skb)->sacked)
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_state);
 
-		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
-			flag |= FLAG_ECE;
-			ack_ev_flags |= CA_ACK_ECE;
-		}
+	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+		flag |= FLAG_ECE;
+		ack_ev_flags = CA_ACK_ECE;
+	}
 
-		if (flag & FLAG_WIN_UPDATE)
-			ack_ev_flags |= CA_ACK_WIN_UPDATE;
+	if (flag & FLAG_WIN_UPDATE)
+		ack_ev_flags |= CA_ACK_WIN_UPDATE;
 
-		tcp_in_ack_event(sk, ack_ev_flags);
-	}
+	tcp_in_ack_event(sk, ack_ev_flags);
 
 	/* We passed data and got it acked, remove any soft error
 	 * log. Something worked...
@@ -4398,8 +4377,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@ -4588,8 +4565,8 @@ err:
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	bool fragstolen = false;
-	int eaten = -1;
+	bool fragstolen;
+	int eaten;
 
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
 		__kfree_skb(skb);
@@ -4611,32 +4588,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			goto out_of_window;
 
 		/* Ok. In sequence. In window. */
-		if (tp->ucopy.task == current &&
-		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
-		    sock_owned_by_user(sk) && !tp->urg_data) {
-			int chunk = min_t(unsigned int, skb->len,
-					  tp->ucopy.len);
-
-			__set_current_state(TASK_RUNNING);
-
-			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
-				tp->ucopy.len -= chunk;
-				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len);
-				tcp_rcv_space_adjust(sk);
-			}
-		}
-
-		if (eaten <= 0) {
 queue_and_out:
-			if (eaten < 0) {
-				if (skb_queue_len(&sk->sk_receive_queue) == 0)
-					sk_forced_mem_schedule(sk, skb->truesize);
-				else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
-					goto drop;
-			}
-			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-		}
+		if (skb_queue_len(&sk->sk_receive_queue) == 0)
+			sk_forced_mem_schedule(sk, skb->truesize);
+		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+			goto drop;
+
+		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
@@ -4656,8 +4614,6 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
-		tcp_fast_path_check(sk);
-
 		if (eaten > 0)
 			kfree_skb_partial(skb, fragstolen);
 		if (!sock_flag(sk, SOCK_DEAD))
@@ -4983,7 +4939,6 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
 
 	/* Massive buffer overcommit. */
-	tp->pred_flags = 0;
 	return -1;
 }
 
@@ -5155,9 +5110,6 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 
 	tp->urg_data = TCP_URG_NOTYET;
 	tp->urg_seq = ptr;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
 }
 
 /* This is the 'fast' part of urgent handling. */
@@ -5186,26 +5138,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
 	}
 }
 
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int chunk = skb->len - hlen;
-	int err;
-
-	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
-	else
-		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
-	if (!err) {
-		tp->ucopy.len -= chunk;
-		tp->copied_seq += chunk;
-		tcp_rcv_space_adjust(sk);
-	}
-
-	return err;
-}
-
 /* Accept RST for rcv_nxt - 1 after a FIN.
  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
  * FIN is sent followed by a RST packet. The RST is sent with the same
@@ -5336,201 +5268,29 @@ discard:
 
 /*
  *	TCP receive function for the ESTABLISHED state.
- *
- *	It is split into a fast path and a slow path. The fast path is
- * 	disabled when:
- *	- A zero window was announced from us - zero window probing
- *        is only handled properly in the slow path.
- *	- Out of order segments arrived.
- *	- Urgent data is expected.
- *	- There is no buffer space left
- *	- Unexpected TCP flags/window values/header lengths are received
- *	  (detected by checking the TCP header against pred_flags)
- *	- Data is sent in both directions. Fast path only supports pure senders
- *	  or pure receivers (this means either the sequence number or the ack
- *	  value must stay constant)
- *	- Unexpected TCP option.
- *
- *	When these conditions are not satisfied it drops into a standard
- *	receive procedure patterned after RFC793 to handle all cases.
- *	The first three cases are guaranteed by proper pred_flags setting,
- *	the rest is checked inline. Fast processing is turned on in
- *	tcp_data_queue when everything is OK.
  */
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th, unsigned int len)
+			 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
-	/*
-	 *	Header prediction.
-	 *	The code loosely follows the one in the famous
-	 *	"30 instruction TCP receive" Van Jacobson mail.
-	 *
-	 *	Van's trick is to deposit buffers into socket queue
-	 *	on a device interrupt, to call tcp_recv function
-	 *	on the receive process context and checksum and copy
-	 *	the buffer to user space. smart...
-	 *
-	 *	Our current scheme is not silly either but we take the
-	 *	extra cost of the net_bh soft interrupt processing...
-	 *	We do checksum and copy also but from device to kernel.
-	 */
 
 	tp->rx_opt.saw_tstamp = 0;
 
-	/*	pred_flags is 0xS?10 << 16 + snd_wnd
-	 *	if header_prediction is to be made
-	 *	'S' will always be tp->tcp_header_len >> 2
-	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
-	 *  turn it off	(when there are holes in the receive
-	 *	 space for instance)
-	 *	PSH flag is ignored.
-	 */
-
-	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
-	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
-	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
-		int tcp_header_len = tp->tcp_header_len;
-
-		/* Timestamp header prediction: tcp_header_len
-		 * is automatically equal to th->doff*4 due to pred_flags
-		 * match.
-		 */
-
-		/* Check timestamp */
-		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-			/* No? Slow path! */
-			if (!tcp_parse_aligned_timestamp(tp, th))
-				goto slow_path;
-
-			/* If PAWS failed, check it more carefully in slow path */
-			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
-				goto slow_path;
-
-			/* DO NOT update ts_recent here, if checksum fails
-			 * and timestamp was corrupted part, it will result
-			 * in a hung connection since we will drop all
-			 * future packets due to the PAWS test.
-			 */
-		}
-
-		if (len <= tcp_header_len) {
-			/* Bulk data transfer: sender */
-			if (len == tcp_header_len) {
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
-
-				/* We know that such packets are checksummed
-				 * on entry.
-				 */
-				tcp_ack(sk, skb, 0);
-				__kfree_skb(skb);
-				tcp_data_snd_check(sk);
-				return;
-			} else { /* Header too small */
-				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-				goto discard;
-			}
-		} else {
-			int eaten = 0;
-			bool fragstolen = false;
-
-			if (tp->ucopy.task == current &&
-			    tp->copied_seq == tp->rcv_nxt &&
-			    len - tcp_header_len <= tp->ucopy.len &&
-			    sock_owned_by_user(sk)) {
-				__set_current_state(TASK_RUNNING);
-
-				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
-					/* Predicted packet is in window by definition.
-					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-					 * Hence, check seq<=rcv_wup reduces to:
-					 */
-					if (tcp_header_len ==
-					    (sizeof(struct tcphdr) +
-					     TCPOLEN_TSTAMP_ALIGNED) &&
-					    tp->rcv_nxt == tp->rcv_wup)
-						tcp_store_ts_recent(tp);
-
-					tcp_rcv_rtt_measure_ts(sk, skb);
-
-					__skb_pull(skb, tcp_header_len);
-					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
-					NET_INC_STATS(sock_net(sk),
-							LINUX_MIB_TCPHPHITSTOUSER);
-					eaten = 1;
-				}
-			}
-			if (!eaten) {
-				if (tcp_checksum_complete(skb))
-					goto csum_error;
-
-				if ((int)skb->truesize > sk->sk_forward_alloc)
-					goto step5;
-
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
-
-				tcp_rcv_rtt_measure_ts(sk, skb);
-
-				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
-
-				/* Bulk data transfer: receiver */
-				eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
-						      &fragstolen);
-			}
-
-			tcp_event_data_recv(sk, skb);
-
-			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
-				/* Well, only one small jumplet in fast path... */
-				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk);
-				if (!inet_csk_ack_scheduled(sk))
-					goto no_ack;
-			}
-
-			__tcp_ack_snd_check(sk, 0);
-no_ack:
-			if (eaten)
-				kfree_skb_partial(skb, fragstolen);
-			sk->sk_data_ready(sk);
-			return;
-		}
-	}
-
-slow_path:
 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
 		goto csum_error;
 
 	if (!th->ack && !th->rst && !th->syn)
 		goto discard;
 
-	/*
-	 *	Standard slow path.
-	 */
-
 	if (!tcp_validate_incoming(sk, skb, th, 1))
 		return;
 
-step5:
-	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+	if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)
 		goto discard;
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -5583,12 +5343,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 
 	if (sock_flag(sk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
-
-	if (!tp->rx_opt.snd_wscale)
-		__tcp_fast_path_on(tp, tp->snd_wnd);
-	else
-		tp->pred_flags = 0;
-
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -5717,7 +5471,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ecn_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_ack(sk, skb, FLAG_SLOWPATH);
+		tcp_ack(sk, skb, 0);
 
 		/* Ok.. it's good. Set up sequence numbers and
 		 * move to established.
@@ -5953,8 +5707,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	/* step 5: check the ACK field */
-	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
-				      FLAG_UPDATE_TS_RECENT |
+
+	acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |
 				      FLAG_NO_CHALLENGE_ACK) > 0;
 
 	if (!acceptable) {
@@ -6022,7 +5776,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tp->lsndtime = tcp_jiffies32;
 
 		tcp_initialize_rcv_mss(sk);
-		tcp_fast_path_on(tp);
 		break;
 
 	case TCP_FIN_WAIT1: {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f03d5f7..5f708c85110e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,8 +85,6 @@
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
 
-int sysctl_tcp_low_latency __read_mostly;
-
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -1269,7 +1267,7 @@ static void tcp_v4_init_req(struct request_sock *req,
 
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-	ireq->opt = tcp_v4_save_options(skb);
+	ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
 }
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1458,7 +1456,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+		tcp_rcv_established(sk, skb, tcp_hdr(skb));
 		return 0;
 	}
 
@@ -1541,61 +1539,6 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 	}
 }
 
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8)				  --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (sysctl_tcp_low_latency || !tp->ucopy.task)
-		return false;
-
-	if (skb->len <= tcp_hdrlen(skb) &&
-	    skb_queue_len(&tp->ucopy.prequeue) == 0)
-		return false;
-
-	/* Before escaping RCU protected region, we need to take care of skb
-	 * dst. Prequeue is only enabled for established sockets.
-	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
-	 * Instead of doing full sk_rx_dst validity here, let's perform
-	 * an optimistic check.
-	 */
-	if (likely(sk->sk_rx_dst))
-		skb_dst_drop(skb);
-	else
-		skb_dst_force_safe(skb);
-
-	__skb_queue_tail(&tp->ucopy.prequeue, skb);
-	tp->ucopy.memory += skb->truesize;
-	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
-	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-		struct sk_buff *skb1;
-
-		BUG_ON(sock_owned_by_user(sk));
-		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
-				skb_queue_len(&tp->ucopy.prequeue));
-
-		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk_backlog_rcv(sk, skb1);
-
-		tp->ucopy.memory = 0;
-	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-		wake_up_interruptible_sync_poll(sk_sleep(sk),
-					   POLLIN | POLLRDNORM | POLLRDBAND);
-		if (!inet_csk_ack_scheduled(sk))
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  (3 * tcp_rto_min(sk)) / 4,
-						  TCP_RTO_MAX);
-	}
-	return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
@@ -1770,8 +1713,7 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v4_do_rcv(sk, skb);
+		ret = tcp_v4_do_rcv(sk, skb);
 	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
@@ -1936,9 +1878,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	}
 #endif
 
-	/* Clean prequeue, it must be empty really */
-	__skb_queue_purge(&tp->ucopy.prequeue);
-
 	/* Clean up a referenced TCP bind bucket. */
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ff83c1637d8..1537b87c657f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -436,8 +436,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		struct tcp_sock *newtp = tcp_sk(newsk);
 
 		/* Now setup tcp_sock */
-		newtp->pred_flags = 0;
-
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
 		newtp->segs_in = 1;
@@ -445,7 +443,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
 
-		tcp_prequeue_init(newtp);
 		INIT_LIST_HEAD(&newtp->tsq_node);
 
 		tcp_init_wl(newtp, treq->rcv_isn);
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 6d650ed3cb59..1ff73982e28c 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -86,7 +86,6 @@ struct tcpnv {
 				 * < 0 => less than 1 packet/RTT */
 	u8  available8;
 	u16 available16;
-	u32 loss_cwnd;	/* cwnd at last loss */
 	u8  nv_allow_cwnd_growth:1, /* whether cwnd can grow */
 		nv_reset:1,	    /* whether to reset values */
 		nv_catchup:1;	    /* whether we are growing because
@@ -121,7 +120,6 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	ca->nv_reset = 0;
-	ca->loss_cwnd = 0;
 	ca->nv_no_cong_cnt = 0;
 	ca->nv_rtt_cnt = 0;
 	ca->nv_last_rtt = 0;
@@ -177,19 +175,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcpnv_recalc_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct tcpnv *ca = inet_csk_ca(sk);
 
-	ca->loss_cwnd = tp->snd_cwnd;
 	return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
 }
 
-static u32 tcpnv_undo_cwnd(struct sock *sk)
-{
-	struct tcpnv *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static void tcpnv_state(struct sock *sk, u8 new_state)
 {
 	struct tcpnv *ca = inet_csk_ca(sk);
@@ -446,7 +435,7 @@ static struct tcp_congestion_ops tcpnv __read_mostly = {
 	.ssthresh	= tcpnv_recalc_ssthresh,
 	.cong_avoid	= tcpnv_cong_avoid,
 	.set_state	= tcpnv_state,
-	.undo_cwnd	= tcpnv_undo_cwnd,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.pkts_acked     = tcpnv_acked,
 	.get_info	= tcpnv_get_info,
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2f1588bf73da..d49bff51bdb7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -295,9 +295,7 @@ static u16 tcp_select_window(struct sock *sk)
 	/* RFC1323 scaling applied */
 	new_win >>= tp->rx_opt.rcv_wscale;
 
-	/* If we advertise zero window, disable fast path. */
 	if (new_win == 0) {
-		tp->pred_flags = 0;
 		if (old_win)
 			NET_INC_STATS(sock_net(sk),
 				      LINUX_MIB_TCPTOZEROWINDOWADV);
@@ -2378,7 +2376,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
 	/* No consecutive loss probes. */
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2407,15 +2404,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	     tcp_send_head(sk))
 		return false;
 
-	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+	/* Probe timeout is 2*rtt. Add minimum RTO to account
 	 * for delayed ack when there's one outstanding packet. If no RTT
 	 * sample is available then probe after TCP_TIMEOUT_INIT.
 	 */
-	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-	if (tp->packets_out == 1)
-		timeout = max_t(u32, timeout,
-				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
-	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+	if (tp->srtt_us) {
+		timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+		if (tp->packets_out == 1)
+			timeout += TCP_RTO_MIN;
+		else
+			timeout += TCP_TIMEOUT_MIN;
+	} else {
+		timeout = TCP_TIMEOUT_INIT;
+	}
 
 	/* If RTO is shorter, just schedule TLP in its place. */
 	tlp_time_stamp = tcp_jiffies32 + timeout;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af24a64..697f4c67b2e3 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void)
  * Note: arguments must match tcp_rcv_established()!
  */
 static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-				 const struct tcphdr *th, unsigned int len)
+				 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
 
@@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				BUG();
 			}
 
-			p->length = skb->len;
+			p->length = len;
 			p->snd_nxt = tp->snd_nxt;
 			p->snd_una = tp->snd_una;
 			p->snd_cwnd = tp->snd_cwnd;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..449cd914d58e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
 	tp->rack.advanced = 0;
 	tcp_rack_detect_loss(sk, &timeout);
 	if (timeout) {
-		timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+		timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
 					  timeout, inet_csk(sk)->icsk_rto);
 	}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index f2123075ce6e..addc122f8818 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,10 +15,6 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-struct scalable {
-	u32 loss_cwnd;
-};
-
 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -36,23 +32,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcp_scalable_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct scalable *ca = inet_csk_ca(sk);
-
-	ca->loss_cwnd = tp->snd_cwnd;
 
 	return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
 }
 
-static u32 tcp_scalable_cwnd_undo(struct sock *sk)
-{
-	const struct scalable *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
-	.undo_cwnd	= tcp_scalable_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_scalable_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c0feeeef962a..f753f9d2fee3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk)
 /* Called with BH disabled */
 void tcp_delack_timer_handler(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	sk_mem_reclaim_partial(sk);
@@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk)
 	}
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
-	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-		struct sk_buff *skb;
-
-		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
-
-		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk_backlog_rcv(sk, skb);
-
-		tp->ucopy.memory = 0;
-	}
-
 	if (inet_csk_ack_scheduled(sk)) {
 		if (!icsk->icsk_ack.pingpong) {
 			/* Delayed ACK missed: inflate ATO. */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 76005d4b8dfc..6fcf482d611b 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,7 +30,6 @@ struct veno {
 	u32 basertt;		/* the min of all Veno rtt measurements seen (in usec) */
 	u32 inc;		/* decide whether to increase cwnd */
 	u32 diff;		/* calculate the diff rate */
-	u32 loss_cwnd;		/* cwnd when loss occured */
 };
 
 /* There are several situations when we must "re-start" Veno:
@@ -194,7 +193,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
-	veno->loss_cwnd = tp->snd_cwnd;
 	if (veno->diff < beta)
 		/* in "non-congestive state", cut cwnd by 1/5 */
 		return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -203,17 +201,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 		return max(tp->snd_cwnd >> 1U, 2U);
 }
 
-static u32 tcp_veno_cwnd_undo(struct sock *sk)
-{
-	const struct veno *veno = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
-	.undo_cwnd	= tcp_veno_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_veno_cong_avoid,
 	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index bec9cafbe3f9..e5de84310949 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -154,24 +154,6 @@ static inline void update_rtt_min(struct westwood *w)
 }
 
 /*
- * @westwood_fast_bw
- * It is called when we are in fast path. In particular it is called when
- * header prediction is successful. In such case in fact update is
- * straight forward and doesn't need any particular care.
- */
-static inline void westwood_fast_bw(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct westwood *w = inet_csk_ca(sk);
-
-	westwood_update_window(sk);
-
-	w->bk += tp->snd_una - w->snd_una;
-	w->snd_una = tp->snd_una;
-	update_rtt_min(w);
-}
-
-/*
  * @westwood_acked_count
  * This function evaluates cumul_ack for evaluating bk in case of
  * delayed or partial acks.
@@ -223,17 +205,12 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk)
 
 static void tcp_westwood_ack(struct sock *sk, u32 ack_flags)
 {
-	if (ack_flags & CA_ACK_SLOWPATH) {
-		struct westwood *w = inet_csk_ca(sk);
-
-		westwood_update_window(sk);
-		w->bk += westwood_acked_count(sk);
+	struct westwood *w = inet_csk_ca(sk);
 
-		update_rtt_min(w);
-		return;
-	}
+	westwood_update_window(sk);
+	w->bk += westwood_acked_count(sk);
 
-	westwood_fast_bw(sk);
+	update_rtt_min(w);
 }
 
 static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index e6ff99c4bd3b..96e829b2e2fc 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,7 +37,6 @@ struct yeah {
 	u32 fast_count;
 
 	u32 pkts_acked;
-	u32 loss_cwnd;
 };
 
 static void tcp_yeah_init(struct sock *sk)
@@ -220,22 +219,14 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
 
 	yeah->fast_count = 0;
 	yeah->reno_count = max(yeah->reno_count>>1, 2U);
-	yeah->loss_cwnd = tp->snd_cwnd;
 
 	return max_t(int, tp->snd_cwnd - reduction, 2);
 }
 
-static u32 tcp_yeah_cwnd_undo(struct sock *sk)
-{
-	const struct yeah *yeah = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
-	.undo_cwnd      = tcp_yeah_cwnd_undo,
+	.undo_cwnd      = tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_yeah_cong_avoid,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6276fa3750b..38bca2c4897d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1176,7 +1176,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
 	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
 	scratch->is_linear = !skb_is_nonlinear(skb);
 #endif
-	if (likely(!skb->_skb_refdst))
+	/* all head states execept sp (dst, sk, nf) are always cleared by
+	 * udp_rcv() and we need to preserve secpath, if present, to eventually
+	 * process IP_CMSG_PASSSEC at recvmsg() time
+	 */
+	if (likely(!skb_sec_path(skb)))
 		scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
 }
 
@@ -1782,13 +1786,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
-	/* At recvmsg() time we may access skb->dst or skb->sp depending on
-	 * the IP options and the cmsg flags, elsewhere can we clear all
-	 * pending head states while they are hot in the cache
-	 */
-	if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
-		skb_release_head_state(skb);
-
 	rc = __udp_enqueue_schedule_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 781250151d40..97658bfc1b58 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	__be16 new_protocol, bool is_ipv6)
 {
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
-	bool remcsum, need_csum, offload_csum, ufo, gso_partial;
+	bool remcsum, need_csum, offload_csum, gso_partial;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct udphdr *uh = udp_hdr(skb);
 	u16 mac_offset = skb->mac_header;
@@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
 	skb->remcsum_offload = remcsum;
 
-	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
 	need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
 	/* Try to offload checksum if possible */
 	offload_csum = !!(need_csum &&
@@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	 * outer one so strip the existing checksum feature flags and
 	 * instead set the flag based on our outer checksum offload value.
 	 */
-	if (remcsum || ufo) {
+	if (remcsum) {
 		features &= ~NETIF_F_CSUM_MASK;
 		if (!need_csum || offload_csum)
 			features |= NETIF_F_HW_CSUM;
@@ -189,66 +187,16 @@ out_unlock:
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
-static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
-					 netdev_features_t features)
+static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+					   netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	unsigned int mss;
-	__wsum csum;
-	struct udphdr *uh;
-	struct iphdr *iph;
 
 	if (skb->encapsulation &&
 	    (skb_shinfo(skb)->gso_type &
-	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
 		segs = skb_udp_tunnel_segment(skb, features, false);
-		goto out;
-	}
 
-	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		goto out;
-
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		segs = NULL;
-		goto out;
-	}
-
-	/* Do software UFO. Complete and fill in the UDP checksum as
-	 * HW cannot do checksum of UDP packets sent as multiple
-	 * IP fragments.
-	 */
-
-	uh = udp_hdr(skb);
-	iph = ip_hdr(skb);
-
-	uh->check = 0;
-	csum = skb_checksum(skb, 0, skb->len, 0);
-	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
-	if (uh->check == 0)
-		uh->check = CSUM_MANGLED_0;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* If there is no outer header we can fake a checksum offload
-	 * due to the fact that we have already done the checksum in
-	 * software prior to segmenting the frame.
-	 */
-	if (!skb->encap_hdr_csum)
-		features |= NETIF_F_HW_CSUM;
-
-	/* Fragment the skb. IP headers of the fragments are updated in
-	 * inet_gso_segment()
-	 */
-	segs = skb_segment(skb, features);
-out:
 	return segs;
 }
 
@@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv4_offload = {
 	.callbacks = {
-		.gso_segment = udp4_ufo_fragment,
+		.gso_segment = udp4_tunnel_segment,
 		.gro_receive  =	udp4_gro_receive,
 		.gro_complete =	udp4_gro_complete,
 	},
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 58bd39fb14b4..6539ff15e9a3 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct udp_tunnel_info ti;
 
-	if (!dev->netdev_ops->ndo_udp_tunnel_add)
+	if (!dev->netdev_ops->ndo_udp_tunnel_add ||
+	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
 		return;
 
 	ti.type = type;
@@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 }
 EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
 
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+			     unsigned short type)
+{
+	struct sock *sk = sock->sk;
+	struct udp_tunnel_info ti;
+
+	if (!dev->netdev_ops->ndo_udp_tunnel_del ||
+	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+		return;
+
+	ti.type = type;
+	ti.sa_family = sk->sk_family;
+	ti.port = inet_sk(sk)->inet_sport;
+
+	dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
+
 /* Notify netdevs that UDP port started listening */
 void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
 {
@@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
 	for_each_netdev_rcu(net, dev) {
 		if (!dev->netdev_ops->ndo_udp_tunnel_add)
 			continue;
+		if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+			continue;
 		dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
 	}
 	rcu_read_unlock();
@@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
 	for_each_netdev_rcu(net, dev) {
 		if (!dev->netdev_ops->ndo_udp_tunnel_del)
 			continue;
+		if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+			continue;
 		dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
 	}
 	rcu_read_unlock();
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 71b4ecc195c7..4aefb149fe0a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	fl4->flowi4_tos = iph->tos;
 }
 
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm4_dst_ops_template = {
 	.family =		AF_INET,
-	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
 	.redirect =		xfrm4_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
-	.gc_thresh =		INT_MAX,
+	.gc_thresh =		32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..f8b24c2e0d77 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-		udp_offload.o seg6.o
+		udp_offload.o seg6.o fib6_notifier.o
 
 ipv6-offload :=	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..30ee23eef268 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3066,7 +3066,7 @@ static void init_loopback(struct net_device *dev)
 				 * lo device down, release this obsolete dst and
 				 * reallocate a new router for ifa.
 				 */
-				if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
+				if (!sp_ifa->rt->rt6i_node) {
 					ip6_rt_put(sp_ifa->rt);
 					sp_ifa->rt = NULL;
 				} else {
@@ -3321,11 +3321,11 @@ static void addrconf_gre_config(struct net_device *dev)
 static int fixup_permanent_addr(struct inet6_dev *idev,
 				struct inet6_ifaddr *ifp)
 {
-	/* rt6i_ref == 0 means the host route was removed from the
+	/* !rt6i_node means the host route was removed from the
 	 * FIB, for example, if 'lo' device is taken down. In that
 	 * case regenerate the host route.
 	 */
-	if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+	if (!ifp->rt || !ifp->rt->rt6i_node) {
 		struct rt6_info *rt, *prev;
 
 		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a88b5b5b7955..0a7c74049a0c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,7 @@ lookup_protocol:
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
+	np->autoflowlabel = ip6_default_np_autolabel(net);
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..66a103ef7e86
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,61 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->family = AF_INET6;
+	return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info)
+{
+	info->family = AF_INET6;
+	return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+	return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+	int err;
+
+	err = fib6_rules_dump(net, nb);
+	if (err)
+		return err;
+
+	return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+	.family		= AF_INET6,
+	.fib_seq_read	= fib6_seq_read,
+	.fib_dump	= fib6_dump,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
+
+	ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv6.notifier_ops = ops;
+
+	return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+	fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..2f29e4e33bd3 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/netdevice.h>
+#include <linux/notifier.h>
 #include <linux/export.h>
 
 #include <net/fib_rules.h>
@@ -29,6 +30,36 @@ struct fib6_rule {
 	u8			tclass;
 };
 
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+	struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+	if (r->dst.plen || r->src.plen || r->tclass)
+		return false;
+	return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
+{
+	if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+	    rule->l3mdev)
+		return false;
+	if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+		return false;
+	return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
+
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, AF_INET6);
+}
+
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, AF_INET6);
+}
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..69ed0043d117 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,6 +33,7 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 #include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
 
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
@@ -153,7 +154,7 @@ static void node_free(struct fib6_node *fn)
 	kmem_cache_free(fib6_node_kmem, fn);
 }
 
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 {
 	int cpu;
 
@@ -176,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 	free_percpu(non_pcpu_rt->rt6i_pcpu);
 	non_pcpu_rt->rt6i_pcpu = NULL;
 }
-
-static void rt6_release(struct rt6_info *rt)
-{
-	if (atomic_dec_and_test(&rt->rt6i_ref)) {
-		rt6_free_pcpu(rt);
-		dst_dev_put(&rt->dst);
-		dst_release(&rt->dst);
-	}
-}
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
 {
@@ -302,6 +295,109 @@ static void __net_init fib6_tables_init(struct net *net)
 
 #endif
 
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+	unsigned int h, fib_seq = 0;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+			read_lock_bh(&tb->tb6_lock);
+			fib_seq += tb->fib_seq;
+			read_unlock_bh(&tb->tb6_lock);
+		}
+	}
+	rcu_read_unlock();
+
+	return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+				    enum fib_event_type event_type,
+				    struct rt6_info *rt)
+{
+	struct fib6_entry_notifier_info info = {
+		.rt = rt,
+	};
+
+	return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib6_entry_notifiers(struct net *net,
+				     enum fib_event_type event_type,
+				     struct rt6_info *rt)
+{
+	struct fib6_entry_notifier_info info = {
+		.rt = rt,
+	};
+
+	rt->rt6i_table->fib_seq++;
+	return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+struct fib6_dump_arg {
+	struct net *net;
+	struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+	if (rt == arg->net->ipv6.ip6_null_entry)
+		return;
+	call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+		fib6_rt_dump(rt, w->args);
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+			    struct fib6_walker *w)
+{
+	w->root = &tb->tb6_root;
+	read_lock_bh(&tb->tb6_lock);
+	fib6_walk(net, w);
+	read_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+	struct fib6_dump_arg arg;
+	struct fib6_walker *w;
+	unsigned int h;
+
+	w = kzalloc(sizeof(*w), GFP_ATOMIC);
+	if (!w)
+		return -ENOMEM;
+
+	w->func = fib6_node_dump;
+	arg.net = net;
+	arg.nb = nb;
+	w->args = &arg;
+
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+			fib6_table_dump(net, tb, w);
+	}
+
+	kfree(w);
+
+	return 0;
+}
+
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
@@ -733,8 +829,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 			}
 			fn = fn->parent;
 		}
-		/* No more references are possible at this point. */
-		BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
 	}
 }
 
@@ -879,6 +973,8 @@ add:
 		*ins = rt;
 		rt->rt6i_node = fn;
 		atomic_inc(&rt->rt6i_ref);
+		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+					  rt);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -906,6 +1002,8 @@ add:
 		rt->rt6i_node = fn;
 		rt->dst.rt6_next = iter->dst.rt6_next;
 		atomic_inc(&rt->rt6i_ref);
+		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+					  rt);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -913,6 +1011,7 @@ add:
 			fn->fn_flags |= RTN_RTINFO;
 		}
 		nsiblings = iter->rt6i_nsiblings;
+		iter->rt6i_node = NULL;
 		fib6_purge_rt(iter, fn, info->nl_net);
 		rt6_release(iter);
 
@@ -925,6 +1024,7 @@ add:
 					break;
 				if (rt6_qualify_for_ecmp(iter)) {
 					*ins = iter->dst.rt6_next;
+					iter->rt6i_node = NULL;
 					fib6_purge_rt(iter, fn, info->nl_net);
 					rt6_release(iter);
 					nsiblings--;
@@ -1459,6 +1559,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 
 	fib6_purge_rt(rt, fn, net);
 
+	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
 	rt6_release(rt);
@@ -1839,6 +1940,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
 static int __net_init fib6_net_init(struct net *net)
 {
 	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+	int err;
+
+	err = fib6_notifier_init(net);
+	if (err)
+		return err;
 
 	spin_lock_init(&net->ipv6.fib6_gc_lock);
 	rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1891,6 +1997,7 @@ out_fib_table_hash:
 out_rt6_stats:
 	kfree(net->ipv6.rt6_stats);
 out_timer:
+	fib6_notifier_exit(net);
 	return -ENOMEM;
 }
 
@@ -1907,6 +2014,7 @@ static void fib6_net_exit(struct net *net)
 	kfree(net->ipv6.fib6_main_tbl);
 	kfree(net->ipv6.fib_table_hash);
 	kfree(net->ipv6.rt6_stats);
+	fib6_notifier_exit(net);
 }
 
 static struct pernet_operations fib6_net_ops = {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 162efba0d0cd..43ca864327c7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 }
 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
-static inline int ip6_ufo_append_data(struct sock *sk,
-			struct sk_buff_head *queue,
-			int getfrag(void *from, char *to, int offset, int len,
-			int odd, struct sk_buff *skb),
-			void *from, int length, int hh_len, int fragheaderlen,
-			int exthdrlen, int transhdrlen, int mtu,
-			unsigned int flags, const struct flowi6 *fl6)
-
-{
-	struct sk_buff *skb;
-	int err;
-
-	/* There is support for UDP large send offload by network
-	 * device, so create one single skb packet containing complete
-	 * udp datagram
-	 */
-	skb = skb_peek_tail(queue);
-	if (!skb) {
-		skb = sock_alloc_send_skb(sk,
-			hh_len + fragheaderlen + transhdrlen + 20,
-			(flags & MSG_DONTWAIT), &err);
-		if (!skb)
-			return err;
-
-		/* reserve space for Hardware header */
-		skb_reserve(skb, hh_len);
-
-		/* create space for UDP/IP header */
-		skb_put(skb, fragheaderlen + transhdrlen);
-
-		/* initialize network header pointer */
-		skb_set_network_header(skb, exthdrlen);
-
-		/* initialize protocol header pointer */
-		skb->transport_header = skb->network_header + fragheaderlen;
-
-		skb->protocol = htons(ETH_P_IPV6);
-		skb->csum = 0;
-
-		if (flags & MSG_CONFIRM)
-			skb_set_dst_pending_confirm(skb, 1);
-
-		__skb_queue_tail(queue, skb);
-	} else if (skb_is_gso(skb)) {
-		goto append;
-	}
-
-	skb->ip_summed = CHECKSUM_PARTIAL;
-	/* Specify the length of each IPv6 datagram fragment.
-	 * It has to be a multiple of 8.
-	 */
-	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
-				     sizeof(struct frag_hdr)) & ~7;
-	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
-							 &fl6->daddr,
-							 &fl6->saddr);
-
-append:
-	return skb_append_datato_frags(sk, skb, getfrag, from,
-				       (length - transhdrlen));
-}
-
 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
 					       gfp_t gfp)
 {
@@ -1381,19 +1318,6 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if ((((length + (skb ? skb->len : headersize)) > mtu) ||
-	     (skb && skb_is_gso(skb))) &&
-	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
-		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
-					  hh_len, fragheaderlen, exthdrlen,
-					  transhdrlen, mtu, flags, fl6);
-		if (err)
-			goto error;
-		return 0;
-	}
-
 	if (!skb)
 		goto alloc_new_skb;
 
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 486c2305f53c..79444a4bfd6d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
 	.priority	=	100,
 };
 
-static bool is_vti6_tunnel(const struct net_device *dev)
-{
-	return dev->netdev_ops == &vti6_netdev_ops;
-}
-
-static int vti6_device_event(struct notifier_block *unused,
-			     unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct ip6_tnl *t = netdev_priv(dev);
-
-	if (!is_vti6_tunnel(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_DOWN:
-		if (!net_eq(t->net, dev_net(dev)))
-			xfrm_garbage_collect(t->net);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block vti6_notifier_block __read_mostly = {
-	.notifier_call = vti6_device_event,
-};
-
 /**
  * vti6_tunnel_init - register protocol and reserve needed resources
  *
@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void)
 	const char *msg;
 	int err;
 
-	register_netdevice_notifier(&vti6_notifier_block);
-
 	msg = "tunnel device";
 	err = register_pernet_device(&vti6_net_ops);
 	if (err < 0)
@@ -1216,7 +1187,6 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti6_net_ops);
 pernet_dev_failed:
-	unregister_netdevice_notifier(&vti6_notifier_block);
 	pr_err("vti6 init: failed to register %s\n", msg);
 	return err;
 }
@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void)
 	xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
 	xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti6_net_ops);
-	unregister_netdevice_notifier(&vti6_notifier_block);
 }
 
 module_init(vti6_tunnel_init);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96a819d..aba07fce67fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 	}
 
+	if (cfg->fc_flags & RTF_OFFLOAD) {
+		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
+		goto out;
+	}
+
 	if (cfg->fc_dst_len > 128) {
 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
 		goto out;
@@ -3327,6 +3332,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			goto nla_put_failure;
 	}
 
+	if (rt->rt6i_flags & RTF_OFFLOAD)
+		*flags |= RTNH_F_OFFLOAD;
+
 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
 	if (!skip_oif && rt->dst.dev &&
 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2521690d62d6..ced5dcf37465 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1296,7 +1296,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
-		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+		tcp_rcv_established(sk, skb, tcp_hdr(skb));
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
@@ -1505,8 +1505,7 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v6_do_rcv(sk, skb);
+		ret = tcp_v6_do_rcv(sk, skb);
 	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a2267f80febb..455fd4e39333 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,109 +17,15 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
-					 netdev_features_t features)
+static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+					   netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	unsigned int mss;
-	unsigned int unfrag_ip6hlen, unfrag_len;
-	struct frag_hdr *fptr;
-	u8 *packet_start, *prevhdr;
-	u8 nexthdr;
-	u8 frag_hdr_sz = sizeof(struct frag_hdr);
-	__wsum csum;
-	int tnl_hlen;
-	int err;
-
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		/* Set the IPv6 fragment id if not set yet */
-		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-
-		segs = NULL;
-		goto out;
-	}
 
 	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
 	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
 		segs = skb_udp_tunnel_segment(skb, features, true);
-	else {
-		const struct ipv6hdr *ipv6h;
-		struct udphdr *uh;
-
-		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-			goto out;
-
-		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-		 * do checksum of UDP packets sent as multiple IP fragments.
-		 */
-
-		uh = udp_hdr(skb);
-		ipv6h = ipv6_hdr(skb);
-
-		uh->check = 0;
-		csum = skb_checksum(skb, 0, skb->len, 0);
-		uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
-					  &ipv6h->daddr, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-
-		skb->ip_summed = CHECKSUM_NONE;
-
-		/* If there is no outer header we can fake a checksum offload
-		 * due to the fact that we have already done the checksum in
-		 * software prior to segmenting the frame.
-		 */
-		if (!skb->encap_hdr_csum)
-			features |= NETIF_F_HW_CSUM;
-
-		/* Check if there is enough headroom to insert fragment header. */
-		tnl_hlen = skb_tnl_header_len(skb);
-		if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
-			if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
-				goto out;
-		}
-
-		/* Find the unfragmentable header and shift it left by frag_hdr_sz
-		 * bytes to insert fragment header.
-		 */
-		err = ip6_find_1stfragopt(skb, &prevhdr);
-		if (err < 0)
-			return ERR_PTR(err);
-		unfrag_ip6hlen = err;
-		nexthdr = *prevhdr;
-		*prevhdr = NEXTHDR_FRAGMENT;
-		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
-			     unfrag_ip6hlen + tnl_hlen;
-		packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
-		memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
-		SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
-		skb->mac_header -= frag_hdr_sz;
-		skb->network_header -= frag_hdr_sz;
-
-		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
-		fptr->nexthdr = nexthdr;
-		fptr->reserved = 0;
-		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-		fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-
-		/* Fragment the skb. ipv6 header and the remaining fields of the
-		 * fragment header are updated in ipv6_gso_segment()
-		 */
-		segs = skb_segment(skb, features);
-	}
 
-out:
 	return segs;
 }
 
@@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv6_offload = {
 	.callbacks = {
-		.gso_segment	=	udp6_ufo_fragment,
+		.gso_segment	=	udp6_tunnel_segment,
 		.gro_receive	=	udp6_gro_receive,
 		.gro_complete	=	udp6_gro_complete,
 	},
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 79651bc71bf0..f44b25a48478 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	}
 }
 
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops_template = {
 	.family =		AF_INET6,
-	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
 	.redirect =		xfrm6_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
-	.gc_thresh =		INT_MAX,
+	.gc_thresh =		32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c343ac60bf50..c748e8a6a72c 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 	seq_printf(seq,
 		   "   psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
 		   psock->index,
-		   psock->strp.stats.rx_msgs,
-		   psock->strp.stats.rx_bytes,
+		   psock->strp.stats.msgs,
+		   psock->strp.stats.bytes,
 		   psock->stats.tx_msgs,
 		   psock->stats.tx_bytes,
 		   psock->sk->sk_receive_queue.qlen,
@@ -170,22 +170,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 	if (psock->tx_stopped)
 		seq_puts(seq, "TxStop ");
 
-	if (psock->strp.rx_stopped)
+	if (psock->strp.stopped)
 		seq_puts(seq, "RxStop ");
 
 	if (psock->tx_kcm)
 		seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
-	if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+	if (!psock->strp.paused && !psock->ready_rx_msg) {
 		if (psock->sk->sk_receive_queue.qlen) {
-			if (psock->strp.rx_need_bytes)
+			if (psock->strp.need_bytes)
 				seq_printf(seq, "RxWait=%u ",
-					   psock->strp.rx_need_bytes);
+					   psock->strp.need_bytes);
 			else
 				seq_printf(seq, "RxWait ");
 		}
 	} else  {
-		if (psock->strp.rx_paused)
+		if (psock->strp.paused)
 			seq_puts(seq, "RxPause ");
 
 		if (psock->ready_rx_msg)
@@ -371,20 +371,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
 		   "",
-		   strp_stats.rx_msgs,
-		   strp_stats.rx_bytes,
+		   strp_stats.msgs,
+		   strp_stats.bytes,
 		   psock_stats.tx_msgs,
 		   psock_stats.tx_bytes,
 		   psock_stats.reserved,
 		   psock_stats.unreserved,
-		   strp_stats.rx_aborts,
-		   strp_stats.rx_interrupted,
-		   strp_stats.rx_unrecov_intr,
-		   strp_stats.rx_mem_fail,
-		   strp_stats.rx_need_more_hdr,
-		   strp_stats.rx_bad_hdr_len,
-		   strp_stats.rx_msg_too_big,
-		   strp_stats.rx_msg_timeouts,
+		   strp_stats.aborts,
+		   strp_stats.interrupted,
+		   strp_stats.unrecov_intr,
+		   strp_stats.mem_fail,
+		   strp_stats.need_more_hdr,
+		   strp_stats.bad_hdr_len,
+		   strp_stats.msg_too_big,
+		   strp_stats.msg_timeouts,
 		   psock_stats.tx_aborts);
 
 	return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index da49191f7ad0..88ce73288247 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
 				    struct kcm_psock *psock)
 {
 	STRP_STATS_ADD(mux->stats.rx_bytes,
-		       psock->strp.stats.rx_bytes -
+		       psock->strp.stats.bytes -
 		       psock->saved_rx_bytes);
 	mux->stats.rx_msgs +=
-		psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
-	psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
-	psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
+		psock->strp.stats.msgs - psock->saved_rx_msgs;
+	psock->saved_rx_msgs = psock->strp.stats.msgs;
+	psock->saved_rx_bytes = psock->strp.stats.bytes;
 }
 
 static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct kcm_sock *kcm = kcm_sk(sk);
 	int err = 0;
 	long timeo;
-	struct strp_rx_msg *rxm;
+	struct strp_msg *stm;
 	int copied = 0;
 	struct sk_buff *skb;
 
@@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = strp_rx_msg(skb);
+	stm = strp_msg(skb);
 
-	if (len > rxm->full_len)
-		len = rxm->full_len;
+	if (len > stm->full_len)
+		len = stm->full_len;
 
-	err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
+	err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
 	if (err < 0)
 		goto out;
 
 	copied = len;
 	if (likely(!(flags & MSG_PEEK))) {
 		KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
-		if (copied < rxm->full_len) {
+		if (copied < stm->full_len) {
 			if (sock->type == SOCK_DGRAM) {
 				/* Truncated message */
 				msg->msg_flags |= MSG_TRUNC;
 				goto msg_finished;
 			}
-			rxm->offset += copied;
-			rxm->full_len -= copied;
+			stm->offset += copied;
+			stm->full_len -= copied;
 		} else {
 msg_finished:
 			/* Finished with message */
@@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 	struct sock *sk = sock->sk;
 	struct kcm_sock *kcm = kcm_sk(sk);
 	long timeo;
-	struct strp_rx_msg *rxm;
+	struct strp_msg *stm;
 	int err = 0;
 	ssize_t copied;
 	struct sk_buff *skb;
@@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = strp_rx_msg(skb);
+	stm = strp_msg(skb);
 
-	if (len > rxm->full_len)
-		len = rxm->full_len;
+	if (len > stm->full_len)
+		len = stm->full_len;
 
-	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
+	copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
 	if (copied < 0) {
 		err = copied;
 		goto err_out;
@@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
 	KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
 
-	rxm->offset += copied;
-	rxm->full_len -= copied;
+	stm->offset += copied;
+	stm->full_len -= copied;
 
 	/* We have no way to return MSG_EOR. If all the bytes have been
 	 * read we still leave the message in the receive socket buffer.
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ca9d3ae665e7..10d7133e4fe9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 out:
 	xfrm_pol_put(xp);
-	if (err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 	int err, err2;
 
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
-	if (!err)
-		xfrm_garbage_collect(net);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 3f6c4fa78bdb..245fa350a7a8 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock);
 static DEFINE_MUTEX(recent_mutex);
 
 #ifdef CONFIG_PROC_FS
-static const struct file_operations recent_old_fops, recent_mt_fops;
+static const struct file_operations recent_mt_fops;
 #endif
 
 static u_int32_t hash_rnd __read_mostly;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 45fe8c8a884d..f6e229b51dfb 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -335,8 +335,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 			     const struct dp_upcall_info *upcall_info,
 				 uint32_t cutlen)
 {
-	unsigned short gso_type = skb_shinfo(skb)->gso_type;
-	struct sw_flow_key later_key;
 	struct sk_buff *segs, *nskb;
 	int err;
 
@@ -347,21 +345,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 	if (segs == NULL)
 		return -EINVAL;
 
-	if (gso_type & SKB_GSO_UDP) {
-		/* The initial flow key extracted by ovs_flow_key_extract()
-		 * in this case is for a first fragment, so we need to
-		 * properly mark later fragments.
-		 */
-		later_key = *key;
-		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-	}
-
 	/* Queue all of the segments. */
 	skb = segs;
 	do {
-		if (gso_type & SKB_GSO_UDP && skb != segs)
-			key = &later_key;
-
 		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
 		if (err)
 			break;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 3f76cb765e5b..8c94cef25a72 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -72,8 +72,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			   const struct sk_buff *skb)
 {
 	struct flow_stats *stats;
-	int node = numa_node_id();
-	int cpu = smp_processor_id();
+	unsigned int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
 	stats = rcu_dereference(flow->stats[cpu]);
@@ -108,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 							      __GFP_THISNODE |
 							      __GFP_NOWARN |
 							      __GFP_NOMEMALLOC,
-							      node);
+							      numa_node_id());
 				if (likely(new_stats)) {
 					new_stats->used = jiffies;
 					new_stats->packet_count = 1;
@@ -118,6 +117,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
 					rcu_assign_pointer(flow->stats[cpu],
 							   new_stats);
+					cpumask_set_cpu(cpu, &flow->cpu_used_mask);
 					goto unlock;
 				}
 			}
@@ -145,7 +145,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
 		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
@@ -169,7 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
 	int cpu;
 
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
 		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
@@ -584,8 +584,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
 			return 0;
 		}
-		if (nh->frag_off & htons(IP_MF) ||
-			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+		if (nh->frag_off & htons(IP_MF))
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
 		else
 			key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -701,9 +700,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
 			return 0;
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-			key->ip.frag = OVS_FRAG_TYPE_FIRST;
-
 		/* Transport layer. */
 		if (key->ip.proto == NEXTHDR_TCP) {
 			if (tcphdr_ok(skb)) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a9bc1c875965..1875bba4f865 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -31,6 +31,7 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+#include <linux/cpumask.h>
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/dst_metadata.h>
@@ -219,6 +220,7 @@ struct sw_flow {
 					 */
 	struct sw_flow_key key;
 	struct sw_flow_id id;
+	struct cpumask cpu_used_mask;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
 	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index ea7a8073fa02..80ea2a71852e 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	RCU_INIT_POINTER(flow->stats[0], stats);
 
+	cpumask_set_cpu(0, &flow->cpu_used_mask);
+
 	return flow;
 err:
 	kmem_cache_free(flow_cache, flow);
@@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow)
 	if (flow->sf_acts)
 		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
 		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
 					(struct flow_stats __force *)flow->stats[cpu]);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0615c2a950fa..5a178047a7ce 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 #define BLK_PLUS_PRIV(sz_of_priv) \
 	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
 
-#define PGV_FROM_VMALLOC 1
-
 #define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)
 #define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
 #define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 50a3789ac23e..7ee2d5d68b78 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -151,6 +151,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	struct rds_transport *loop_trans;
 	unsigned long flags;
 	int ret, i;
+	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
 	rcu_read_lock();
 	conn = rds_conn_lookup(net, head, laddr, faddr, trans);
@@ -172,6 +173,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		conn = ERR_PTR(-ENOMEM);
 		goto out;
 	}
+	conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp);
+	if (!conn->c_path) {
+		kmem_cache_free(rds_conn_slab, conn);
+		conn = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 
 	INIT_HLIST_NODE(&conn->c_hash_node);
 	conn->c_laddr = laddr;
@@ -181,6 +188,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
 	ret = rds_cong_get_maps(conn);
 	if (ret) {
+		kfree(conn->c_path);
 		kmem_cache_free(rds_conn_slab, conn);
 		conn = ERR_PTR(ret);
 		goto out;
@@ -207,13 +215,14 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	conn->c_trans = trans;
 
 	init_waitqueue_head(&conn->c_hs_waitq);
-	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+	for (i = 0; i < npaths; i++) {
 		__rds_conn_path_init(conn, &conn->c_path[i],
 				     is_outgoing);
 		conn->c_path[i].cp_index = i;
 	}
 	ret = trans->conn_alloc(conn, gfp);
 	if (ret) {
+		kfree(conn->c_path);
 		kmem_cache_free(rds_conn_slab, conn);
 		conn = ERR_PTR(ret);
 		goto out;
@@ -236,6 +245,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		/* Creating passive conn */
 		if (parent->c_passive) {
 			trans->conn_free(conn->c_path[0].cp_transport_data);
+			kfree(conn->c_path);
 			kmem_cache_free(rds_conn_slab, conn);
 			conn = parent->c_passive;
 		} else {
@@ -252,7 +262,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 			struct rds_conn_path *cp;
 			int i;
 
-			for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+			for (i = 0; i < npaths; i++) {
 				cp = &conn->c_path[i];
 				/* The ->conn_alloc invocation may have
 				 * allocated resource for all paths, so all
@@ -261,6 +271,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 				if (cp->cp_transport_data)
 					trans->conn_free(cp->cp_transport_data);
 			}
+			kfree(conn->c_path);
 			kmem_cache_free(rds_conn_slab, conn);
 			conn = found;
 		} else {
@@ -374,13 +385,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 	if (!cp->cp_transport_data)
 		return;
 
-	rds_conn_path_drop(cp);
-	flush_work(&cp->cp_down_w);
-
 	/* make sure lingering queued work won't try to ref the conn */
 	cancel_delayed_work_sync(&cp->cp_send_w);
 	cancel_delayed_work_sync(&cp->cp_recv_w);
 
+	rds_conn_path_drop(cp, true);
+	flush_work(&cp->cp_down_w);
+
 	/* tear down queued messages */
 	list_for_each_entry_safe(rm, rtmp,
 				 &cp->cp_send_queue,
@@ -407,6 +418,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	unsigned long flags;
 	int i;
 	struct rds_conn_path *cp;
+	int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
 	rdsdebug("freeing conn %p for %pI4 -> "
 		 "%pI4\n", conn, &conn->c_laddr,
@@ -420,7 +432,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	synchronize_rcu();
 
 	/* shut the connection down */
-	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+	for (i = 0; i < npaths; i++) {
 		cp = &conn->c_path[i];
 		rds_conn_path_destroy(cp);
 		BUG_ON(!list_empty(&cp->cp_retrans));
@@ -434,6 +446,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	rds_cong_remove_conn(conn);
 
 	put_net(conn->c_net);
+	kfree(conn->c_path);
 	kmem_cache_free(rds_conn_slab, conn);
 
 	spin_lock_irqsave(&rds_conn_lock, flags);
@@ -464,8 +477,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 	     i++, head++) {
 		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 			struct rds_conn_path *cp;
+			int npaths;
+
+			npaths = (conn->c_trans->t_mp_capable ?
+				 RDS_MPATH_WORKERS : 1);
 
-			for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+			for (j = 0; j < npaths; j++) {
 				cp = &conn->c_path[j];
 				if (want_send)
 					list = &cp->cp_send_queue;
@@ -486,8 +503,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 				}
 
 				spin_unlock_irqrestore(&cp->cp_lock, flags);
-				if (!conn->c_trans->t_mp_capable)
-					break;
 			}
 		}
 	}
@@ -571,15 +586,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
 	     i++, head++) {
 		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 			struct rds_conn_path *cp;
+			int npaths;
 
-			for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+			npaths = (conn->c_trans->t_mp_capable ?
+				 RDS_MPATH_WORKERS : 1);
+			for (j = 0; j < npaths; j++) {
 				cp = &conn->c_path[j];
 
 				/* XXX no cp_lock usage.. */
 				if (!visitor(cp, buffer))
 					continue;
-				if (!conn->c_trans->t_mp_capable)
-					break;
 			}
 
 			/* We copy as much as we can fit in the buffer,
@@ -664,9 +680,13 @@ void rds_conn_exit(void)
 /*
  * Force a disconnect
  */
-void rds_conn_path_drop(struct rds_conn_path *cp)
+void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
 	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+
+	if (!destroy && cp->cp_conn->c_destroy_in_prog)
+		return;
+
 	queue_work(rds_wq, &cp->cp_down_w);
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -674,7 +694,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 void rds_conn_drop(struct rds_connection *conn)
 {
 	WARN_ON(conn->c_trans->t_mp_capable);
-	rds_conn_path_drop(&conn->c_path[0]);
+	rds_conn_path_drop(&conn->c_path[0], false);
 }
 EXPORT_SYMBOL_GPL(rds_conn_drop);
 
@@ -706,5 +726,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
 	vprintk(fmt, ap);
 	va_end(ap);
 
-	rds_conn_path_drop(cp);
+	rds_conn_path_drop(cp, false);
 }
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 516bcc89b46f..2e0315b159cb 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -154,7 +154,7 @@ struct rds_connection {
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
 
-	struct rds_conn_path	c_path[RDS_MPATH_WORKERS];
+	struct rds_conn_path	*c_path;
 	wait_queue_head_t	c_hs_waitq; /* handshake waitq */
 
 	u32			c_my_gen_num;
@@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
-void rds_conn_path_drop(struct rds_conn_path *cpath);
+void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
 void rds_conn_connect_if_down(struct rds_connection *conn);
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 431404dbdad1..6b7ee71f40c6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 			continue;
 
 		/* reconnect with new parameters */
-		rds_conn_path_drop(tc->t_cpath);
+		rds_conn_path_drop(tc->t_cpath, false);
 	}
 	spin_unlock_irq(&rds_tcp_conn_lock);
 }
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index cbe08a1fa4c7..46f74dad0e16 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
 		if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
 		    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
 					     RDS_CONN_ERROR)) {
-			rds_conn_path_drop(cp);
+			rds_conn_path_drop(cp, false);
 		} else {
 			rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
 		}
 		break;
 	case TCP_CLOSE_WAIT:
 	case TCP_CLOSE:
-		rds_conn_path_drop(cp);
+		rds_conn_path_drop(cp, false);
 	default:
 		break;
 	}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 0d8616aa5bad..dc860d1bb608 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -157,7 +157,7 @@ out:
 					"returned %d, "
 					"disconnecting and reconnecting\n",
 					&conn->c_faddr, cp->cp_index, ret);
-				rds_conn_path_drop(cp);
+				rds_conn_path_drop(cp, false);
 			}
 		}
 	}
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 2852bc1d37d4..f121daa402c8 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
 				"current state is %d\n",
 				__func__,
 				atomic_read(&cp->cp_state));
-		rds_conn_path_drop(cp);
+		rds_conn_path_drop(cp, false);
 		return;
 	}
 
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 69b97339ff9d..8c0db9b3e4ab 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -15,7 +15,7 @@
 #include <net/netns/generic.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
+#include "protocol.h"
 
 #if 0
 #define CHECK_SLAB_OKAY(X)				     \
diff --git a/include/rxrpc/packet.h b/net/rxrpc/protocol.h
index a2dcfb850b9f..4bddcf3face3 100644
--- a/include/rxrpc/packet.h
+++ b/net/rxrpc/protocol.h
@@ -187,49 +187,4 @@ struct rxkad_response {
 	__be32		ticket_len;	/* Kerberos ticket length  */
 } __packed;
 
-/*****************************************************************************/
-/*
- * RxRPC-level abort codes
- */
-#define RX_CALL_DEAD		-1	/* call/conn has been inactive and is shut down */
-#define RX_INVALID_OPERATION	-2	/* invalid operation requested / attempted */
-#define RX_CALL_TIMEOUT		-3	/* call timeout exceeded */
-#define RX_EOF			-4	/* unexpected end of data on read op */
-#define RX_PROTOCOL_ERROR	-5	/* low-level protocol error */
-#define RX_USER_ABORT		-6	/* generic user abort */
-#define RX_ADDRINUSE		-7	/* UDP port in use */
-#define RX_DEBUGI_BADTYPE	-8	/* bad debugging packet type */
-
-/*
- * (un)marshalling abort codes (rxgen)
- */
-#define	RXGEN_CC_MARSHAL    -450
-#define	RXGEN_CC_UNMARSHAL  -451
-#define	RXGEN_SS_MARSHAL    -452
-#define	RXGEN_SS_UNMARSHAL  -453
-#define	RXGEN_DECODE	    -454
-#define	RXGEN_OPCODE	    -455
-#define	RXGEN_SS_XDRFREE    -456
-#define	RXGEN_CC_XDRFREE    -457
-
-/*
- * Rx kerberos security abort codes
- * - unfortunately we have no generalised security abort codes to say things
- *   like "unsupported security", so we have to use these instead and hope the
- *   other side understands
- */
-#define RXKADINCONSISTENCY	19270400	/* security module structure inconsistent */
-#define RXKADPACKETSHORT	19270401	/* packet too short for security challenge */
-#define RXKADLEVELFAIL		19270402	/* security level negotiation failed */
-#define RXKADTICKETLEN		19270403	/* ticket length too short or too long */
-#define RXKADOUTOFSEQUENCE	19270404	/* packet had bad sequence number */
-#define RXKADNOAUTH		19270405	/* caller not authorised */
-#define RXKADBADKEY		19270406	/* illegal key: bad parity or weak */
-#define RXKADBADTICKET		19270407	/* security object was passed a bad ticket */
-#define RXKADUNKNOWNKEY		19270408	/* ticket contained unknown key version number */
-#define RXKADEXPIRED		19270409	/* authentication expired */
-#define RXKADSEALEDINCON	19270410	/* sealed data inconsistent */
-#define RXKADDATALEN		19270411	/* user data too long */
-#define RXKADILLEGALLEVEL	19270412	/* caller not authorised to use encrypted conns */
-
 #endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f2e9ed34a963..a2915d958279 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -110,6 +110,8 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	u32 act_flags = cb->args[2];
+	unsigned long jiffy_since = cb->args[3];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -127,6 +129,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			if (index < s_i)
 				continue;
 
+			if (jiffy_since &&
+			    time_after(jiffy_since,
+				       (unsigned long)p->tcfa_tm.lastuse))
+				continue;
+
 			nest = nla_nest_start(skb, n_i);
 			if (nest == NULL)
 				goto nla_put_failure;
@@ -138,14 +145,20 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			}
 			nla_nest_end(skb, nest);
 			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+			    n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
 		}
 	}
 done:
+	if (index >= 0)
+		cb->args[0] = index + 1;
+
 	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
-		cb->args[0] += n_i;
+	if (n_i) {
+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -460,9 +473,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res)
 {
-	int ret = -1, i;
 	u32 jmp_prgcnt = 0;
 	u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
+	int i;
+	int ret = TC_ACT_OK;
 
 	if (skb_skip_tc_classify(skb))
 		return TC_ACT_OK;
@@ -1068,11 +1082,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+	[TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
+			     .validation_data = &tcaa_root_flags_allowed },
+	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCA_ACT_MAX + 1];
+	struct nlattr *tca[TCA_ROOT_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1080,7 +1101,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1121,16 +1142,12 @@ replay:
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1157,8 +1174,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action_ops *a_o;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	struct nlattr *count_attr = NULL;
+	unsigned long jiffy_since = 0;
+	struct nlattr *kind = NULL;
+	struct nla_bitfield32 bf;
+	u32 msecs_since = 0;
+	u32 act_count = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
 
+	kind = find_dump_kind(tb);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1168,14 +1197,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	cb->args[2] = 0;
+	if (tb[TCA_ROOT_FLAGS]) {
+		bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
+		cb->args[2] = bf.value;
+	}
+
+	if (tb[TCA_ROOT_TIME_DELTA]) {
+		msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+	}
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	if (msecs_since)
+		jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
+	cb->args[3] = jiffy_since;
+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+	if (!count_attr)
+		goto out_module_put;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
 	if (nest == NULL)
@@ -1188,6 +1235,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		act_count = cb->args[1];
+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3317a2f579da..67afc12df88b 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -231,9 +231,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
 	const struct iphdr *iph;
 	u16 ul;
 
-	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-		return 1;
-
 	/*
 	 * Support both UDP and UDPLITE checksum algorithms, Don't use
 	 * udph->len to get the real length without any protocol check,
@@ -287,9 +284,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
 	const struct ipv6hdr *ip6h;
 	u16 ul;
 
-	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-		return 1;
-
 	/*
 	 * Support both UDP and UDPLITE checksum algorithms, Don't use
 	 * udph->len to get the real length without any protocol check,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 39da0c5801c9..e655221c654e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -883,18 +883,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 }
 EXPORT_SYMBOL(tcf_exts_validate);
 
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
-		     struct tcf_exts *src)
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_exts old = *dst;
 
-	tcf_tree_lock(tp);
-	dst->nr_actions = src->nr_actions;
-	dst->actions = src->actions;
-	dst->type = src->type;
-	tcf_tree_unlock(tp);
-
+	*dst = *src;
 	tcf_exts_destroy(&old);
 #endif
 }
@@ -915,7 +909,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
 	struct nlattr *nest;
 
-	if (exts->action && exts->nr_actions) {
+	if (exts->action && tcf_exts_has_actions(exts)) {
 		/*
 		 * again for backward compatible mode - we want
 		 * to work with both old and new modes of entering
@@ -972,7 +966,7 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	tcf_exts_to_list(exts, &actions);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index c4fd63a068f9..7c7a82138f76 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -129,33 +129,22 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 			   struct nlattr *est, bool ovr)
 {
 	int err;
-	struct tcf_exts e;
-	struct tcf_ematch_tree t;
 
-	err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches);
 	if (err < 0)
-		goto errout;
+		return err;
 
 	if (tb[TCA_BASIC_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
-	tcf_exts_change(tp, &f->exts, &e);
-	tcf_em_tree_change(tp, &f->ematches, &t);
 	f->tp = tp;
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index f57bd531ba98..2d4d06e41cd9 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -147,24 +147,18 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			       enum tc_clsbpf_command cmd)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_bpf_offload bpf_offload = {};
-	struct tc_to_netdev offload;
+	struct tc_cls_bpf_offload cls_bpf = {};
 	int err;
 
-	offload.type = TC_SETUP_CLSBPF;
-	offload.cls_bpf = &bpf_offload;
-
-	bpf_offload.command = cmd;
-	bpf_offload.exts = &prog->exts;
-	bpf_offload.prog = prog->filter;
-	bpf_offload.name = prog->bpf_name;
-	bpf_offload.exts_integrated = prog->exts_integrated;
-	bpf_offload.gen_flags = prog->gen_flags;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index,
-					    tp->protocol, &offload);
+	tc_cls_common_offload_init(&cls_bpf.common, tp);
+	cls_bpf.command = cmd;
+	cls_bpf.exts = &prog->exts;
+	cls_bpf.prog = prog->filter;
+	cls_bpf.name = prog->bpf_name;
+	cls_bpf.exts_integrated = prog->exts_integrated;
+	cls_bpf.gen_flags = prog->gen_flags;
 
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf);
 	if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
 		prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -382,13 +376,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 	return 0;
 }
 
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
-				   struct cls_bpf_prog *prog,
-				   unsigned long base, struct nlattr **tb,
-				   struct nlattr *est, bool ovr)
+static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+			     struct cls_bpf_prog *prog, unsigned long base,
+			     struct nlattr **tb, struct nlattr *est, bool ovr)
 {
 	bool is_bpf, is_ebpf, have_exts = false;
-	struct tcf_exts exts;
 	u32 gen_flags = 0;
 	int ret;
 
@@ -397,30 +389,23 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
 		return -EINVAL;
 
-	ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr);
 	if (ret < 0)
 		return ret;
-	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
-	if (ret < 0)
-		goto errout;
 
 	if (tb[TCA_BPF_FLAGS]) {
 		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
 
-		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
-			ret = -EINVAL;
-			goto errout;
-		}
+		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+			return -EINVAL;
 
 		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
 	}
 	if (tb[TCA_BPF_FLAGS_GEN]) {
 		gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
 		if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
-		    !tc_flags_valid(gen_flags)) {
-			ret = -EINVAL;
-			goto errout;
-		}
+		    !tc_flags_valid(gen_flags))
+			return -EINVAL;
 	}
 
 	prog->exts_integrated = have_exts;
@@ -429,19 +414,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
 		       cls_bpf_prog_from_efd(tb, prog, tp);
 	if (ret < 0)
-		goto errout;
+		return ret;
 
 	if (tb[TCA_BPF_CLASSID]) {
 		prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
 		tcf_bind_filter(tp, &prog->res, base);
 	}
 
-	tcf_exts_change(tp, &prog->exts, &exts);
 	return 0;
-
-errout:
-	tcf_exts_destroy(&exts);
-	return ret;
 }
 
 static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -508,8 +488,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 	}
 
-	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
-				      ovr);
+	ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
 	if (ret < 0)
 		goto errout;
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 12ce547eea04..df7a582775df 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -76,8 +76,6 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 	struct cls_cgroup_head *new;
-	struct tcf_ematch_tree t;
-	struct tcf_exts e;
 	int err;
 
 	if (!tca[TCA_OPTIONS])
@@ -103,23 +101,13 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout;
 
-	err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr);
 	if (err < 0)
 		goto errout;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
-	if (err < 0) {
-		tcf_exts_destroy(&e);
-		goto errout;
-	}
 
-	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
-	if (err < 0) {
-		tcf_exts_destroy(&e);
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches);
+	if (err < 0)
 		goto errout;
-	}
-
-	tcf_exts_change(tp, &new->exts, &e);
-	tcf_em_tree_change(tp, &new->ematches, &t);
 
 	rcu_assign_pointer(tp->root, new);
 	if (head)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3065752b9cda..55e281b20140 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -388,8 +388,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	struct flow_filter *fold, *fnew;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FLOW_MAX + 1];
-	struct tcf_exts e;
-	struct tcf_ematch_tree t;
 	unsigned int nkeys = 0;
 	unsigned int perturb_period = 0;
 	u32 baseclass = 0;
@@ -425,31 +423,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 			return -EOPNOTSUPP;
 	}
 
-	err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-	if (err < 0)
-		goto err1;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
-	if (err < 0)
-		goto err1;
+	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+	if (!fnew)
+		return -ENOBUFS;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
 	if (err < 0)
 		goto err1;
 
-	err = -ENOBUFS;
-	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
-	if (!fnew)
+	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	if (err < 0)
 		goto err2;
 
-	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
 	if (err < 0)
-		goto err3;
+		goto err2;
 
 	fold = (struct flow_filter *)*arg;
 	if (fold) {
 		err = -EINVAL;
 		if (fold->handle != handle && handle)
-			goto err3;
+			goto err2;
 
 		/* Copy fold into fnew */
 		fnew->tp = fold->tp;
@@ -469,31 +463,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 		if (tb[TCA_FLOW_MODE])
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
-			goto err3;
+			goto err2;
 
 		if (mode == FLOW_MODE_HASH)
 			perturb_period = fold->perturb_period;
 		if (tb[TCA_FLOW_PERTURB]) {
 			if (mode != FLOW_MODE_HASH)
-				goto err3;
+				goto err2;
 			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
 		}
 	} else {
 		err = -EINVAL;
 		if (!handle)
-			goto err3;
+			goto err2;
 		if (!tb[TCA_FLOW_KEYS])
-			goto err3;
+			goto err2;
 
 		mode = FLOW_MODE_MAP;
 		if (tb[TCA_FLOW_MODE])
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
-			goto err3;
+			goto err2;
 
 		if (tb[TCA_FLOW_PERTURB]) {
 			if (mode != FLOW_MODE_HASH)
-				goto err3;
+				goto err2;
 			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
 		}
 
@@ -511,9 +505,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
 			       (unsigned long)fnew);
 
-	tcf_exts_change(tp, &fnew->exts, &e);
-	tcf_em_tree_change(tp, &fnew->ematches, &t);
-
 	netif_keep_dst(qdisc_dev(tp->q));
 
 	if (tb[TCA_FLOW_KEYS]) {
@@ -552,13 +543,11 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 		call_rcu(&fold->rcu, flow_destroy_filter);
 	return 0;
 
-err3:
-	tcf_exts_destroy(&fnew->exts);
 err2:
-	tcf_em_tree_destroy(&t);
-	kfree(fnew);
+	tcf_exts_destroy(&fnew->exts);
+	tcf_em_tree_destroy(&fnew->ematches);
 err1:
-	tcf_exts_destroy(&e);
+	kfree(fnew);
 	return err;
 }
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7832eb93379b..1474bacf4df4 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -88,7 +88,6 @@ struct cls_fl_filter {
 	u32 handle;
 	u32 flags;
 	struct rcu_head	rcu;
-	struct tc_to_netdev tc;
 	struct net_device *hw_dev;
 };
 
@@ -225,22 +224,17 @@ static void fl_destroy_filter(struct rcu_head *head)
 
 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-	struct tc_cls_flower_offload offload = {0};
+	struct tc_cls_flower_offload cls_flower = {};
 	struct net_device *dev = f->hw_dev;
-	struct tc_to_netdev *tc = &f->tc;
 
 	if (!tc_can_offload(dev, tp))
 		return;
 
-	offload.command = TC_CLSFLOWER_DESTROY;
-	offload.prio = tp->prio;
-	offload.cookie = (unsigned long)f;
+	tc_cls_common_offload_init(&cls_flower.common, tp);
+	cls_flower.command = TC_CLSFLOWER_DESTROY;
+	cls_flower.cookie = (unsigned long) f;
 
-	tc->type = TC_SETUP_CLSFLOWER;
-	tc->cls_flower = &offload;
-
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
-				      tp->protocol, tc);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -249,8 +243,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 				struct cls_fl_filter *f)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_flower_offload offload = {0};
-	struct tc_to_netdev *tc = &f->tc;
+	struct tc_cls_flower_offload cls_flower = {};
 	int err;
 
 	if (!tc_can_offload(dev, tp)) {
@@ -260,24 +253,21 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 			return tc_skip_sw(f->flags) ? -EINVAL : 0;
 		}
 		dev = f->hw_dev;
-		tc->egress_dev = true;
+		cls_flower.egress_dev = true;
 	} else {
 		f->hw_dev = dev;
 	}
 
-	offload.command = TC_CLSFLOWER_REPLACE;
-	offload.prio = tp->prio;
-	offload.cookie = (unsigned long)f;
-	offload.dissector = dissector;
-	offload.mask = mask;
-	offload.key = &f->mkey;
-	offload.exts = &f->exts;
-
-	tc->type = TC_SETUP_CLSFLOWER;
-	tc->cls_flower = &offload;
+	tc_cls_common_offload_init(&cls_flower.common, tp);
+	cls_flower.command = TC_CLSFLOWER_REPLACE;
+	cls_flower.cookie = (unsigned long) f;
+	cls_flower.dissector = dissector;
+	cls_flower.mask = mask;
+	cls_flower.key = &f->mkey;
+	cls_flower.exts = &f->exts;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index, tp->protocol, tc);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+					    &cls_flower);
 	if (!err)
 		f->flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -288,23 +278,19 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 
 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-	struct tc_cls_flower_offload offload = {0};
+	struct tc_cls_flower_offload cls_flower = {};
 	struct net_device *dev = f->hw_dev;
-	struct tc_to_netdev *tc = &f->tc;
 
 	if (!tc_can_offload(dev, tp))
 		return;
 
-	offload.command = TC_CLSFLOWER_STATS;
-	offload.prio = tp->prio;
-	offload.cookie = (unsigned long)f;
-	offload.exts = &f->exts;
-
-	tc->type = TC_SETUP_CLSFLOWER;
-	tc->cls_flower = &offload;
+	tc_cls_common_offload_init(&cls_flower.common, tp);
+	cls_flower.command = TC_CLSFLOWER_STATS;
+	cls_flower.cookie = (unsigned long) f;
+	cls_flower.exts = &f->exts;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-				      tp->chain->index, tp->protocol, tc);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_CLSFLOWER_STATS,
+				      &cls_flower);
 }
 
 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
@@ -852,15 +838,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 			unsigned long base, struct nlattr **tb,
 			struct nlattr *est, bool ovr)
 {
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
 	if (tb[TCA_FLOWER_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -869,17 +851,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 
 	err = fl_set_key(net, tb, &f->key, &mask->key);
 	if (err)
-		goto errout;
+		return err;
 
 	fl_mask_update_range(mask);
 	fl_set_masked_key(&f->mkey, &f->key, mask);
 
-	tcf_exts_change(tp, &f->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static u32 fl_grab_new_handle(struct tcf_proto *tp,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index d3885362e017..11f178f1b2be 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -190,22 +190,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 	[TCA_FW_MASK]		= { .type = NLA_U32 },
 };
 
-static int
-fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
-		struct nlattr **tb, struct nlattr **tca, unsigned long base,
-		bool ovr)
+static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+			struct fw_filter *f, struct nlattr **tb,
+			struct nlattr **tca, unsigned long base, bool ovr)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct tcf_exts e;
 	u32 mask;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
-	if (err < 0)
-		goto errout;
 
 	if (tb[TCA_FW_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -216,10 +211,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	if (tb[TCA_FW_INDEV]) {
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
-		if (ret < 0) {
-			err = ret;
-			goto errout;
-		}
+		if (ret < 0)
+			return ret;
 		f->ifindex = ret;
 	}
 #endif /* CONFIG_NET_CLS_IND */
@@ -228,16 +221,11 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	if (tb[TCA_FW_MASK]) {
 		mask = nla_get_u32(tb[TCA_FW_MASK]);
 		if (mask != head->mask)
-			goto errout;
+			return err;
 	} else if (head->mask != 0xFFFFFFFF)
-		goto errout;
-
-	tcf_exts_change(tp, &f->exts, &e);
+		return err;
 
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int fw_change(struct net *net, struct sk_buff *in_skb,
@@ -282,7 +270,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 			return err;
 		}
 
-		err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+		err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr);
 		if (err < 0) {
 			tcf_exts_destroy(&fnew->exts);
 			kfree(fnew);
@@ -330,7 +318,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	f->id = handle;
 	f->tp = tp;
 
-	err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+	err = fw_set_parms(net, tp, f, tb, tca, base, ovr);
 	if (err < 0)
 		goto errout;
 
@@ -387,7 +375,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	t->tcm_handle = f->id;
 
-	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+	if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
 		return skb->len;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 9dc26c32cf32..c9f6500b8080 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -54,19 +54,16 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 				  unsigned long cookie)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_to_netdev offload;
-	struct tc_cls_matchall_offload mall_offload = {0};
+	struct tc_cls_matchall_offload cls_mall = {};
 	int err;
 
-	offload.type = TC_SETUP_MATCHALL;
-	offload.cls_mall = &mall_offload;
-	offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
-	offload.cls_mall->exts = &head->exts;
-	offload.cls_mall->cookie = cookie;
+	tc_cls_common_offload_init(&cls_mall.common, tp);
+	cls_mall.command = TC_CLSMATCHALL_REPLACE;
+	cls_mall.exts = &head->exts;
+	cls_mall.cookie = cookie;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index,
-					    tp->protocol, &offload);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL,
+					    &cls_mall);
 	if (!err)
 		head->flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -78,17 +75,13 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
 				   unsigned long cookie)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_to_netdev offload;
-	struct tc_cls_matchall_offload mall_offload = {0};
+	struct tc_cls_matchall_offload cls_mall = {};
 
-	offload.type = TC_SETUP_MATCHALL;
-	offload.cls_mall = &mall_offload;
-	offload.cls_mall->command = TC_CLSMATCHALL_DESTROY;
-	offload.cls_mall->exts = NULL;
-	offload.cls_mall->cookie = cookie;
+	tc_cls_common_offload_init(&cls_mall.common, tp);
+	cls_mall.command = TC_CLSMATCHALL_DESTROY;
+	cls_mall.cookie = cookie;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
-				      tp->protocol, &offload);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall);
 }
 
 static void mall_destroy(struct tcf_proto *tp)
@@ -120,27 +113,17 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 			  unsigned long base, struct nlattr **tb,
 			  struct nlattr *est, bool ovr)
 {
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
-	if (err)
-		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr);
 	if (err < 0)
-		goto errout;
+		return err;
 
 	if (tb[TCA_MATCHALL_CLASSID]) {
 		head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
 		tcf_bind_filter(tp, &head->res, base);
 	}
-
-	tcf_exts_change(tp, &head->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int mall_change(struct net *net, struct sk_buff *in_skb,
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d63d5502ee02..f1e7d7850b44 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -113,7 +113,7 @@ static inline int route4_hash_wild(void)
 #define ROUTE4_APPLY_RESULT()					\
 {								\
 	*res = f->res;						\
-	if (tcf_exts_is_available(&f->exts)) {			\
+	if (tcf_exts_has_actions(&f->exts)) {			\
 		int r = tcf_exts_exec(skb, &f->exts, res);	\
 		if (r < 0) {					\
 			dont_cache = 1;				\
@@ -372,37 +372,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	struct route4_filter *fp;
 	unsigned int h1;
 	struct route4_bucket *b;
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
-	err = -EINVAL;
 	if (tb[TCA_ROUTE4_TO]) {
 		if (new && handle & 0x8000)
-			goto errout;
+			return -EINVAL;
 		to = nla_get_u32(tb[TCA_ROUTE4_TO]);
 		if (to > 0xFF)
-			goto errout;
+			return -EINVAL;
 		nhandle = to;
 	}
 
 	if (tb[TCA_ROUTE4_FROM]) {
 		if (tb[TCA_ROUTE4_IIF])
-			goto errout;
+			return -EINVAL;
 		id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
 		if (id > 0xFF)
-			goto errout;
+			return -EINVAL;
 		nhandle |= id << 16;
 	} else if (tb[TCA_ROUTE4_IIF]) {
 		id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
 		if (id > 0x7FFF)
-			goto errout;
+			return -EINVAL;
 		nhandle |= (id | 0x8000) << 16;
 	} else
 		nhandle |= 0xFFFF << 16;
@@ -410,27 +405,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	if (handle && new) {
 		nhandle |= handle & 0x7F00;
 		if (nhandle != handle)
-			goto errout;
+			return -EINVAL;
 	}
 
 	h1 = to_hash(nhandle);
 	b = rtnl_dereference(head->table[h1]);
 	if (!b) {
-		err = -ENOBUFS;
 		b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
 		if (b == NULL)
-			goto errout;
+			return -ENOBUFS;
 
 		rcu_assign_pointer(head->table[h1], b);
 	} else {
 		unsigned int h2 = from_hash(nhandle >> 16);
 
-		err = -EEXIST;
 		for (fp = rtnl_dereference(b->ht[h2]);
 		     fp;
 		     fp = rtnl_dereference(fp->next))
 			if (fp->handle == f->handle)
-				goto errout;
+				return -EEXIST;
 	}
 
 	if (tb[TCA_ROUTE4_TO])
@@ -450,12 +443,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
-	tcf_exts_change(tp, &f->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 0d9d07798699..4adb67a73491 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -518,7 +518,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 			tcf_bind_filter(tp, &n->res, base);
 		}
 
-		tcf_exts_change(tp, &n->exts, &e);
+		tcf_exts_change(&n->exts, &e);
 		rsvp_replace(tp, n, handle);
 		return 0;
 	}
@@ -591,7 +591,7 @@ insert:
 			if (f->tunnelhdr == 0)
 				tcf_bind_filter(tp, &f->res, base);
 
-			tcf_exts_change(tp, &f->exts, &e);
+			tcf_exts_change(&f->exts, &e);
 
 			fp = &s->ht[h2];
 			for (nfp = rtnl_dereference(*fp); nfp;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 8a8a58357c39..d69f828f3fed 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -52,7 +52,7 @@ struct tcindex_data {
 
 static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
 {
-	return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+	return tcf_exts_has_actions(&r->exts) || r->res.classid;
 }
 
 static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
@@ -419,9 +419,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	}
 
 	if (old_r)
-		tcf_exts_change(tp, &r->exts, &e);
+		tcf_exts_change(&r->exts, &e);
 	else
-		tcf_exts_change(tp, &cr.exts, &e);
+		tcf_exts_change(&cr.exts, &e);
 
 	if (old_r && old_r != r) {
 		err = tcindex_filter_result_init(old_r);
@@ -439,7 +439,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		struct tcindex_filter *nfp;
 		struct tcindex_filter __rcu **fp;
 
-		tcf_exts_change(tp, &f->result.exts, &r->exts);
+		tcf_exts_change(&f->result.exts, &r->exts);
 
 		fp = cp->h + (handle % cp->hash);
 		for (nfp = rtnl_dereference(*fp);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 2d01195153e6..4ed51d347d0a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -431,43 +431,35 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
-
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
-
-	if (tc_should_offload(dev, tp, 0)) {
-		offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
-		offload.cls_u32->knode.handle = handle;
-		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					      tp->chain->index, tp->protocol,
-					      &offload);
-	}
+	struct tc_cls_u32_offload cls_u32 = {};
+
+	if (!tc_should_offload(dev, tp, 0))
+		return;
+
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_DELETE_KNODE;
+	cls_u32.knode.handle = handle;
+
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
 }
 
 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 				u32 flags)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
+	struct tc_cls_u32_offload cls_u32 = {};
 	int err;
 
 	if (!tc_should_offload(dev, tp, flags))
 		return tc_skip_sw(flags) ? -EINVAL : 0;
 
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_NEW_HNODE;
+	cls_u32.hnode.divisor = h->divisor;
+	cls_u32.hnode.handle = h->handle;
+	cls_u32.hnode.prio = h->prio;
 
-	offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
-	offload.cls_u32->hnode.divisor = h->divisor;
-	offload.cls_u32->hnode.handle = h->handle;
-	offload.cls_u32->hnode.prio = h->prio;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index, tp->protocol,
-					    &offload);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
 	if (tc_skip_sw(flags))
 		return err;
 
@@ -477,56 +469,47 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
+	struct tc_cls_u32_offload cls_u32 = {};
 
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
+	if (!tc_should_offload(dev, tp, 0))
+		return;
 
-	if (tc_should_offload(dev, tp, 0)) {
-		offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
-		offload.cls_u32->hnode.divisor = h->divisor;
-		offload.cls_u32->hnode.handle = h->handle;
-		offload.cls_u32->hnode.prio = h->prio;
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_DELETE_HNODE;
+	cls_u32.hnode.divisor = h->divisor;
+	cls_u32.hnode.handle = h->handle;
+	cls_u32.hnode.prio = h->prio;
 
-		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					      tp->chain->index, tp->protocol,
-					      &offload);
-	}
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
 }
 
 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 				u32 flags)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
+	struct tc_cls_u32_offload cls_u32 = {};
 	int err;
 
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
-
 	if (!tc_should_offload(dev, tp, flags))
 		return tc_skip_sw(flags) ? -EINVAL : 0;
 
-	offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
-	offload.cls_u32->knode.handle = n->handle;
-	offload.cls_u32->knode.fshift = n->fshift;
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
+	cls_u32.knode.handle = n->handle;
+	cls_u32.knode.fshift = n->fshift;
 #ifdef CONFIG_CLS_U32_MARK
-	offload.cls_u32->knode.val = n->val;
-	offload.cls_u32->knode.mask = n->mask;
+	cls_u32.knode.val = n->val;
+	cls_u32.knode.mask = n->mask;
 #else
-	offload.cls_u32->knode.val = 0;
-	offload.cls_u32->knode.mask = 0;
+	cls_u32.knode.val = 0;
+	cls_u32.knode.mask = 0;
 #endif
-	offload.cls_u32->knode.sel = &n->sel;
-	offload.cls_u32->knode.exts = &n->exts;
+	cls_u32.knode.sel = &n->sel;
+	cls_u32.knode.exts = &n->exts;
 	if (n->ht_down)
-		offload.cls_u32->knode.link_handle = n->ht_down->handle;
+		cls_u32.knode.link_handle = n->ht_down->handle;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index, tp->protocol,
-					    &offload);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
 
 	if (!err)
 		n->flags |= TCA_CLS_FLAGS_IN_HW;
@@ -723,29 +706,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 			 struct tc_u_knode *n, struct nlattr **tb,
 			 struct nlattr *est, bool ovr)
 {
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
-	err = -EINVAL;
 	if (tb[TCA_U32_LINK]) {
 		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
 		struct tc_u_hnode *ht_down = NULL, *ht_old;
 
 		if (TC_U32_KEY(handle))
-			goto errout;
+			return -EINVAL;
 
 		if (handle) {
 			ht_down = u32_lookup_ht(ht->tp_c, handle);
 
 			if (ht_down == NULL)
-				goto errout;
+				return -EINVAL;
 			ht_down->refcnt++;
 		}
 
@@ -765,16 +743,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
 		if (ret < 0)
-			goto errout;
+			return -EINVAL;
 		n->ifindex = ret;
 	}
 #endif
-	tcf_exts_change(tp, &n->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 572fe2584e48..0af4b1c6f674 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,6 +41,7 @@
 #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
 
 struct atm_flow_data {
+	struct Qdisc_class_common common;
 	struct Qdisc		*q;	/* FIFO, TBF, etc. */
 	struct tcf_proto __rcu	*filter_list;
 	struct tcf_block	*block;
@@ -49,7 +50,6 @@ struct atm_flow_data {
 					   struct sk_buff *skb); /* chaining */
 	struct atm_qdisc_data	*parent;	/* parent qdisc */
 	struct socket		*sock;		/* for closing */
-	u32			classid;	/* x:y type ID */
 	int			ref;		/* reference count */
 	struct gnet_stats_basic_packed	bstats;
 	struct gnet_stats_queue	qstats;
@@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
 	struct atm_flow_data *flow;
 
 	list_for_each_entry(flow, &p->flows, list) {
-		if (flow->classid == classid)
+		if (flow->common.classid == classid)
 			return flow;
 	}
 	return NULL;
@@ -293,7 +293,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	flow->old_pop = flow->vcc->pop;
 	flow->parent = p;
 	flow->vcc->pop = sch_atm_pop;
-	flow->classid = classid;
+	flow->common.classid = classid;
 	flow->ref = 1;
 	flow->excess = excess;
 	list_add(&flow->list, &p->link.list);
@@ -549,7 +549,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 
 	p->link.vcc = NULL;
 	p->link.sock = NULL;
-	p->link.classid = sch->handle;
+	p->link.common.classid = sch->handle;
 	p->link.ref = 1;
 	tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
 	return 0;
@@ -594,7 +594,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 		sch, p, flow, skb, tcm);
 	if (list_empty(&flow->list))
 		return -EINVAL;
-	tcm->tcm_handle = flow->classid;
+	tcm->tcm_handle = flow->common.classid;
 	tcm->tcm_info = flow->q->handle;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -619,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 			goto nla_put_failure;
 	}
 	if (flow->excess) {
-		if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid))
+		if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
 			goto nla_put_failure;
 	} else {
 		if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index e0c02725cd48..2165a05994b7 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -39,11 +39,9 @@ static void mqprio_destroy(struct Qdisc *sch)
 	}
 
 	if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
-		struct tc_mqprio_qopt offload = { 0 };
-		struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
-					   { .mqprio = &offload } };
+		struct tc_mqprio_qopt mqprio = {};
 
-		dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc);
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio);
 	} else {
 		netdev_set_num_tc(dev, 0);
 	}
@@ -148,16 +146,14 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	 * supplied and verified mapping
 	 */
 	if (qopt->hw) {
-		struct tc_mqprio_qopt offload = *qopt;
-		struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
-					   { .mqprio = &offload } };
+		struct tc_mqprio_qopt mqprio = *qopt;
 
-		err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle,
-						    0, 0, &tc);
+		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+						    &mqprio);
 		if (err)
 			return err;
 
-		priv->hw_offload = offload.hw;
+		priv->hw_offload = mqprio.hw;
 	} else {
 		netdev_set_num_tc(dev, qopt->num_tc);
 		for (i = 0; i < qopt->num_tc; i++)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 40ec83679d6e..dfb9651e818b 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
 /* 1st Level Abstractions. */
 
 /* Initialize a new association from provided memory. */
-static struct sctp_association *sctp_association_init(struct sctp_association *asoc,
-					  const struct sctp_endpoint *ep,
-					  const struct sock *sk,
-					  sctp_scope_t scope,
-					  gfp_t gfp)
+static struct sctp_association *sctp_association_init(
+					struct sctp_association *asoc,
+					const struct sctp_endpoint *ep,
+					const struct sock *sk,
+					enum sctp_scope scope, gfp_t gfp)
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
@@ -301,9 +301,8 @@ fail_init:
 
 /* Allocate and initialize a new association */
 struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
-					 const struct sock *sk,
-					 sctp_scope_t scope,
-					 gfp_t gfp)
+					      const struct sock *sk,
+					      enum sctp_scope scope, gfp_t gfp)
 {
 	struct sctp_association *asoc;
 
@@ -797,7 +796,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
  */
 void sctp_assoc_control_transport(struct sctp_association *asoc,
 				  struct sctp_transport *transport,
-				  sctp_transport_cmd_t command,
+				  enum sctp_transport_cmd command,
 				  sctp_sn_error_t error)
 {
 	struct sctp_ulpevent *event;
@@ -1022,11 +1021,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		container_of(work, struct sctp_association,
 			     base.inqueue.immediate);
 	struct net *net = sock_net(asoc->base.sk);
+	union sctp_subtype subtype;
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
 	int state;
-	sctp_subtype_t subtype;
 	int error = 0;
 
 	/* The association should be held so we should be safe. */
@@ -1564,7 +1563,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
  * local endpoint and the remote peer.
  */
 int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
-				     sctp_scope_t scope, gfp_t gfp)
+				     enum sctp_scope scope, gfp_t gfp)
 {
 	int flags;
 
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e001b01b0e68..00667c50efa7 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
  *    are called the two key vectors.
  */
 static struct sctp_auth_bytes *sctp_auth_make_key_vector(
-			sctp_random_param_t *random,
-			sctp_chunks_param_t *chunks,
-			sctp_hmac_algo_param_t *hmacs,
+			struct sctp_random_param *random,
+			struct sctp_chunks_param *chunks,
+			struct sctp_hmac_algo_param *hmacs,
 			gfp_t gfp)
 {
 	struct sctp_auth_bytes *new;
@@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
 				    gfp_t gfp)
 {
 	return sctp_auth_make_key_vector(
-				    (sctp_random_param_t *)asoc->c.auth_random,
-				    (sctp_chunks_param_t *)asoc->c.auth_chunks,
-				    (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs,
-				    gfp);
+			(struct sctp_random_param *)asoc->c.auth_random,
+			(struct sctp_chunks_param *)asoc->c.auth_chunks,
+			(struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp);
 }
 
 /* Make a key vector based on peer's parameters */
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 1ebc184a0e23..7df3704982f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -45,9 +45,9 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
-			      union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
-			      int flags);
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
+			      union sctp_addr *addr, enum sctp_scope scope,
+			      gfp_t gfp, int flags);
 static void sctp_bind_addr_clean(struct sctp_bind_addr *);
 
 /* First Level Abstractions. */
@@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
  */
 int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
-			sctp_scope_t scope, gfp_t gfp,
+			enum sctp_scope scope, gfp_t gfp,
 			int flags)
 {
 	struct sctp_sockaddr_entry *addr;
@@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
 
 /* Copy out addresses from the global local address list. */
 static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
-			      union sctp_addr *addr,
-			      sctp_scope_t scope, gfp_t gfp,
-			      int flags)
+			      union sctp_addr *addr, enum sctp_scope scope,
+			      gfp_t gfp, int flags)
 {
 	int error = 0;
 
@@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
 }
 
 /* Is 'addr' valid for 'scope'?  */
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+		  enum sctp_scope scope)
 {
-	sctp_scope_t addr_scope = sctp_scope(addr);
+	enum sctp_scope addr_scope = sctp_scope(addr);
 
 	/* The unusable SCTP addresses will not be considered with
 	 * any defined scopes.
@@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk)
  ********************************************************************/
 
 /* What is the scope of 'addr'?  */
-sctp_scope_t sctp_scope(const union sctp_addr *addr)
+enum sctp_scope sctp_scope(const union sctp_addr *addr)
 {
 	struct sctp_af *af;
 
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1323d41e68b8..3afac275ee82 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
 
 		if (hmac_desc)
-			max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+			max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
 					      hmac_desc->hmac_len);
 	}
 
@@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	    asoc->outqueue.out_qlen == 0 &&
 	    list_empty(&asoc->outqueue.retransmit) &&
 	    msg_len > max_data)
-		first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
+		first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk));
 
 	/* Encourage Cookie-ECHO bundling. */
 	if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 2e47eb2f05cb..3f619fdcbf0a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
 };
 
 /* Lookup "chunk type" debug name. */
-const char *sctp_cname(const sctp_subtype_t cid)
+const char *sctp_cname(const union sctp_subtype cid)
 {
 	if (cid.chunk <= SCTP_CID_BASE_MAX)
 		return sctp_cid_tbl[cid.chunk];
@@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
 };
 
 /* Lookup primitive debug name. */
-const char *sctp_pname(const sctp_subtype_t id)
+const char *sctp_pname(const union sctp_subtype id)
 {
 	if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX)
 		return sctp_primitive_tbl[id.primitive];
@@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = {
 };
 
 /* Lookup "other" debug name. */
-const char *sctp_oname(const sctp_subtype_t id)
+const char *sctp_oname(const union sctp_subtype id)
 {
 	if (id.other <= SCTP_EVENT_OTHER_MAX)
 		return sctp_other_tbl[id.other];
@@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = {
 };
 
 /* Lookup timer debug name. */
-const char *sctp_tname(const sctp_subtype_t id)
+const char *sctp_tname(const union sctp_subtype id)
 {
 	BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl));
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 0e86f988f836..ee1e601a0b11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		 * variables.  There are arrays that we encode directly
 		 * into parameters to make the rest of the operations easier.
 		 */
-		auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) +
-				sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+		auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
+				     sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
 		if (!auth_hmacs)
 			goto nomem;
 
-		auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) +
-					SCTP_NUM_CHUNK_TYPES, gfp);
+		auth_chunks = kzalloc(sizeof(*auth_chunks) +
+				      SCTP_NUM_CHUNK_TYPES, gfp);
 		if (!auth_chunks)
 			goto nomem;
 
@@ -382,8 +382,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 	struct sctp_transport *transport;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
-	sctp_subtype_t subtype;
-	sctp_state_t state;
+	union sctp_subtype subtype;
+	enum sctp_state state;
 	int error = 0;
 	int first_time = 1;	/* is this the first time through the loop */
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 41eb2ec10460..92a07141fd07 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1111,7 +1111,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 					__be16 peer_port,
 					struct sctp_transport **transportp)
 {
-	sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
+	struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch;
 	struct sctp_af *af;
 	union sctp_addr_param *param;
 	union sctp_addr paddr;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2a186b201ad2..a2a1c1d08d51 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
 	struct in6_addr *final_p, final;
+	enum sctp_scope scope;
 	__u8 matchlen = 0;
-	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->daddr = daddr->v6.sin6_addr;
@@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
 static int sctp_v6_to_addr_param(const union sctp_addr *addr,
 				 union sctp_addr_param *param)
 {
-	int length = sizeof(sctp_ipv6addr_param_t);
+	int length = sizeof(struct sctp_ipv6addr_param);
 
 	param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
 	param->v6.param_hdr.length = htons(length);
@@ -624,10 +624,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr,
 }
 
 /* What is the scope of 'addr'?  */
-static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v6_scope(union sctp_addr *addr)
 {
+	enum sctp_scope retval;
 	int v6scope;
-	sctp_scope_t retval;
 
 	/* The IPv6 scope is really a set of bit fields.
 	 * See IFA_* in <net/if_inet6.h>.  Map to a generic SCTP scope.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d8504985744..4a865cd06d76 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -57,15 +57,15 @@
 #include <net/sctp/checksum.h>
 
 /* Forward declarations for private helpers. */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
-					      struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk);
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+						 struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+						  struct sctp_chunk *chunk);
 static void sctp_packet_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
-					struct sctp_chunk *chunk,
-					u16 chunk_len);
+				    struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk,
+					   u16 chunk_len);
 
 static void sctp_packet_reset(struct sctp_packet *packet)
 {
@@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet)
  * as it can fit in the packet, but any more data that does not fit in this
  * packet can be sent only after receiving the COOKIE_ACK.
  */
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
-				       struct sctp_chunk *chunk,
-				       int one_packet, gfp_t gfp)
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+					  struct sctp_chunk *chunk,
+					  int one_packet, gfp_t gfp)
 {
-	sctp_xmit_t retval;
+	enum sctp_xmit retval;
 
 	pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
 		 packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 }
 
 /* Try to bundle an auth chunk into the packet. */
-static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
+					      struct sctp_chunk *chunk)
 {
 	struct sctp_association *asoc = pkt->transport->asoc;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 	struct sctp_chunk *auth;
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	/* if we don't have an association, we can't do authentication */
 	if (!asoc)
@@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
 }
 
 /* Try to bundle a SACK with the packet. */
-static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt,
+					      struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	/* If sending DATA and haven't aleady bundled a SACK, try to
 	 * bundle one in to the packet.
@@ -299,11 +299,11 @@ out:
 /* Append a chunk to the offered packet reporting back any inability to do
  * so.
  */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
-					      struct sctp_chunk *chunk)
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+						 struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	/* Check to see if this chunk will fit into the packet */
 	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -353,10 +353,10 @@ finish:
 /* Append a chunk to the offered packet reporting back any inability to do
  * so.
  */
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
-				     struct sctp_chunk *chunk)
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+					struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
 
@@ -653,8 +653,8 @@ out:
  ********************************************************************/
 
 /* This private function check to see if a chunk can be added */
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+						  struct sctp_chunk *chunk)
 {
 	size_t datasize, rwnd, inflight, flight_size;
 	struct sctp_transport *transport = packet->transport;
@@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
 	sctp_chunk_assign_ssn(chunk);
 }
 
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
-					struct sctp_chunk *chunk,
-					u16 chunk_len)
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk,
+					   u16 chunk_len)
 {
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 	size_t psize, pmtu, maxsize;
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	psize = packet->size;
 	if (packet->transport->asoc)
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e8762702a313..2966ff400755 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -534,7 +534,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
  * one packet out.
  */
 void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
-		     sctp_retransmit_reason_t reason)
+		     enum sctp_retransmit_reason reason)
 {
 	struct net *net = sock_net(q->asoc->base.sk);
 
@@ -594,14 +594,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			       int rtx_timeout, int *start_timer)
 {
-	struct list_head *lqueue;
 	struct sctp_transport *transport = pkt->transport;
-	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
-	int fast_rtx;
+	struct list_head *lqueue;
+	enum sctp_xmit status;
 	int error = 0;
 	int timer = 0;
 	int done = 0;
+	int fast_rtx;
 
 	lqueue = &q->retransmit;
 	fast_rtx = q->fast_rtx;
@@ -781,7 +781,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 	struct sctp_transport *transport = NULL;
 	struct sctp_transport *new_transport;
 	struct sctp_chunk *chunk, *tmp;
-	sctp_xmit_t status;
+	enum sctp_xmit status;
 	int error = 0;
 	int start_timer = 0;
 	int one_packet = 0;
@@ -1197,7 +1197,7 @@ sctp_flush_out:
 static void sctp_sack_update_unack_data(struct sctp_association *assoc,
 					struct sctp_sackhdr *sack)
 {
-	sctp_sack_variable_t *frags;
+	union sctp_sack_variable *frags;
 	__u16 unack_data;
 	int i;
 
@@ -1224,7 +1224,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
 	struct sctp_transport *transport;
 	struct sctp_chunk *tchunk = NULL;
 	struct list_head *lchunk, *transport_list, *temp;
-	sctp_sack_variable_t *frags = sack->variable;
+	union sctp_sack_variable *frags = sack->variable;
 	__u32 sack_ctsn, ctsn, tsn;
 	__u32 highest_tsn, highest_new_tsn;
 	__u32 sack_a_rwnd;
@@ -1736,10 +1736,10 @@ static void sctp_mark_missing(struct sctp_outq *q,
 /* Is the given TSN acked by this packet?  */
 static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
 {
-	int i;
-	sctp_sack_variable_t *frags;
-	__u16 tsn_offset, blocks;
 	__u32 ctsn = ntohl(sack->cum_tsn_ack);
+	union sctp_sack_variable *frags;
+	__u16 tsn_offset, blocks;
+	int i;
 
 	if (TSN_lte(tsn, ctsn))
 		goto pass;
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index f0553a022859..c0817f7a8964 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,8 +53,8 @@
 int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
-	sctp_event_t event_type; sctp_subtype_t subtype; \
-	sctp_state_t state; \
+	enum sctp_event event_type; union sctp_subtype subtype; \
+	enum sctp_state state; \
 	struct sctp_endpoint *ep; \
 	\
 	event_type = SCTP_EVENT_T_PRIMITIVE; \
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 6cc2152e0740..43837dfc86a7 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -130,7 +130,7 @@ static const struct file_operations sctpprobe_fops = {
 static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
 					    const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
+					    const union sctp_subtype type,
 					    void *arg,
 					    sctp_cmd_seq_t *commands)
 {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 989a900383b5..fcd80feb293f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net)
 
 /* Copy the local addresses which are valid for 'scope' into 'bp'.  */
 int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
-			      sctp_scope_t scope, gfp_t gfp, int copy_flags)
+			      enum sctp_scope scope, gfp_t gfp, int copy_flags)
 {
 	struct sctp_sockaddr_entry *addr;
 	union sctp_addr laddr;
@@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
 static int sctp_v4_to_addr_param(const union sctp_addr *addr,
 				 union sctp_addr_param *param)
 {
-	int length = sizeof(sctp_ipv4addr_param_t);
+	int length = sizeof(struct sctp_ipv4addr_param);
 
 	param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
 	param->v4.param_hdr.length = htons(length);
@@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
  * IPv4 scoping can be controlled through sysctl option
  * net.sctp.addr_scope_policy
  */
-static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
 {
-	sctp_scope_t retval;
+	enum sctp_scope retval;
 
 	/* Check for unusable SCTP addresses. */
 	if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6110447fe51d..3a8fb1dffbc1 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
 static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 					   __u8 type, __u8 flags, int paylen,
 					   gfp_t gfp);
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
+static struct sctp_cookie_param *sctp_pack_cookie(
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const struct sctp_chunk *init_chunk,
 					int *cookie_len,
@@ -134,14 +135,14 @@ static const struct sctp_paramhdr prsctp_param = {
 void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
 		      size_t paylen)
 {
-	sctp_errhdr_t err;
+	struct sctp_errhdr err;
 	__u16 len;
 
 	/* Cause code constants are now defined in network order.  */
 	err.cause = cause_code;
-	len = sizeof(sctp_errhdr_t) + paylen;
+	len = sizeof(err) + paylen;
 	err.length  = htons(len);
-	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
 }
 
 /* A helper to initialize an op error inside a
@@ -152,19 +153,19 @@ void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
 static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
 		      size_t paylen)
 {
-	sctp_errhdr_t err;
+	struct sctp_errhdr err;
 	__u16 len;
 
 	/* Cause code constants are now defined in network order.  */
 	err.cause = cause_code;
-	len = sizeof(sctp_errhdr_t) + paylen;
+	len = sizeof(err) + paylen;
 	err.length  = htons(len);
 
 	if (skb_tailroom(chunk->skb) < len)
 		return -ENOSPC;
-	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
-						     sizeof(sctp_errhdr_t),
-						     &err);
+
+	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+
 	return 0;
 }
 /* 3.3.2 Initiation (INIT) (1)
@@ -223,10 +224,10 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	struct sctp_chunk *retval = NULL;
 	int num_types, addrs_len = 0;
 	struct sctp_sock *sp;
-	sctp_supported_addrs_param_t sat;
+	struct sctp_supported_addrs_param sat;
 	__be16 types[2];
-	sctp_adaptation_ind_param_t aiparam;
-	sctp_supported_ext_param_t ext_param;
+	struct sctp_adaptation_ind_param aiparam;
+	struct sctp_supported_ext_param ext_param;
 	int num_ext = 0;
 	__u8 extensions[4];
 	struct sctp_paramhdr *auth_chunks = NULL,
@@ -305,8 +306,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 
 	/* If we have any extensions to report, account for that */
 	if (num_ext)
-		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
-				       num_ext);
+		chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
@@ -348,10 +348,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	 */
 	if (num_ext) {
 		ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
-		ext_param.param_hdr.length =
-			    htons(sizeof(sctp_supported_ext_param_t) + num_ext);
-		sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
-				&ext_param);
+		ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+		sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 
@@ -390,11 +388,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	union sctp_params addrs;
 	struct sctp_sock *sp;
 	int addrs_len;
-	sctp_cookie_param_t *cookie;
+	struct sctp_cookie_param *cookie;
 	int cookie_len;
 	size_t chunksize;
-	sctp_adaptation_ind_param_t aiparam;
-	sctp_supported_ext_param_t ext_param;
+	struct sctp_adaptation_ind_param aiparam;
+	struct sctp_supported_ext_param ext_param;
 	int num_ext = 0;
 	__u8 extensions[4];
 	struct sctp_paramhdr *auth_chunks = NULL,
@@ -468,8 +466,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	}
 
 	if (num_ext)
-		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
-				       num_ext);
+		chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
 
 	/* Now allocate and fill out the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -495,10 +492,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 		sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
 	if (num_ext) {
 		ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
-		ext_param.param_hdr.length =
-			    htons(sizeof(sctp_supported_ext_param_t) + num_ext);
-		sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
-				 &ext_param);
+		ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+		sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 	if (asoc->peer.prsctp_capable)
@@ -668,11 +663,11 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
 			    const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
-	sctp_cwrhdr_t cwr;
+	struct sctp_cwrhdr cwr;
 
 	cwr.lowest_tsn = htonl(lowest_tsn);
 	retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
-				   sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
+				   sizeof(cwr), GFP_ATOMIC);
 
 	if (!retval)
 		goto nodata;
@@ -702,11 +697,11 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
 			     const __u32 lowest_tsn)
 {
 	struct sctp_chunk *retval;
-	sctp_ecnehdr_t ecne;
+	struct sctp_ecnehdr ecne;
 
 	ecne.lowest_tsn = htonl(lowest_tsn);
 	retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
-				   sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
+				   sizeof(ecne), GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 	retval->subh.ecne_hdr =
@@ -862,15 +857,15 @@ nodata:
 struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
 				      const struct sctp_chunk *chunk)
 {
+	struct sctp_shutdownhdr shut;
 	struct sctp_chunk *retval;
-	sctp_shutdownhdr_t shut;
 	__u32 ctsn;
 
 	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
 	shut.cum_tsn_ack = htonl(ctsn);
 
 	retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
-				   sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
+				   sizeof(shut), GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 
@@ -984,8 +979,8 @@ struct sctp_chunk *sctp_make_abort_no_data(
 	struct sctp_chunk *retval;
 	__be32 payload;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
-				 + sizeof(tsn));
+	retval = sctp_make_abort(asoc, chunk,
+				 sizeof(struct sctp_errhdr) + sizeof(tsn));
 
 	if (!retval)
 		goto no_mem;
@@ -1020,7 +1015,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
 	void *payload = NULL;
 	int err;
 
-	retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
+	retval = sctp_make_abort(asoc, NULL,
+				 sizeof(struct sctp_errhdr) + paylen);
 	if (!retval)
 		goto err_chunk;
 
@@ -1085,8 +1081,8 @@ struct sctp_chunk *sctp_make_abort_violation(
 	struct sctp_chunk  *retval;
 	struct sctp_paramhdr phdr;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
-					      sizeof(phdr));
+	retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) +
+					      paylen + sizeof(phdr));
 	if (!retval)
 		goto end;
 
@@ -1109,7 +1105,7 @@ struct sctp_chunk *sctp_make_violation_paramlen(
 {
 	struct sctp_chunk *retval;
 	static const char error[] = "The following parameter had invalid length:";
-	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) +
 			     sizeof(*param);
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
@@ -1131,7 +1127,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans(
 {
 	struct sctp_chunk *retval;
 	static const char error[] = "Association exceeded its max_retans count";
-	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t);
+	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
@@ -1214,7 +1210,8 @@ static struct sctp_chunk *sctp_make_op_error_space(
 	struct sctp_chunk *retval;
 
 	retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
-				   sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
+				   sizeof(struct sctp_errhdr) + size,
+				   GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 
@@ -1285,16 +1282,16 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
 		return NULL;
 
 	retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
-			hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
-			GFP_ATOMIC);
+				   hmac_desc->hmac_len + sizeof(auth_hdr),
+				   GFP_ATOMIC);
 	if (!retval)
 		return NULL;
 
 	auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
 	auth_hdr.shkey_id = htons(asoc->active_key_id);
 
-	retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t),
-						&auth_hdr);
+	retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
+						 &auth_hdr);
 
 	hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len);
 
@@ -1581,8 +1578,8 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
 					gfp_t gfp)
 {
 	struct sctp_association *asoc;
+	enum sctp_scope scope;
 	struct sk_buff *skb;
-	sctp_scope_t scope;
 
 	/* Create the bare association.  */
 	scope = sctp_scope(sctp_source(chunk));
@@ -1601,14 +1598,15 @@ nodata:
 /* Build a cookie representing asoc.
  * This INCLUDES the param header needed to put the cookie in the INIT ACK.
  */
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const struct sctp_chunk *init_chunk,
-				      int *cookie_len,
-				      const __u8 *raw_addrs, int addrs_len)
+static struct sctp_cookie_param *sctp_pack_cookie(
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *init_chunk,
+					int *cookie_len,
+					const __u8 *raw_addrs, int addrs_len)
 {
-	sctp_cookie_param_t *retval;
 	struct sctp_signed_cookie *cookie;
+	struct sctp_cookie_param *retval;
 	int headersize, bodysize;
 
 	/* Header size is static data prior to the actual cookie, including
@@ -1703,7 +1701,7 @@ struct sctp_association *sctp_unpack_cookie(
 	int headersize, bodysize, fixed_size;
 	__u8 *digest = ep->digest;
 	unsigned int len;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	struct sk_buff *skb = chunk->skb;
 	ktime_t kt;
 
@@ -2067,10 +2065,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
  * 	SCTP_IERROR_ERROR    - stop and report an error.
  * 	SCTP_IERROR_NOMEME   - out of memory.
  */
-static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
-					    union sctp_params param,
-					    struct sctp_chunk *chunk,
-					    struct sctp_chunk **errp)
+static enum sctp_ierror sctp_process_unk_param(
+					const struct sctp_association *asoc,
+					union sctp_params param,
+					struct sctp_chunk *chunk,
+					struct sctp_chunk **errp)
 {
 	int retval = SCTP_IERROR_NO_ERROR;
 
@@ -2119,13 +2118,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
  *	SCTP_IERROR_ERROR - stop processing, trigger an ERROR
  * 	SCTP_IERROR_NO_ERROR - continue with the chunk
  */
-static sctp_ierror_t sctp_verify_param(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					union sctp_params param,
-					enum sctp_cid cid,
-					struct sctp_chunk *chunk,
-					struct sctp_chunk **err_chunk)
+static enum sctp_ierror sctp_verify_param(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  union sctp_params param,
+					  enum sctp_cid cid,
+					  struct sctp_chunk *chunk,
+					  struct sctp_chunk **err_chunk)
 {
 	struct sctp_hmac_algo_param *hmacs;
 	int retval = SCTP_IERROR_NO_ERROR;
@@ -2504,7 +2503,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 	int i;
 	__u16 sat;
 	int retval = 1;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	u32 stale;
 	struct sctp_af *af;
 	union sctp_addr_param *addr_param;
@@ -2617,7 +2616,7 @@ do_addr_param:
 		if (!net->sctp.addip_enable)
 			goto fall_through;
 
-		addr_param = param.v + sizeof(sctp_addip_param_t);
+		addr_param = param.v + sizeof(struct sctp_addip_param);
 
 		af = sctp_get_af_specific(param_type2af(addr_param->p.type));
 		if (af == NULL)
@@ -2754,7 +2753,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
 					   union sctp_addr *addr,
 					   int vparam_len)
 {
-	sctp_addiphdr_t asconf;
+	struct sctp_addiphdr asconf;
 	struct sctp_chunk *retval;
 	int length = sizeof(asconf) + vparam_len;
 	union sctp_addr_param addrparam;
@@ -2812,7 +2811,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 					      int		      addrcnt,
 					      __be16		      flags)
 {
-	sctp_addip_param_t	param;
+	struct sctp_addip_param	param;
 	struct sctp_chunk	*retval;
 	union sctp_addr_param	addr_param;
 	union sctp_addr		*addr;
@@ -2898,7 +2897,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 					     union sctp_addr *addr)
 {
-	sctp_addip_param_t	param;
+	struct sctp_addip_param	param;
 	struct sctp_chunk 	*retval;
 	int 			len = sizeof(param);
 	union sctp_addr_param	addrparam;
@@ -2947,7 +2946,7 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc,
 					       __u32 serial, int vparam_len)
 {
-	sctp_addiphdr_t		asconf;
+	struct sctp_addiphdr	asconf;
 	struct sctp_chunk	*retval;
 	int			length = sizeof(asconf) + vparam_len;
 
@@ -2967,10 +2966,11 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
 
 /* Add response parameters to an ASCONF_ACK chunk. */
 static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
-			      __be16 err_code, sctp_addip_param_t *asconf_param)
+				     __be16 err_code,
+				     struct sctp_addip_param *asconf_param)
 {
-	sctp_addip_param_t 	ack_param;
-	sctp_errhdr_t		err_param;
+	struct sctp_addip_param ack_param;
+	struct sctp_errhdr	err_param;
 	int			asconf_param_len = 0;
 	int			err_param_len = 0;
 	__be16			response_type;
@@ -3008,15 +3008,15 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
 
 /* Process a asconf parameter. */
 static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
-				       struct sctp_chunk *asconf,
-				       sctp_addip_param_t *asconf_param)
+					struct sctp_chunk *asconf,
+					struct sctp_addip_param *asconf_param)
 {
 	struct sctp_transport *peer;
 	struct sctp_af *af;
 	union sctp_addr	addr;
 	union sctp_addr_param *addr_param;
 
-	addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+	addr_param = (void *)asconf_param + sizeof(*asconf_param);
 
 	if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP &&
 	    asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP &&
@@ -3141,10 +3141,11 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			struct sctp_chunk *chunk, bool addr_param_needed,
 			struct sctp_paramhdr **errp)
 {
-	sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
+	struct sctp_addip_chunk *addip;
 	union sctp_params param;
 	bool addr_param_seen = false;
 
+	addip = (struct sctp_addip_chunk *)chunk->chunk_hdr;
 	sctp_walk_params(param, addip, addip_hdr.params) {
 		size_t length = ntohs(param.p->length);
 
@@ -3153,7 +3154,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 		case SCTP_PARAM_ERR_CAUSE:
 			break;
 		case SCTP_PARAM_IPV4_ADDRESS:
-			if (length != sizeof(sctp_ipv4addr_param_t))
+			if (length != sizeof(struct sctp_ipv4addr_param))
 				return false;
 			/* ensure there is only one addr param and it's in the
 			 * beginning of addip_hdr params, or we reject it.
@@ -3163,7 +3164,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			addr_param_seen = true;
 			break;
 		case SCTP_PARAM_IPV6_ADDRESS:
-			if (length != sizeof(sctp_ipv6addr_param_t))
+			if (length != sizeof(struct sctp_ipv6addr_param))
 				return false;
 			if (param.v != addip->addip_hdr.params)
 				return false;
@@ -3176,13 +3177,13 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			if (addr_param_needed && !addr_param_seen)
 				return false;
 			length = ntohs(param.addip->param_hdr.length);
-			if (length < sizeof(sctp_addip_param_t) +
+			if (length < sizeof(struct sctp_addip_param) +
 				     sizeof(**errp))
 				return false;
 			break;
 		case SCTP_PARAM_SUCCESS_REPORT:
 		case SCTP_PARAM_ADAPTATION_LAYER_IND:
-			if (length != sizeof(sctp_addip_param_t))
+			if (length != sizeof(struct sctp_addip_param))
 				return false;
 			break;
 		default:
@@ -3208,10 +3209,10 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 				       struct sctp_chunk *asconf)
 {
-	sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+	struct sctp_addip_chunk *addip;
 	bool all_param_pass = true;
 	union sctp_params param;
-	sctp_addiphdr_t		*hdr;
+	struct sctp_addiphdr	*hdr;
 	union sctp_addr_param	*addr_param;
 	struct sctp_chunk	*asconf_ack;
 	__be16	err_code;
@@ -3219,13 +3220,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 	int	chunk_len;
 	__u32	serial;
 
+	addip = (struct sctp_addip_chunk *)asconf->chunk_hdr;
 	chunk_len = ntohs(asconf->chunk_hdr->length) -
 		    sizeof(struct sctp_chunkhdr);
-	hdr = (sctp_addiphdr_t *)asconf->skb->data;
+	hdr = (struct sctp_addiphdr *)asconf->skb->data;
 	serial = ntohl(hdr->serial);
 
 	/* Skip the addiphdr and store a pointer to address parameter.  */
-	length = sizeof(sctp_addiphdr_t);
+	length = sizeof(*hdr);
 	addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
 	chunk_len -= length;
 
@@ -3291,7 +3293,7 @@ done:
 
 /* Process a asconf parameter that is successfully acked. */
 static void sctp_asconf_param_success(struct sctp_association *asoc,
-				     sctp_addip_param_t *asconf_param)
+				      struct sctp_addip_param *asconf_param)
 {
 	struct sctp_af *af;
 	union sctp_addr	addr;
@@ -3300,7 +3302,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
 	struct sctp_transport *transport;
 	struct sctp_sockaddr_entry *saddr;
 
-	addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+	addr_param = (void *)asconf_param + sizeof(*asconf_param);
 
 	/* We have checked the packet before, so we do not check again.	*/
 	af = sctp_get_af_specific(param_type2af(addr_param->p.type));
@@ -3351,11 +3353,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
  * specific success indication is present for the parameter.
  */
 static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
-				      sctp_addip_param_t *asconf_param,
-				      int no_err)
+				       struct sctp_addip_param *asconf_param,
+				       int no_err)
 {
-	sctp_addip_param_t	*asconf_ack_param;
-	sctp_errhdr_t		*err_param;
+	struct sctp_addip_param	*asconf_ack_param;
+	struct sctp_errhdr	*err_param;
 	int			length;
 	int			asconf_ack_len;
 	__be16			err_code;
@@ -3371,9 +3373,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 	/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
 	 * the first asconf_ack parameter.
 	 */
-	length = sizeof(sctp_addiphdr_t);
-	asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data +
-						  length);
+	length = sizeof(struct sctp_addiphdr);
+	asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data +
+						       length);
 	asconf_ack_len -= length;
 
 	while (asconf_ack_len > 0) {
@@ -3382,7 +3384,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 			case SCTP_PARAM_SUCCESS_REPORT:
 				return SCTP_ERROR_NO_ERROR;
 			case SCTP_PARAM_ERR_CAUSE:
-				length = sizeof(sctp_addip_param_t);
+				length = sizeof(*asconf_ack_param);
 				err_param = (void *)asconf_ack_param + length;
 				asconf_ack_len -= length;
 				if (asconf_ack_len > 0)
@@ -3409,7 +3411,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 {
 	struct sctp_chunk	*asconf = asoc->addip_last_asconf;
 	union sctp_addr_param	*addr_param;
-	sctp_addip_param_t	*asconf_param;
+	struct sctp_addip_param	*asconf_param;
 	int	length = 0;
 	int	asconf_len = asconf->skb->len;
 	int	all_param_pass = 0;
@@ -3420,7 +3422,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 	/* Skip the chunkhdr and addiphdr from the last asconf sent and store
 	 * a pointer to address parameter.
 	 */
-	length = sizeof(sctp_addip_chunk_t);
+	length = sizeof(struct sctp_addip_chunk);
 	addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
 	asconf_len -= length;
 
@@ -3436,7 +3438,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 	 * failures are indicated, then all request(s) are considered
 	 * successful.
 	 */
-	if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t))
+	if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr))
 		all_param_pass = 1;
 
 	/* Process the TLVs contained in the last sent ASCONF chunk. */
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d6e5e9e0fd6d..4a12d29d9aa1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -51,17 +51,18 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-static int sctp_cmd_interpreter(sctp_event_t event_type,
-				sctp_subtype_t subtype,
-				sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+				union sctp_subtype subtype,
+				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
 				sctp_disposition_t status,
 				sctp_cmd_seq_t *commands,
 				gfp_t gfp);
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
-			     sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+			     union sctp_subtype subtype,
+			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
@@ -280,7 +281,7 @@ out_unlock:
  * for timeouts which use the association as their parameter.
  */
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
-					sctp_event_timeout_t timeout_type)
+					enum sctp_event_timeout timeout_type)
 {
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
@@ -602,8 +603,8 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 /* Worker routine to handle SCTP_CMD_ASSOC_FAILED.  */
 static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 				  struct sctp_association *asoc,
-				  sctp_event_t event_type,
-				  sctp_subtype_t subtype,
+				  enum sctp_event event_type,
+				  union sctp_subtype subtype,
 				  struct sctp_chunk *chunk,
 				  unsigned int error)
 {
@@ -828,7 +829,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 	if (!sctp_assoc_update(asoc, new))
 		return;
 
-	abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t));
+	abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
 	if (abort) {
 		sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
 		sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
@@ -843,7 +844,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 /* Helper function to change the state of an association. */
 static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
 			       struct sctp_association *asoc,
-			       sctp_state_t state)
+			       enum sctp_state state)
 {
 	struct sock *sk = asoc->base.sk;
 
@@ -1052,8 +1053,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
 
 
 static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
-				    sctp_event_timeout_t timer,
-				    char *name)
+				     enum sctp_event_timeout timer,
+				     char *name)
 {
 	struct sctp_transport *t;
 
@@ -1139,18 +1140,16 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * If you want to understand all of lksctp, this is a
  * good place to start.
  */
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       sctp_state_t state,
-	       struct sctp_endpoint *ep,
-	       struct sctp_association *asoc,
-	       void *event_arg,
-	       gfp_t gfp)
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+	       union sctp_subtype subtype, enum sctp_state state,
+	       struct sctp_endpoint *ep, struct sctp_association *asoc,
+	       void *event_arg, gfp_t gfp)
 {
 	sctp_cmd_seq_t commands;
 	const sctp_sm_table_entry_t *state_fn;
 	sctp_disposition_t status;
 	int error = 0;
-	typedef const char *(printfn_t)(sctp_subtype_t);
+	typedef const char *(printfn_t)(union sctp_subtype);
 	static printfn_t *table[] = {
 		NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
 	};
@@ -1178,8 +1177,9 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 /*****************************************************************
  * This the master state function side effect processing function.
  *****************************************************************/
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
-			     sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+			     union sctp_subtype subtype,
+			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
@@ -1263,9 +1263,9 @@ bail:
  ********************************************************************/
 
 /* This is the side-effect interpreter.  */
-static int sctp_cmd_interpreter(sctp_event_t event_type,
-				sctp_subtype_t subtype,
-				sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+				union sctp_subtype subtype,
+				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b2a74c3823ee..ac6aaa046529 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -80,19 +80,19 @@ static void sctp_send_stale_cookie_err(struct net *net,
 static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
-						 const sctp_subtype_t type,
+						 const union sctp_subtype type,
 						 void *arg,
 						 sctp_cmd_seq_t *commands);
 static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
-					     const sctp_subtype_t type,
+					     const union sctp_subtype type,
 					     void *arg,
 					     sctp_cmd_seq_t *commands);
 static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands);
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
@@ -116,7 +116,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
@@ -124,7 +124,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands);
 
@@ -132,7 +132,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
@@ -140,20 +140,21 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    struct sctp_chunk *chunk);
+static enum sctp_ierror sctp_sf_authenticate(
+				struct net *net,
+				const struct sctp_endpoint *ep,
+				const struct sctp_association *asoc,
+				const union sctp_subtype type,
+				struct sctp_chunk *chunk);
 
 static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands);
 
@@ -216,7 +217,7 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
 sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 				  const struct sctp_endpoint *ep,
 				  const struct sctp_association *asoc,
-				  const sctp_subtype_t type,
+				  const union sctp_subtype type,
 				  void *arg,
 				  sctp_cmd_seq_t *commands)
 {
@@ -302,16 +303,14 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
-	struct sctp_chunk *chunk = arg;
-	struct sctp_chunk *repl;
+	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+	struct sctp_unrecognized_param *unk_param;
 	struct sctp_association *new_asoc;
-	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
-	sctp_unrecognized_param_t *unk_param;
 	int len;
 
 	/* 6.10 Bundling
@@ -435,7 +434,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 		 * construct the parameters in INIT ACK by copying the
 		 * ERROR causes over.
 		 */
-		unk_param = (sctp_unrecognized_param_t *)
+		unk_param = (struct sctp_unrecognized_param *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
 			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
@@ -498,7 +497,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -518,7 +517,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 		return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the INIT-ACK chunk has a valid length */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
@@ -530,7 +529,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 
-		sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
+		enum sctp_error error = SCTP_ERROR_NO_RESOURCE;
 
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes.  If there are no causes,
@@ -648,7 +647,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type, void *arg,
+				      const union sctp_subtype type, void *arg,
 				      sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
@@ -758,7 +757,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	 */
 	if (chunk->auth_chunk) {
 		struct sctp_chunk auth;
-		sctp_ierror_t ret;
+		enum sctp_ierror ret;
 
 		/* Make sure that we and the peer are AUTH capable */
 		if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
@@ -875,7 +874,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type, void *arg,
+				      const union sctp_subtype type, void *arg,
 				      sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
@@ -952,7 +951,7 @@ nomem:
 /* Generate and sendout a heartbeat packet.  */
 static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
+					    const union sctp_subtype type,
 					    void *arg,
 					    sctp_cmd_seq_t *commands)
 {
@@ -978,7 +977,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -1026,7 +1025,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 sctp_disposition_t sctp_sf_send_reconf(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type, void *arg,
+				       const union sctp_subtype type, void *arg,
 				       sctp_cmd_seq_t *commands)
 {
 	struct sctp_transport *transport = arg;
@@ -1077,7 +1076,7 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
 sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -1090,7 +1089,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk,
+				     sizeof(struct sctp_heartbeat_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -1098,7 +1098,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 	 * respond with a HEARTBEAT ACK that contains the Heartbeat
 	 * Information field copied from the received HEARTBEAT chunk.
 	 */
-	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+	chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data;
 	param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
 	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
 
@@ -1151,7 +1151,7 @@ nomem:
 sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -1234,7 +1234,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 	union sctp_addr_param *addrparm;
 	struct sctp_errhdr *errhdr;
 	struct sctp_endpoint *ep;
-	char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)];
+	char buffer[sizeof(*errhdr) + sizeof(*addrparm)];
 	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
 
 	/* Build the error on the stack.   We are way to malloc crazy
@@ -1245,7 +1245,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 
 	/* Copy into a parm format. */
 	len = af->to_addr_param(ssa, addrparm);
-	len += sizeof(sctp_errhdr_t);
+	len += sizeof(*errhdr);
 
 	errhdr->cause = SCTP_ERROR_RESTART;
 	errhdr->length = htons(len);
@@ -1416,16 +1416,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg, sctp_cmd_seq_t *commands)
 {
-	sctp_disposition_t retval;
-	struct sctp_chunk *chunk = arg;
-	struct sctp_chunk *repl;
+	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+	struct sctp_unrecognized_param *unk_param;
 	struct sctp_association *new_asoc;
-	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
-	sctp_unrecognized_param_t *unk_param;
+	sctp_disposition_t retval;
 	int len;
 
 	/* 6.10 Bundling
@@ -1555,7 +1553,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		 * construct the parameters in INIT ACK by copying the
 		 * ERROR causes over.
 		 */
-		unk_param = (sctp_unrecognized_param_t *)
+		unk_param = (struct sctp_unrecognized_param *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
 			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
@@ -1629,7 +1627,7 @@ cleanup:
 sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -1683,7 +1681,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
 sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -1706,7 +1704,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
 sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
 					    const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
+					    const union sctp_subtype type,
 					    void *arg, sctp_cmd_seq_t *commands)
 {
 	/* Per the above section, we'll discard the chunk if we have an
@@ -2029,7 +2027,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2148,7 +2146,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -2167,7 +2165,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
@@ -2190,7 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2209,7 +2207,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
@@ -2241,7 +2239,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -2268,12 +2266,12 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
 sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2281,7 +2279,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 	/* Make sure that the ERROR chunk has a valid length.
 	 * The parameter walking depends on this as well.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -2332,17 +2330,16 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
-						 const sctp_subtype_t type,
+						 const union sctp_subtype type,
 						 void *arg,
 						 sctp_cmd_seq_t *commands)
 {
-	struct sctp_chunk *chunk = arg;
-	u32 stale;
-	sctp_cookie_preserve_param_t bht;
-	sctp_errhdr_t *err;
-	struct sctp_chunk *reply;
-	struct sctp_bind_addr *bp;
 	int attempts = asoc->init_err_counter + 1;
+	struct sctp_chunk *chunk = arg, *reply;
+	struct sctp_cookie_preserve_param bht;
+	struct sctp_bind_addr *bp;
+	struct sctp_errhdr *err;
+	u32 stale;
 
 	if (attempts > asoc->max_init_attempts) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
@@ -2352,7 +2349,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 		return SCTP_DISPOSITION_DELETE_TCB;
 	}
 
-	err = (sctp_errhdr_t *)(chunk->skb->data);
+	err = (struct sctp_errhdr *)(chunk->skb->data);
 
 	/* When calculating the time extension, an implementation
 	 * SHOULD use the RTT information measured based on the
@@ -2368,7 +2365,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 	 * to give ample time to retransmit the new cookie and thus
 	 * yield a higher probability of success on the reattempt.
 	 */
-	stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t)));
+	stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err)));
 	stale = (stale * 2) / 1000;
 
 	bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
@@ -2455,7 +2452,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2474,7 +2471,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
@@ -2492,7 +2489,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2503,13 +2500,14 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
+		struct sctp_errhdr *err;
 
-		sctp_errhdr_t *err;
 		sctp_walk_errors(err, chunk->chunk_hdr);
 		if ((void *)err != (void *)chunk->chunk_end)
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+						commands);
 
-		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+		error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
 	}
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
@@ -2529,7 +2527,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -2550,13 +2548,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
-		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+		error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
 
 	return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
 				      chunk->transport);
@@ -2568,7 +2566,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2583,7 +2581,7 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
 					       const struct sctp_endpoint *ep,
 					       const struct sctp_association *asoc,
-					       const sctp_subtype_t type,
+					       const union sctp_subtype type,
 					       void *arg,
 					       sctp_cmd_seq_t *commands)
 {
@@ -2655,13 +2653,13 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
 sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_shutdownhdr_t *sdh;
 	sctp_disposition_t disposition;
+	struct sctp_shutdownhdr *sdh;
 	struct sctp_ulpevent *ev;
 	__u32 ctsn;
 
@@ -2669,14 +2667,13 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk,
-				      sizeof(struct sctp_shutdown_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Convert the elaborate header.  */
-	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
+	sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(*sdh));
 	chunk->subh.shutdown_hdr = sdh;
 	ctsn = ntohl(sdh->cum_tsn_ack);
 
@@ -2745,24 +2742,23 @@ out:
 sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_shutdownhdr_t *sdh;
+	struct sctp_shutdownhdr *sdh;
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk,
-				      sizeof(struct sctp_shutdown_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+	sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
 	ctsn = ntohl(sdh->cum_tsn_ack);
 
 	if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
@@ -2799,7 +2795,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
 sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -2863,23 +2859,23 @@ nomem:
 sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type,
+				      const union sctp_subtype type,
 				      void *arg,
 				      sctp_cmd_seq_t *commands)
 {
-	sctp_cwrhdr_t *cwr;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_cwrhdr *cwr;
 	u32 lowest_tsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	cwr = (sctp_cwrhdr_t *) chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t));
+	cwr = (struct sctp_cwrhdr *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(*cwr));
 
 	lowest_tsn = ntohl(cwr->lowest_tsn);
 
@@ -2919,22 +2915,22 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
 sctp_disposition_t sctp_sf_do_ecne(struct net *net,
 				   const struct sctp_endpoint *ep,
 				   const struct sctp_association *asoc,
-				   const sctp_subtype_t type,
+				   const union sctp_subtype type,
 				   void *arg,
 				   sctp_cmd_seq_t *commands)
 {
-	sctp_ecnehdr_t *ecne;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_ecnehdr *ecne;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	ecne = (sctp_ecnehdr_t *) chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t));
+	ecne = (struct sctp_ecnehdr *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(*ecne));
 
 	/* If this is a newer ECNE than the last CWR packet we sent out */
 	sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE,
@@ -2976,7 +2972,7 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
 sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3096,7 +3092,7 @@ discard_noforce:
 sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -3187,19 +3183,19 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_sackhdr_t *sackh;
+	struct sctp_sackhdr *sackh;
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SACK chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3261,7 +3257,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3311,18 +3307,18 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 sctp_disposition_t sctp_sf_operr_notify(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ERROR chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	sctp_walk_errors(err, chunk->chunk_hdr);
@@ -3349,7 +3345,7 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
 sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3432,14 +3428,14 @@ nomem:
 sctp_disposition_t sctp_sf_ootb(struct net *net,
 				const struct sctp_endpoint *ep,
 				const struct sctp_association *asoc,
-				const sctp_subtype_t type,
+				const union sctp_subtype type,
 				void *arg,
 				sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
 	struct sctp_chunkhdr *ch;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 	__u8 *ch_end;
 	int ootb_shut_ack = 0;
 	int ootb_cookie_ack = 0;
@@ -3525,7 +3521,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
-					     const sctp_subtype_t type,
+					     const union sctp_subtype type,
 					     void *arg,
 					     sctp_cmd_seq_t *commands)
 {
@@ -3587,7 +3583,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type,
+				      const union sctp_subtype type,
 				      void *arg,
 				      sctp_cmd_seq_t *commands)
 {
@@ -3612,13 +3608,13 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type, void *arg,
+				     const union sctp_subtype type, void *arg,
 				     sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk	*chunk = arg;
 	struct sctp_chunk	*asconf_ack = NULL;
 	struct sctp_paramhdr	*err_param = NULL;
-	sctp_addiphdr_t		*hdr;
+	struct sctp_addiphdr	*hdr;
 	__u32			serial;
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
@@ -3634,14 +3630,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
 	if (!net->sctp.addip_noauth && !chunk->auth)
-		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+					     commands);
 
 	/* Make sure that the ASCONF ADDIP chunk has a valid length.  */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	hdr = (sctp_addiphdr_t *)chunk->skb->data;
+	hdr = (struct sctp_addiphdr *)chunk->skb->data;
 	serial = ntohl(hdr->serial);
 
 	/* Verify the ASCONF chunk before processing it. */
@@ -3728,14 +3725,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type, void *arg,
+					 const union sctp_subtype type,
+					 void *arg,
 					 sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk	*asconf_ack = arg;
 	struct sctp_chunk	*last_asconf = asoc->addip_last_asconf;
 	struct sctp_chunk	*abort;
 	struct sctp_paramhdr	*err_param = NULL;
-	sctp_addiphdr_t		*addip_hdr;
+	struct sctp_addiphdr	*addip_hdr;
 	__u32			sent_serial, rcvd_serial;
 
 	if (!sctp_vtag_verify(asconf_ack, asoc)) {
@@ -3751,14 +3749,16 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
 	if (!net->sctp.addip_noauth && !asconf_ack->auth)
-		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+					     commands);
 
 	/* Make sure that the ADDIP chunk has a valid length.  */
-	if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
+	if (!sctp_chunk_length_valid(asconf_ack,
+				     sizeof(struct sctp_addip_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
+	addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
 	rcvd_serial = ntohl(addip_hdr->serial);
 
 	/* Verify the ASCONF-ACK chunk before processing it. */
@@ -3767,7 +3767,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 			   (void *)err_param, commands);
 
 	if (last_asconf) {
-		addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
+		addip_hdr = (struct sctp_addiphdr *)last_asconf->subh.addip_hdr;
 		sent_serial = ntohl(addip_hdr->serial);
 	} else {
 		sent_serial = asoc->addip_serial - 1;
@@ -3782,7 +3782,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
 	    !(asoc->addip_last_asconf)) {
 		abort = sctp_make_abort(asoc, asconf_ack,
-					sizeof(sctp_errhdr_t));
+					sizeof(struct sctp_errhdr));
 		if (abort) {
 			sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0);
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3818,7 +3818,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 		}
 
 		abort = sctp_make_abort(asoc, asconf_ack,
-					sizeof(sctp_errhdr_t));
+					sizeof(struct sctp_errhdr));
 		if (abort) {
 			sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3844,7 +3844,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 sctp_disposition_t sctp_sf_do_reconf(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type, void *arg,
+				     const union sctp_subtype type, void *arg,
 				     sctp_cmd_seq_t *commands)
 {
 	struct sctp_paramhdr *err_param = NULL;
@@ -3920,7 +3920,7 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
 sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -3991,7 +3991,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -4079,11 +4079,12 @@ gen_shutdown:
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    struct sctp_chunk *chunk)
+static enum sctp_ierror sctp_sf_authenticate(
+				struct net *net,
+				const struct sctp_endpoint *ep,
+				const struct sctp_association *asoc,
+				const union sctp_subtype type,
+				struct sctp_chunk *chunk)
 {
 	struct sctp_authhdr *auth_hdr;
 	struct sctp_hmac *hmac;
@@ -4095,7 +4096,7 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
 	/* Pull in the auth header, so we can do some more verification */
 	auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
 	chunk->subh.auth_hdr = auth_hdr;
-	skb_pull(chunk->skb, sizeof(struct sctp_authhdr));
+	skb_pull(chunk->skb, sizeof(*auth_hdr));
 
 	/* Make sure that we support the HMAC algorithm from the auth
 	 * chunk.
@@ -4114,7 +4115,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
 	/* Make sure that the length of the signature matches what
 	 * we expect.
 	 */
-	sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t);
+	sig_len = ntohs(chunk->chunk_hdr->length) -
+		  sizeof(struct sctp_auth_chunk);
 	hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id));
 	if (sig_len != hmac->hmac_len)
 		return SCTP_IERROR_PROTO_VIOLATION;
@@ -4136,8 +4138,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
 	memset(digest, 0, sig_len);
 
 	sctp_auth_calculate_hmac(asoc, chunk->skb,
-				(struct sctp_auth_chunk *)chunk->chunk_hdr,
-				GFP_ATOMIC);
+				 (struct sctp_auth_chunk *)chunk->chunk_hdr,
+				 GFP_ATOMIC);
 
 	/* Discard the packet if the digests do not match */
 	if (memcmp(save_digest, digest, sig_len)) {
@@ -4156,14 +4158,14 @@ nomem:
 sctp_disposition_t sctp_sf_eat_auth(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
-	struct sctp_authhdr *auth_hdr;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_authhdr *auth_hdr;
 	struct sctp_chunk *err_chunk;
-	sctp_ierror_t error;
+	enum sctp_ierror error;
 
 	/* Make sure that the peer has AUTH capable */
 	if (!asoc->peer.auth_capable)
@@ -4253,7 +4255,7 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
 sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4333,7 +4335,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type,
+					 const union sctp_subtype type,
 					 void *arg,
 					 sctp_cmd_seq_t *commands)
 {
@@ -4373,7 +4375,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 sctp_disposition_t sctp_sf_pdiscard(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -4401,7 +4403,7 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
 sctp_disposition_t sctp_sf_violation(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4454,11 +4456,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
 		/* Treat INIT-ACK as a special case during COOKIE-WAIT. */
 		if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK &&
 		    !asoc->peer.i.init_tag) {
-			sctp_initack_chunk_t *initack;
+			struct sctp_initack_chunk *initack;
 
-			initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
-			if (!sctp_chunk_length_valid(chunk,
-						     sizeof(sctp_initack_chunk_t)))
+			initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
+			if (!sctp_chunk_length_valid(chunk, sizeof(*initack)))
 				abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T;
 			else {
 				unsigned int inittag;
@@ -4521,7 +4522,7 @@ nomem:
  * Handle a protocol violation when the chunk length is invalid.
  * "Invalid" length is identified as smaller than the minimal length a
  * given chunk can be.  For example, a SACK chunk has invalid length
- * if its length is set to be smaller than the size of sctp_sack_chunk_t.
+ * if its length is set to be smaller than the size of struct sctp_sack_chunk.
  *
  * We inform the other end by sending an ABORT with a Protocol Violation
  * error code.
@@ -4540,7 +4541,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4560,7 +4561,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4603,7 +4604,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4623,7 +4624,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4698,7 +4699,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -4810,7 +4811,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -4850,7 +4851,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -4906,7 +4907,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -4943,7 +4944,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 sctp_disposition_t sctp_sf_error_closed(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -4957,7 +4958,7 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
 sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
 					  const struct sctp_endpoint *ep,
 					  const struct sctp_association *asoc,
-					  const sctp_subtype_t type,
+					  const union sctp_subtype type,
 					  void *arg,
 					  sctp_cmd_seq_t *commands)
 {
@@ -4984,7 +4985,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5019,7 +5020,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg, sctp_cmd_seq_t *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
@@ -5046,7 +5047,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5095,7 +5096,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5121,7 +5122,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5148,7 +5149,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5179,7 +5180,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5215,7 +5216,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
 					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5247,7 +5248,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
 sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5264,7 +5265,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type,
+					 const union sctp_subtype type,
 					 void *arg, sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
@@ -5282,7 +5283,7 @@ sctp_disposition_t sctp_sf_ignore_primitive(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5306,7 +5307,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5338,7 +5339,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5408,7 +5409,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5424,12 +5425,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	 */
 	if (chunk) {
 		if (!sctp_vtag_verify(chunk, asoc))
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+						commands);
 
 		/* Make sure that the SHUTDOWN chunk has a valid length. */
-		if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
-			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-							  commands);
+		if (!sctp_chunk_length_valid(
+				chunk, sizeof(struct sctp_shutdown_chunk)))
+			return sctp_sf_violation_chunklen(net, ep, asoc, type,
+							  arg, commands);
 	}
 
 	/* If it has no more outstanding DATA chunks, the SHUTDOWN receiver
@@ -5479,7 +5482,7 @@ nomem:
 sctp_disposition_t sctp_sf_ignore_other(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5507,7 +5510,7 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
 sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5595,7 +5598,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
 sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -5626,7 +5629,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
 sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5690,7 +5693,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
 sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5740,7 +5743,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
 sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5811,7 +5814,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5882,7 +5885,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5919,7 +5922,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5961,7 +5964,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 sctp_disposition_t sctp_sf_not_impl(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -5979,7 +5982,7 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
 sctp_disposition_t sctp_sf_bug(struct net *net,
 			       const struct sctp_endpoint *ep,
 			       const struct sctp_association *asoc,
-			       const sctp_subtype_t type,
+			       const union sctp_subtype type,
 			       void *arg,
 			       sctp_cmd_seq_t *commands)
 {
@@ -6000,7 +6003,7 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
 sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -6107,9 +6110,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 		switch (chunk->chunk_hdr->type) {
 		case SCTP_CID_INIT_ACK:
 		{
-			sctp_initack_chunk_t *initack;
+			struct sctp_initack_chunk *initack;
 
-			initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
+			initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
 			vtag = ntohl(initack->init_hdr.init_tag);
 			break;
 		}
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 3e958c1c4b95..d437f3801399 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -52,9 +52,10 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
 static const sctp_sm_table_entry_t
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    enum sctp_cid cid,
-							    sctp_state_t state);
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(
+						struct net *net,
+						enum sctp_cid cid,
+						enum sctp_state state);
 
 
 static const sctp_sm_table_entry_t bug = {
@@ -76,10 +77,11 @@ static const sctp_sm_table_entry_t bug = {
 	rtn;								\
 })
 
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
-						  sctp_event_t event_type,
-						  sctp_state_t state,
-						  sctp_subtype_t event_subtype)
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(
+					struct net *net,
+					enum sctp_event event_type,
+					enum sctp_state state,
+					union sctp_subtype event_subtype)
 {
 	switch (event_type) {
 	case SCTP_EVENT_T_CHUNK:
@@ -967,9 +969,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    enum sctp_cid cid,
-							    sctp_state_t state)
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(
+						struct net *net,
+						enum sctp_cid cid,
+						enum sctp_state state)
 {
 	if (state > SCTP_STATE_MAX)
 		return &bug;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1db478e34520..a1e2113806dd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1055,7 +1055,7 @@ static int __sctp_connect(struct sock *sk,
 	struct sctp_association *asoc2;
 	struct sctp_transport *transport;
 	union sctp_addr to;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	long timeo;
 	int err = 0;
 	int addrcnt = 0;
@@ -1610,7 +1610,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	struct sctp_initmsg *sinit;
 	sctp_assoc_t associd = 0;
 	sctp_cmsgs_t cmsgs = { NULL };
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	bool fill_sinfo_ttl = false, wait_connect = false;
 	struct sctp_datamsg *datamsg;
 	int msg_flags = msg->msg_flags;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 0e732f68c2bf..ef7ca44d6e6a 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */
 static int int_max = INT_MAX;
 static int sack_timer_min = 1;
 static int sack_timer_max = 500;
-static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
+static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
 static int rwnd_scale_max = 16;
 static int rto_alpha_min = 0;
 static int rto_beta_min = 0;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 80a97c8501a7..2d9bd3776bc8 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -490,7 +490,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
  * detected.
  */
 void sctp_transport_lower_cwnd(struct sctp_transport *transport,
-			       sctp_lower_cwnd_t reason)
+			       enum sctp_lower_cwnd reason)
 {
 	struct sctp_association *asoc = transport->asoc;
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5f86c5062a98..67abc0194f30 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
 				struct sctp_chunk *chunk, __u16 flags,
 				gfp_t gfp)
 {
-	struct sctp_ulpevent *event;
 	struct sctp_remote_error *sre;
+	struct sctp_ulpevent *event;
+	struct sctp_errhdr *ch;
 	struct sk_buff *skb;
-	sctp_errhdr_t *ch;
 	__be16 cause;
 	int elen;
 
-	ch = (sctp_errhdr_t *)(chunk->skb->data);
+	ch = (struct sctp_errhdr *)(chunk->skb->data);
 	cause = ch->cause;
-	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch);
 
 	/* Pull off the ERROR header.  */
-	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
+	skb_pull(chunk->skb, sizeof(*ch));
 
 	/* Copy the skb to a new skb with room for us to prepend
 	 * notification with.
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 33954852f3f8..c717ef0896aa 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,10 +8,6 @@ config SMC
 	  The Linux implementation of the SMC-R solution is designed as
 	  a separate socket family SMC.
 
-	  Warning: SMC will expose all memory for remote reads and writes
-	  once a connection is established.  Don't enable this option except
-	  for tightly controlled lab environment.
-
 	  Select this option if you want to run SMC socket applications
 
 config SMC_DIAG
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6793d7348cc8..8c6d24b2995d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -338,6 +338,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
 		return SMC_CLC_DECL_INTERR;
 
 	smc_wr_remember_qp_attr(link);
+
+	rc = smc_wr_reg_send(link,
+			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+	if (rc)
+		return SMC_CLC_DECL_INTERR;
+
 	/* send CONFIRM LINK response over RoCE fabric */
 	rc = smc_llc_send_confirm_link(link,
 				       link->smcibdev->mac[link->ibport - 1],
@@ -430,12 +436,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
 	smc_conn_save_peer_info(smc, &aclc);
 
-	rc = smc_sndbuf_create(smc);
-	if (rc) {
-		reason_code = SMC_CLC_DECL_MEM;
-		goto decline_rdma_unlock;
-	}
-	rc = smc_rmb_create(smc);
+	/* create send buffer and rmb */
+	rc = smc_buf_create(smc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_MEM;
 		goto decline_rdma_unlock;
@@ -459,7 +461,20 @@ static int smc_connect_rdma(struct smc_sock *smc)
 			reason_code = SMC_CLC_DECL_INTERR;
 			goto decline_rdma_unlock;
 		}
+	} else {
+		struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
+
+		if (!buf_desc->reused) {
+			/* register memory region for new rmb */
+			rc = smc_wr_reg_send(link,
+					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
+			if (rc) {
+				reason_code = SMC_CLC_DECL_INTERR;
+				goto decline_rdma_unlock;
+			}
+		}
 	}
+	smc_rmb_sync_sg_for_device(&smc->conn);
 
 	rc = smc_clc_send_confirm(smc);
 	if (rc)
@@ -692,6 +707,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 	int rc;
 
 	link = &lgr->lnk[SMC_SINGLE_LINK];
+
+	rc = smc_wr_reg_send(link,
+			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+	if (rc)
+		return SMC_CLC_DECL_INTERR;
+
 	/* send CONFIRM LINK request to client over the RoCE fabric */
 	rc = smc_llc_send_confirm_link(link,
 				       link->smcibdev->mac[link->ibport - 1],
@@ -779,11 +800,6 @@ static void smc_listen_work(struct work_struct *work)
 	mutex_lock(&smc_create_lgr_pending);
 	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
 					smcibdev, ibport, &pclc.lcl, 0);
-	if (local_contact == SMC_REUSE_CONTACT)
-		/* lock no longer needed, free it due to following
-		 * smc_clc_wait_msg() call
-		 */
-		mutex_unlock(&smc_create_lgr_pending);
 	if (local_contact < 0) {
 		rc = local_contact;
 		if (rc == -ENOMEM)
@@ -794,12 +810,8 @@ static void smc_listen_work(struct work_struct *work)
 	}
 	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
-	rc = smc_sndbuf_create(new_smc);
-	if (rc) {
-		reason_code = SMC_CLC_DECL_MEM;
-		goto decline_rdma;
-	}
-	rc = smc_rmb_create(new_smc);
+	/* create send buffer and rmb */
+	rc = smc_buf_create(new_smc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_MEM;
 		goto decline_rdma;
@@ -808,6 +820,21 @@ static void smc_listen_work(struct work_struct *work)
 	smc_close_init(new_smc);
 	smc_rx_init(new_smc);
 
+	if (local_contact != SMC_FIRST_CONTACT) {
+		struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
+
+		if (!buf_desc->reused) {
+			/* register memory region for new rmb */
+			rc = smc_wr_reg_send(link,
+					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
+			if (rc) {
+				reason_code = SMC_CLC_DECL_INTERR;
+				goto decline_rdma;
+			}
+		}
+	}
+	smc_rmb_sync_sg_for_device(&new_smc->conn);
+
 	rc = smc_clc_send_accept(new_smc, local_contact);
 	if (rc)
 		goto out_err;
@@ -853,8 +880,7 @@ out_connected:
 	if (newsmcsk->sk_state == SMC_INIT)
 		newsmcsk->sk_state = SMC_ACTIVE;
 enqueue:
-	if (local_contact == SMC_FIRST_CONTACT)
-		mutex_unlock(&smc_create_lgr_pending);
+	mutex_unlock(&smc_create_lgr_pending);
 	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
 	if (lsmc->sk.sk_state == SMC_LISTEN) {
 		smc_accept_enqueue(&lsmc->sk, newsmcsk);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 03ec058d18df..3934913ab835 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -204,13 +204,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(cclc.qpn, link->roce_qp->qp_num);
 	cclc.rmb_rkey =
-		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
 	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
 	cclc.rmbe_size = conn->rmbe_size_short;
-	cclc.rmb_dma_addr =
-		cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+	cclc.rmb_dma_addr = cpu_to_be64(
+		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
 	hton24(cclc.psn, link->psn_initial);
 
 	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -256,13 +256,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(aclc.qpn, link->roce_qp->qp_num);
 	aclc.rmb_rkey =
-		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
 	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	aclc.qp_mtu = link->path_mtu;
 	aclc.rmbe_size = conn->rmbe_size_short,
-	aclc.rmb_dma_addr =
-		cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+	aclc.rmb_dma_addr = cpu_to_be64(
+		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
 	hton24(aclc.psn, link->psn_initial);
 	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3ac09a629ea1..1a16d51e2330 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -175,7 +175,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
 	rc = smc_wr_alloc_link_mem(lnk);
 	if (rc)
 		goto free_lgr;
-	init_waitqueue_head(&lnk->wr_tx_wait);
 	rc = smc_ib_create_protection_domain(lnk);
 	if (rc)
 		goto free_link_mem;
@@ -207,17 +206,14 @@ out:
 	return rc;
 }
 
-static void smc_sndbuf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn)
 {
 	if (conn->sndbuf_desc) {
 		conn->sndbuf_desc->used = 0;
 		conn->sndbuf_size = 0;
 	}
-}
-
-static void smc_rmb_unuse(struct smc_connection *conn)
-{
 	if (conn->rmb_desc) {
+		conn->rmb_desc->reused = true;
 		conn->rmb_desc->used = 0;
 		conn->rmbe_size = 0;
 	}
@@ -232,8 +228,7 @@ void smc_conn_free(struct smc_connection *conn)
 		return;
 	smc_cdc_tx_dismiss_slots(conn);
 	smc_lgr_unregister_conn(conn);
-	smc_rmb_unuse(conn);
-	smc_sndbuf_unuse(conn);
+	smc_buf_unuse(conn);
 }
 
 static void smc_link_clear(struct smc_link *lnk)
@@ -246,48 +241,57 @@ static void smc_link_clear(struct smc_link *lnk)
 	smc_wr_free_link_mem(lnk);
 }
 
-static void smc_lgr_free_sndbufs(struct smc_link_group *lgr)
+static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
+			 bool is_rmb)
 {
-	struct smc_buf_desc *sndbuf_desc, *bf_desc;
-	int i;
-
-	for (i = 0; i < SMC_RMBE_SIZES; i++) {
-		list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i],
-					 list) {
-			list_del(&sndbuf_desc->list);
-			smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-					 smc_uncompress_bufsize(i),
-					 sndbuf_desc, DMA_TO_DEVICE);
-			kfree(sndbuf_desc->cpu_addr);
-			kfree(sndbuf_desc);
-		}
+	if (is_rmb) {
+		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
+			smc_ib_put_memory_region(
+					buf_desc->mr_rx[SMC_SINGLE_LINK]);
+		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+				    DMA_FROM_DEVICE);
+	} else {
+		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+				    DMA_TO_DEVICE);
 	}
+	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
+	if (buf_desc->cpu_addr)
+		free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
+	kfree(buf_desc);
 }
 
-static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
+static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
 {
-	struct smc_buf_desc *rmb_desc, *bf_desc;
 	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_buf_desc *buf_desc, *bf_desc;
+	struct list_head *buf_list;
 	int i;
 
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
-		list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
+		if (is_rmb)
+			buf_list = &lgr->rmbs[i];
+		else
+			buf_list = &lgr->sndbufs[i];
+		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
 					 list) {
-			list_del(&rmb_desc->list);
-			smc_ib_buf_unmap(lnk->smcibdev,
-					 smc_uncompress_bufsize(i),
-					 rmb_desc, DMA_FROM_DEVICE);
-			kfree(rmb_desc->cpu_addr);
-			kfree(rmb_desc);
+			list_del(&buf_desc->list);
+			smc_buf_free(buf_desc, lnk, is_rmb);
 		}
 	}
 }
 
+static void smc_lgr_free_bufs(struct smc_link_group *lgr)
+{
+	/* free send buffers */
+	__smc_lgr_free_bufs(lgr, false);
+	/* free rmbs */
+	__smc_lgr_free_bufs(lgr, true);
+}
+
 /* remove a link group */
 void smc_lgr_free(struct smc_link_group *lgr)
 {
-	smc_lgr_free_rmbs(lgr);
-	smc_lgr_free_sndbufs(lgr);
+	smc_lgr_free_bufs(lgr);
 	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
 	kfree(lgr);
 }
@@ -452,45 +456,25 @@ out:
 	return rc ? rc : local_contact;
 }
 
-/* try to reuse a sndbuf description slot of the sndbufs list for a certain
- * buf_size; if not available, return NULL
+/* try to reuse a sndbuf or rmb description slot for a certain
+ * buffer size; if not available, return NULL
  */
 static inline
-struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr,
-					 int compressed_bufsize)
+struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
+				      int compressed_bufsize,
+				      rwlock_t *lock,
+				      struct list_head *buf_list)
 {
-	struct smc_buf_desc *sndbuf_slot;
-
-	read_lock_bh(&lgr->sndbufs_lock);
-	list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize],
-			    list) {
-		if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) {
-			read_unlock_bh(&lgr->sndbufs_lock);
-			return sndbuf_slot;
-		}
-	}
-	read_unlock_bh(&lgr->sndbufs_lock);
-	return NULL;
-}
+	struct smc_buf_desc *buf_slot;
 
-/* try to reuse an rmb description slot of the rmbs list for a certain
- * rmbe_size; if not available, return NULL
- */
-static inline
-struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
-				      int compressed_bufsize)
-{
-	struct smc_buf_desc *rmb_slot;
-
-	read_lock_bh(&lgr->rmbs_lock);
-	list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize],
-			    list) {
-		if (cmpxchg(&rmb_slot->used, 0, 1) == 0) {
-			read_unlock_bh(&lgr->rmbs_lock);
-			return rmb_slot;
+	read_lock_bh(lock);
+	list_for_each_entry(buf_slot, buf_list, list) {
+		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
+			read_unlock_bh(lock);
+			return buf_slot;
 		}
 	}
-	read_unlock_bh(&lgr->rmbs_lock);
+	read_unlock_bh(lock);
 	return NULL;
 }
 
@@ -503,136 +487,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
 	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
-/* create the tx buffer for an SMC socket */
-int smc_sndbuf_create(struct smc_sock *smc)
+static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
+					       bool is_rmb, int bufsize)
 {
-	struct smc_connection *conn = &smc->conn;
-	struct smc_link_group *lgr = conn->lgr;
-	int tmp_bufsize, tmp_bufsize_short;
-	struct smc_buf_desc *sndbuf_desc;
+	struct smc_buf_desc *buf_desc;
+	struct smc_link *lnk;
 	int rc;
 
-	/* use socket send buffer size (w/o overhead) as start value */
-	for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
-	     tmp_bufsize_short >= 0; tmp_bufsize_short--) {
-		tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
-		/* check for reusable sndbuf_slot in the link group */
-		sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short);
-		if (sndbuf_desc) {
-			memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize);
-			break; /* found reusable slot */
-		}
-		/* try to alloc a new send buffer */
-		sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL);
-		if (!sndbuf_desc)
-			break; /* give up with -ENOMEM */
-		sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize,
-						GFP_KERNEL | __GFP_NOWARN |
-						__GFP_NOMEMALLOC |
-						__GFP_NORETRY);
-		if (!sndbuf_desc->cpu_addr) {
-			kfree(sndbuf_desc);
-			sndbuf_desc = NULL;
-			/* if send buffer allocation has failed,
-			 * try a smaller one
-			 */
-			continue;
-		}
-		rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-				    tmp_bufsize, sndbuf_desc,
-				    DMA_TO_DEVICE);
+	/* try to alloc a new buffer */
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return ERR_PTR(-ENOMEM);
+
+	buf_desc->cpu_addr =
+		(void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
+					 __GFP_NOMEMALLOC |
+					 __GFP_NORETRY | __GFP_ZERO,
+					 get_order(bufsize));
+	if (!buf_desc->cpu_addr) {
+		kfree(buf_desc);
+		return ERR_PTR(-EAGAIN);
+	}
+	buf_desc->order = get_order(bufsize);
+
+	/* build the sg table from the pages */
+	lnk = &lgr->lnk[SMC_SINGLE_LINK];
+	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
+			    GFP_KERNEL);
+	if (rc) {
+		smc_buf_free(buf_desc, lnk, is_rmb);
+		return ERR_PTR(rc);
+	}
+	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
+		   buf_desc->cpu_addr, bufsize);
+
+	/* map sg table to DMA address */
+	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
+			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	/* SMC protocol depends on mapping to one DMA address only */
+	if (rc != 1)  {
+		smc_buf_free(buf_desc, lnk, is_rmb);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	/* create a new memory region for the RMB */
+	if (is_rmb) {
+		rc = smc_ib_get_memory_region(lnk->roce_pd,
+					      IB_ACCESS_REMOTE_WRITE |
+					      IB_ACCESS_LOCAL_WRITE,
+					      buf_desc);
 		if (rc) {
-			kfree(sndbuf_desc->cpu_addr);
-			kfree(sndbuf_desc);
-			sndbuf_desc = NULL;
-			continue; /* if mapping failed, try smaller one */
+			smc_buf_free(buf_desc, lnk, is_rmb);
+			return ERR_PTR(rc);
 		}
-		sndbuf_desc->used = 1;
-		write_lock_bh(&lgr->sndbufs_lock);
-		list_add(&sndbuf_desc->list,
-			 &lgr->sndbufs[tmp_bufsize_short]);
-		write_unlock_bh(&lgr->sndbufs_lock);
-		break;
-	}
-	if (sndbuf_desc && sndbuf_desc->cpu_addr) {
-		conn->sndbuf_desc = sndbuf_desc;
-		conn->sndbuf_size = tmp_bufsize;
-		smc->sk.sk_sndbuf = tmp_bufsize * 2;
-		atomic_set(&conn->sndbuf_space, tmp_bufsize);
-		return 0;
-	} else {
-		return -ENOMEM;
 	}
+
+	return buf_desc;
 }
 
-/* create the RMB for an SMC socket (even though the SMC protocol
- * allows more than one RMB-element per RMB, the Linux implementation
- * uses just one RMB-element per RMB, i.e. uses an extra RMB for every
- * connection in a link group
- */
-int smc_rmb_create(struct smc_sock *smc)
+static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 {
 	struct smc_connection *conn = &smc->conn;
 	struct smc_link_group *lgr = conn->lgr;
-	int tmp_bufsize, tmp_bufsize_short;
-	struct smc_buf_desc *rmb_desc;
-	int rc;
+	struct smc_buf_desc *buf_desc = NULL;
+	struct list_head *buf_list;
+	int bufsize, bufsize_short;
+	int sk_buf_size;
+	rwlock_t *lock;
+
+	if (is_rmb)
+		/* use socket recv buffer size (w/o overhead) as start value */
+		sk_buf_size = smc->sk.sk_rcvbuf / 2;
+	else
+		/* use socket send buffer size (w/o overhead) as start value */
+		sk_buf_size = smc->sk.sk_sndbuf / 2;
+
+	for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
+	     bufsize_short >= 0; bufsize_short--) {
+
+		if (is_rmb) {
+			lock = &lgr->rmbs_lock;
+			buf_list = &lgr->rmbs[bufsize_short];
+		} else {
+			lock = &lgr->sndbufs_lock;
+			buf_list = &lgr->sndbufs[bufsize_short];
+		}
+		bufsize = smc_uncompress_bufsize(bufsize_short);
+		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
+			continue;
 
-	/* use socket recv buffer size (w/o overhead) as start value */
-	for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2);
-	     tmp_bufsize_short >= 0; tmp_bufsize_short--) {
-		tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
-		/* check for reusable rmb_slot in the link group */
-		rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short);
-		if (rmb_desc) {
-			memset(rmb_desc->cpu_addr, 0, tmp_bufsize);
+		/* check for reusable slot in the link group */
+		buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
+		if (buf_desc) {
+			memset(buf_desc->cpu_addr, 0, bufsize);
 			break; /* found reusable slot */
 		}
-		/* try to alloc a new RMB */
-		rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL);
-		if (!rmb_desc)
-			break; /* give up with -ENOMEM */
-		rmb_desc->cpu_addr = kzalloc(tmp_bufsize,
-					     GFP_KERNEL | __GFP_NOWARN |
-					     __GFP_NOMEMALLOC |
-					     __GFP_NORETRY);
-		if (!rmb_desc->cpu_addr) {
-			kfree(rmb_desc);
-			rmb_desc = NULL;
-			/* if RMB allocation has failed,
-			 * try a smaller one
-			 */
+
+		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+		if (PTR_ERR(buf_desc) == -ENOMEM)
+			break;
+		if (IS_ERR(buf_desc))
 			continue;
-		}
-		rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-				    tmp_bufsize, rmb_desc,
-				    DMA_FROM_DEVICE);
-		if (rc) {
-			kfree(rmb_desc->cpu_addr);
-			kfree(rmb_desc);
-			rmb_desc = NULL;
-			continue; /* if mapping failed, try smaller one */
-		}
-		rmb_desc->rkey[SMC_SINGLE_LINK] =
-			lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
-		rmb_desc->used = 1;
-		write_lock_bh(&lgr->rmbs_lock);
-		list_add(&rmb_desc->list,
-			 &lgr->rmbs[tmp_bufsize_short]);
-		write_unlock_bh(&lgr->rmbs_lock);
-		break;
+
+		buf_desc->used = 1;
+		write_lock_bh(lock);
+		list_add(&buf_desc->list, buf_list);
+		write_unlock_bh(lock);
+		break; /* found */
 	}
-	if (rmb_desc && rmb_desc->cpu_addr) {
-		conn->rmb_desc = rmb_desc;
-		conn->rmbe_size = tmp_bufsize;
-		conn->rmbe_size_short = tmp_bufsize_short;
-		smc->sk.sk_rcvbuf = tmp_bufsize * 2;
+
+	if (IS_ERR(buf_desc))
+		return -ENOMEM;
+
+	if (is_rmb) {
+		conn->rmb_desc = buf_desc;
+		conn->rmbe_size = bufsize;
+		conn->rmbe_size_short = bufsize_short;
+		smc->sk.sk_rcvbuf = bufsize * 2;
 		atomic_set(&conn->bytes_to_rcv, 0);
-		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize);
-		return 0;
+		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
 	} else {
-		return -ENOMEM;
+		conn->sndbuf_desc = buf_desc;
+		conn->sndbuf_size = bufsize;
+		smc->sk.sk_sndbuf = bufsize * 2;
+		atomic_set(&conn->sndbuf_space, bufsize);
 	}
+	return 0;
+}
+
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+			       conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+				  conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+			       conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+				  conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+/* create the send and receive buffer for an SMC socket;
+ * receive buffers are called RMBs;
+ * (even though the SMC protocol allows more than one RMB-element per RMB,
+ * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
+ * extra RMB for every connection in a link group
+ */
+int smc_buf_create(struct smc_sock *smc)
+{
+	int rc;
+
+	/* create send buffer */
+	rc = __smc_buf_create(smc, false);
+	if (rc)
+		return rc;
+	/* create rmb */
+	rc = __smc_buf_create(smc, true);
+	if (rc)
+		smc_buf_free(smc->conn.sndbuf_desc,
+			     &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
+	return rc;
 }
 
 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index b013cb43a327..19c44bf4e391 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -37,6 +37,14 @@ struct smc_wr_buf {
 	u8	raw[SMC_WR_BUF_SIZE];
 };
 
+#define SMC_WR_REG_MR_WAIT_TIME	(5 * HZ)/* wait time for ib_wr_reg_mr result */
+
+enum smc_wr_reg_state {
+	POSTED,		/* ib_wr_reg_mr request posted */
+	CONFIRMED,	/* ib_wr_reg_mr response: successful */
+	FAILED		/* ib_wr_reg_mr response: failure */
+};
+
 struct smc_link {
 	struct smc_ib_device	*smcibdev;	/* ib-device */
 	u8			ibport;		/* port - values 1 | 2 */
@@ -65,6 +73,10 @@ struct smc_link {
 	u64			wr_rx_id;	/* seq # of last recv WR */
 	u32			wr_rx_cnt;	/* number of WR recv buffers */
 
+	struct ib_reg_wr	wr_reg;		/* WR register memory region */
+	wait_queue_head_t	wr_reg_wait;	/* wait for wr_reg result */
+	enum smc_wr_reg_state	wr_reg_state;	/* state of wr_reg request */
+
 	union ib_gid		gid;		/* gid matching used vlan id */
 	u32			peer_qpn;	/* QP number of peer */
 	enum ib_mtu		path_mtu;	/* used mtu */
@@ -90,14 +102,15 @@ struct smc_link {
 /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
 struct smc_buf_desc {
 	struct list_head	list;
-	u64			dma_addr[SMC_LINKS_PER_LGR_MAX];
-						/* mapped address of buffer */
 	void			*cpu_addr;	/* virtual address of buffer */
-	u32			rkey[SMC_LINKS_PER_LGR_MAX];
-						/* for rmb only:
-						 * rkey provided to peer
+	struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
+	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+						/* for rmb only: memory region
+						 * incl. rkey provided to peer
 						 */
+	u32			order;		/* allocation order */
 	u32			used;		/* currently used / unused */
+	bool			reused;		/* new created / reused */
 };
 
 struct smc_rtoken {				/* address/key of remote RMB */
@@ -173,9 +186,11 @@ struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
-int smc_sndbuf_create(struct smc_sock *smc);
-int smc_rmb_create(struct smc_sock *smc);
+int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
-
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
 #endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index b31715505a35..547e0e113b17 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -13,6 +13,7 @@
 
 #include <linux/random.h>
 #include <linux/workqueue.h>
+#include <linux/scatterlist.h>
 #include <rdma/ib_verbs.h>
 
 #include "smc_pnet.h"
@@ -192,8 +193,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 {
 	int rc;
 
-	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
-				   IB_PD_UNSAFE_GLOBAL_RKEY);
+	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
 	rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
 	if (IS_ERR(lnk->roce_pd))
 		lnk->roce_pd = NULL;
@@ -232,10 +232,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 		.recv_cq = lnk->smcibdev->roce_cq_recv,
 		.srq = NULL,
 		.cap = {
-			.max_send_wr = SMC_WR_BUF_CNT,
 				/* include unsolicited rdma_writes as well,
 				 * there are max. 2 RDMA_WRITE per 1 WR_SEND
 				 */
+			.max_send_wr = SMC_WR_BUF_CNT * 3,
 			.max_recv_wr = SMC_WR_BUF_CNT * 3,
 			.max_send_sge = SMC_IB_MAX_SEND_SGE,
 			.max_recv_sge = 1,
@@ -254,33 +254,117 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 	return rc;
 }
 
-/* map a new TX or RX buffer to DMA */
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
-		   struct smc_buf_desc *buf_slot,
-		   enum dma_data_direction data_direction)
+void smc_ib_put_memory_region(struct ib_mr *mr)
 {
-	int rc = 0;
+	ib_dereg_mr(mr);
+}
 
-	if (buf_slot->dma_addr[SMC_SINGLE_LINK])
-		return rc; /* already mapped */
-	buf_slot->dma_addr[SMC_SINGLE_LINK] =
-		ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr,
-				  buf_size, data_direction);
-	if (ib_dma_mapping_error(smcibdev->ibdev,
-				 buf_slot->dma_addr[SMC_SINGLE_LINK]))
-		rc = -EIO;
-	return rc;
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+{
+	unsigned int offset = 0;
+	int sg_num;
+
+	/* map the largest prefix of a dma mapped SG list */
+	sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
+			      buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+			      buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+			      &offset, PAGE_SIZE);
+
+	return sg_num;
+}
+
+/* Allocate a memory region and map the dma mapped SG list of buf_slot */
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+			     struct smc_buf_desc *buf_slot)
+{
+	if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+		return 0; /* already done */
+
+	buf_slot->mr_rx[SMC_SINGLE_LINK] =
+		ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
+	if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+		int rc;
+
+		rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
+		buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+		return rc;
+	}
+
+	if (smc_ib_map_mr_sg(buf_slot) != 1)
+		return -EINVAL;
+
+	return 0;
 }
 
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size,
+/* synchronize buffer usage for cpu access */
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+			    struct smc_buf_desc *buf_slot,
+			    enum dma_data_direction data_direction)
+{
+	struct scatterlist *sg;
+	unsigned int i;
+
+	/* for now there is just one DMA address */
+	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+		if (!sg_dma_len(sg))
+			break;
+		ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+					   sg_dma_address(sg),
+					   sg_dma_len(sg),
+					   data_direction);
+	}
+}
+
+/* synchronize buffer usage for device access */
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+			       struct smc_buf_desc *buf_slot,
+			       enum dma_data_direction data_direction)
+{
+	struct scatterlist *sg;
+	unsigned int i;
+
+	/* for now there is just one DMA address */
+	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+		if (!sg_dma_len(sg))
+			break;
+		ib_dma_sync_single_for_device(smcibdev->ibdev,
+					      sg_dma_address(sg),
+					      sg_dma_len(sg),
+					      data_direction);
+	}
+}
+
+/* Map a new TX or RX buffer SG-table to DMA */
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction)
 {
-	if (!buf_slot->dma_addr[SMC_SINGLE_LINK])
+	int mapped_nents;
+
+	mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
+				     buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+				     buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+				     data_direction);
+	if (!mapped_nents)
+		return -ENOMEM;
+
+	return mapped_nents;
+}
+
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+			 struct smc_buf_desc *buf_slot,
+			 enum dma_data_direction data_direction)
+{
+	if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
 		return; /* already unmapped */
-	ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size,
-			    data_direction);
-	buf_slot->dma_addr[SMC_SINGLE_LINK] = 0;
+
+	ib_dma_unmap_sg(smcibdev->ibdev,
+			buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+			buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+			data_direction);
+	buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
 }
 
 static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index b567152a526d..9b927a33d5e6 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -51,12 +51,12 @@ int smc_ib_register_client(void) __init;
 void smc_ib_unregister_client(void);
 bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
 int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
-		   struct smc_buf_desc *buf_slot,
-		   enum dma_data_direction data_direction);
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize,
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction);
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+			 struct smc_buf_desc *buf_slot,
+			 enum dma_data_direction data_direction);
 void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
 int smc_ib_create_protection_domain(struct smc_link *lnk);
 void smc_ib_destroy_queue_pair(struct smc_link *lnk);
@@ -65,6 +65,13 @@ int smc_ib_ready_link(struct smc_link *lnk);
 int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
-
-
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+			     struct smc_buf_desc *buf_slot);
+void smc_ib_put_memory_region(struct ib_mr *mr);
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+			    struct smc_buf_desc *buf_slot,
+			    enum dma_data_direction data_direction);
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+			       struct smc_buf_desc *buf_slot,
+			       enum dma_data_direction data_direction);
 #endif
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index f0c8b089f770..b17a333e9bb0 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -170,6 +170,7 @@ copy:
 				  copylen, conn->rmbe_size - cons.count);
 		chunk_len_sum = chunk_len;
 		chunk_off = cons.count;
+		smc_rmb_sync_sg_for_cpu(conn);
 		for (chunk = 0; chunk < 2; chunk++) {
 			if (!(flags & MSG_TRUNC)) {
 				rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
@@ -177,6 +178,7 @@ copy:
 				if (rc) {
 					if (!read_done)
 						read_done = -EFAULT;
+					smc_rmb_sync_sg_for_device(conn);
 					goto out;
 				}
 			}
@@ -190,6 +192,7 @@ copy:
 			chunk_len_sum += chunk_len;
 			chunk_off = 0; /* modulo offset in recv ring buffer */
 		}
+		smc_rmb_sync_sg_for_device(conn);
 
 		/* update cursors */
 		if (!(flags & MSG_PEEK)) {
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 21ec1832ab51..3c656beb8820 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -174,10 +174,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 				  copylen, conn->sndbuf_size - tx_cnt_prep);
 		chunk_len_sum = chunk_len;
 		chunk_off = tx_cnt_prep;
+		smc_sndbuf_sync_sg_for_cpu(conn);
 		for (chunk = 0; chunk < 2; chunk++) {
 			rc = memcpy_from_msg(sndbuf_base + chunk_off,
 					     msg, chunk_len);
 			if (rc) {
+				smc_sndbuf_sync_sg_for_device(conn);
 				if (send_done)
 					return send_done;
 				goto out_err;
@@ -192,6 +194,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 			chunk_len_sum += chunk_len;
 			chunk_off = 0; /* modulo offset in send ring buffer */
 		}
+		smc_sndbuf_sync_sg_for_device(conn);
 		/* update cursors */
 		smc_curs_add(conn->sndbuf_size, &prep, copylen);
 		smc_curs_write(&conn->tx_curs_prep,
@@ -277,6 +280,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 	struct smc_link_group *lgr = conn->lgr;
 	int to_send, rmbespace;
 	struct smc_link *link;
+	dma_addr_t dma_addr;
 	int num_sges;
 	int rc;
 
@@ -334,12 +338,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 		src_len = conn->sndbuf_size - sent.count;
 	}
 	src_len_sum = src_len;
+	dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
 	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
 		num_sges = 0;
 		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-			sges[srcchunk].addr =
-				conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] +
-				src_off;
+			sges[srcchunk].addr = dma_addr + src_off;
 			sges[srcchunk].length = src_len;
 			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
 			num_sges++;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 874ee9f9d796..ab56bda66783 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -68,6 +68,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
 	int i;
 
 	link = wc->qp->qp_context;
+
+	if (wc->opcode == IB_WC_REG_MR) {
+		if (wc->status)
+			link->wr_reg_state = FAILED;
+		else
+			link->wr_reg_state = CONFIRMED;
+		wake_up(&link->wr_reg_wait);
+		return;
+	}
+
 	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
 	if (pnd_snd_idx == link->wr_tx_cnt)
 		return;
@@ -243,6 +253,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	return rc;
 }
 
+/* Register a memory region and wait for result. */
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
+{
+	struct ib_send_wr *failed_wr = NULL;
+	int rc;
+
+	ib_req_notify_cq(link->smcibdev->roce_cq_send,
+			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+	link->wr_reg_state = POSTED;
+	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
+	link->wr_reg.mr = mr;
+	link->wr_reg.key = mr->rkey;
+	failed_wr = &link->wr_reg.wr;
+	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
+	WARN_ON(failed_wr != &link->wr_reg.wr);
+	if (rc)
+		return rc;
+
+	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
+					      (link->wr_reg_state != POSTED),
+					      SMC_WR_REG_MR_WAIT_TIME);
+	if (!rc) {
+		/* timeout - terminate connections */
+		struct smc_link_group *lgr;
+
+		lgr = container_of(link, struct smc_link_group,
+				   lnk[SMC_SINGLE_LINK]);
+		smc_lgr_terminate(lgr);
+		return -EPIPE;
+	}
+	if (rc == -ERESTARTSYS)
+		return -EINTR;
+	switch (link->wr_reg_state) {
+	case CONFIRMED:
+		rc = 0;
+		break;
+	case FAILED:
+		rc = -EIO;
+		break;
+	case POSTED:
+		rc = -EPIPE;
+		break;
+	}
+	return rc;
+}
+
 void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
 			     smc_wr_tx_filter filter,
 			     smc_wr_tx_dismisser dismisser,
@@ -458,6 +514,11 @@ static void smc_wr_init_sge(struct smc_link *lnk)
 		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
 		lnk->wr_rx_ibs[i].num_sge = 1;
 	}
+	lnk->wr_reg.wr.next = NULL;
+	lnk->wr_reg.wr.num_sge = 0;
+	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
+	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
+	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
 }
 
 void smc_wr_free_link(struct smc_link *lnk)
@@ -602,6 +663,8 @@ int smc_wr_create_link(struct smc_link *lnk)
 	smc_wr_init_sge(lnk);
 	memset(lnk->wr_tx_mask, 0,
 	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
+	init_waitqueue_head(&lnk->wr_tx_wait);
+	init_waitqueue_head(&lnk->wr_reg_wait);
 	return rc;
 
 dma_unmap:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 0b9beeda6053..45eb53833052 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -102,5 +102,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
 int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
 int smc_wr_rx_post_init(struct smc_link *link);
 void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr);
 
 #endif /* SMC_WR_H */
diff --git a/net/socket.c b/net/socket.c
index ad22df1ffbd1..b332d1e8e4e4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -652,6 +652,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+			  struct kvec *vec, size_t num, size_t size)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (!sock->ops->sendmsg_locked)
+		sock_no_sendmsg_locked(sk, msg, size);
+
+	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+
+	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+}
+EXPORT_SYMBOL(kernel_sendmsg_locked);
+
 static bool skb_is_err_queue(const struct sk_buff *skb)
 {
 	/* pkt_type of skbs enqueued on the error queue are set to
@@ -3376,6 +3390,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 }
 EXPORT_SYMBOL(kernel_sendpage);
 
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			   size_t size, int flags)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->sendpage_locked)
+		return sock->ops->sendpage_locked(sk, page, offset, size,
+						  flags);
+
+	return sock_no_sendpage_locked(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL(kernel_sendpage_locked);
+
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
 {
 	mm_segment_t oldfs = get_fs();
@@ -3405,7 +3432,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
 	struct inet_sock *inet;
 	struct ip_options_rcu *opt;
 	u32 overhead = 0;
-	bool owned_by_user;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6_pinfo *np;
 	struct ipv6_txoptions *optv6 = NULL;
@@ -3414,13 +3440,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
 	if (!sk)
 		return overhead;
 
-	owned_by_user = sock_owned_by_user(sk);
 	switch (sk->sk_family) {
 	case AF_INET:
 		inet = inet_sk(sk);
 		overhead += sizeof(struct iphdr);
 		opt = rcu_dereference_protected(inet->inet_opt,
-						owned_by_user);
+						sock_owned_by_user(sk));
 		if (opt)
 			overhead += opt->opt.optlen;
 		return overhead;
@@ -3430,7 +3455,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
 		overhead += sizeof(struct ipv6hdr);
 		if (np)
 			optv6 = rcu_dereference_protected(np->opt,
-							  owned_by_user);
+							  sock_owned_by_user(sk));
 		if (optv6)
 			overhead += (optv6->opt_flen + optv6->opt_nflen);
 		return overhead;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b5c279b22680..0d18fbc6f870 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -29,44 +29,46 @@
 
 static struct workqueue_struct *strp_wq;
 
-struct _strp_rx_msg {
-	/* Internal cb structure. struct strp_rx_msg must be first for passing
+struct _strp_msg {
+	/* Internal cb structure. struct strp_msg must be first for passing
 	 * to upper layer.
 	 */
-	struct strp_rx_msg strp;
+	struct strp_msg strp;
 	int accum_len;
 	int early_eaten;
 };
 
-static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
 {
-	return (struct _strp_rx_msg *)((void *)skb->cb +
+	return (struct _strp_msg *)((void *)skb->cb +
 		offsetof(struct qdisc_skb_cb, data));
 }
 
 /* Lower lock held */
-static void strp_abort_rx_strp(struct strparser *strp, int err)
+static void strp_abort_strp(struct strparser *strp, int err)
 {
-	struct sock *csk = strp->sk;
-
 	/* Unrecoverable error in receive */
 
-	del_timer(&strp->rx_msg_timer);
+	del_timer(&strp->msg_timer);
 
-	if (strp->rx_stopped)
+	if (strp->stopped)
 		return;
 
-	strp->rx_stopped = 1;
+	strp->stopped = 1;
+
+	if (strp->sk) {
+		struct sock *sk = strp->sk;
 
-	/* Report an error on the lower socket */
-	csk->sk_err = err;
-	csk->sk_error_report(csk);
+		/* Report an error on the lower socket */
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	}
 }
 
-static void strp_start_rx_timer(struct strparser *strp)
+static void strp_start_timer(struct strparser *strp, long timeo)
 {
-	if (strp->sk->sk_rcvtimeo)
-		mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+	if (timeo)
+		mod_timer(&strp->msg_timer, timeo);
 }
 
 /* Lower lock held */
@@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err,
 			    read_descriptor_t *desc)
 {
 	desc->error = err;
-	kfree_skb(strp->rx_skb_head);
-	strp->rx_skb_head = NULL;
+	kfree_skb(strp->skb_head);
+	strp->skb_head = NULL;
 	strp->cb.abort_parser(strp, err);
 }
 
 static inline int strp_peek_len(struct strparser *strp)
 {
-	struct socket *sock = strp->sk->sk_socket;
+	if (strp->sk) {
+		struct socket *sock = strp->sk->sk_socket;
+
+		return sock->ops->peek_len(sock);
+	}
+
+	/* If we don't have an associated socket there's nothing to peek.
+	 * Return int max to avoid stopping the strparser.
+	 */
 
-	return sock->ops->peek_len(sock);
+	return INT_MAX;
 }
 
 /* Lower socket lock held */
-static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-		     unsigned int orig_offset, size_t orig_len)
+static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		       unsigned int orig_offset, size_t orig_len,
+		       size_t max_msg_size, long timeo)
 {
 	struct strparser *strp = (struct strparser *)desc->arg.data;
-	struct _strp_rx_msg *rxm;
+	struct _strp_msg *stm;
 	struct sk_buff *head, *skb;
 	size_t eaten = 0, cand_len;
 	ssize_t extra;
 	int err;
 	bool cloned_orig = false;
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		return 0;
 
-	head = strp->rx_skb_head;
+	head = strp->skb_head;
 	if (head) {
 		/* Message already in progress */
 
-		rxm = _strp_rx_msg(head);
-		if (unlikely(rxm->early_eaten)) {
+		stm = _strp_msg(head);
+		if (unlikely(stm->early_eaten)) {
 			/* Already some number of bytes on the receive sock
-			 * data saved in rx_skb_head, just indicate they
+			 * data saved in skb_head, just indicate they
 			 * are consumed.
 			 */
-			eaten = orig_len <= rxm->early_eaten ?
-				orig_len : rxm->early_eaten;
-			rxm->early_eaten -= eaten;
+			eaten = orig_len <= stm->early_eaten ?
+				orig_len : stm->early_eaten;
+			stm->early_eaten -= eaten;
 
 			return eaten;
 		}
@@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			 */
 			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
 			if (!orig_skb) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = -ENOMEM;
 				return 0;
 			}
 			if (!pskb_pull(orig_skb, orig_offset)) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				kfree_skb(orig_skb);
 				desc->error = -ENOMEM;
 				return 0;
@@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			orig_offset = 0;
 		}
 
-		if (!strp->rx_skb_nextp) {
+		if (!strp->skb_nextp) {
 			/* We are going to append to the frags_list of head.
 			 * Need to unshare the frag_list.
 			 */
 			err = skb_unclone(head, GFP_ATOMIC);
 			if (err) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = err;
 				return 0;
 			}
@@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 
 				skb = alloc_skb(0, GFP_ATOMIC);
 				if (!skb) {
-					STRP_STATS_INCR(strp->stats.rx_mem_fail);
+					STRP_STATS_INCR(strp->stats.mem_fail);
 					desc->error = -ENOMEM;
 					return 0;
 				}
 				skb->len = head->len;
 				skb->data_len = head->len;
 				skb->truesize = head->truesize;
-				*_strp_rx_msg(skb) = *_strp_rx_msg(head);
-				strp->rx_skb_nextp = &head->next;
+				*_strp_msg(skb) = *_strp_msg(head);
+				strp->skb_nextp = &head->next;
 				skb_shinfo(skb)->frag_list = head;
-				strp->rx_skb_head = skb;
+				strp->skb_head = skb;
 				head = skb;
 			} else {
-				strp->rx_skb_nextp =
+				strp->skb_nextp =
 				    &skb_shinfo(head)->frag_list;
 			}
 		}
@@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 		/* Always clone since we will consume something */
 		skb = skb_clone(orig_skb, GFP_ATOMIC);
 		if (!skb) {
-			STRP_STATS_INCR(strp->stats.rx_mem_fail);
+			STRP_STATS_INCR(strp->stats.mem_fail);
 			desc->error = -ENOMEM;
 			break;
 		}
 
 		cand_len = orig_len - eaten;
 
-		head = strp->rx_skb_head;
+		head = strp->skb_head;
 		if (!head) {
 			head = skb;
-			strp->rx_skb_head = head;
-			/* Will set rx_skb_nextp on next packet if needed */
-			strp->rx_skb_nextp = NULL;
-			rxm = _strp_rx_msg(head);
-			memset(rxm, 0, sizeof(*rxm));
-			rxm->strp.offset = orig_offset + eaten;
+			strp->skb_head = head;
+			/* Will set skb_nextp on next packet if needed */
+			strp->skb_nextp = NULL;
+			stm = _strp_msg(head);
+			memset(stm, 0, sizeof(*stm));
+			stm->strp.offset = orig_offset + eaten;
 		} else {
 			/* Unclone since we may be appending to an skb that we
 			 * already share a frag_list with.
 			 */
 			err = skb_unclone(skb, GFP_ATOMIC);
 			if (err) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = err;
 				break;
 			}
 
-			rxm = _strp_rx_msg(head);
-			*strp->rx_skb_nextp = skb;
-			strp->rx_skb_nextp = &skb->next;
+			stm = _strp_msg(head);
+			*strp->skb_nextp = skb;
+			strp->skb_nextp = &skb->next;
 			head->data_len += skb->len;
 			head->len += skb->len;
 			head->truesize += skb->truesize;
 		}
 
-		if (!rxm->strp.full_len) {
+		if (!stm->strp.full_len) {
 			ssize_t len;
 
 			len = (*strp->cb.parse_msg)(strp, head);
 
 			if (!len) {
 				/* Need more header to determine length */
-				if (!rxm->accum_len) {
+				if (!stm->accum_len) {
 					/* Start RX timer for new message */
-					strp_start_rx_timer(strp);
+					strp_start_timer(strp, timeo);
 				}
-				rxm->accum_len += cand_len;
+				stm->accum_len += cand_len;
 				eaten += cand_len;
-				STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+				STRP_STATS_INCR(strp->stats.need_more_hdr);
 				WARN_ON(eaten != orig_len);
 				break;
 			} else if (len < 0) {
-				if (len == -ESTRPIPE && rxm->accum_len) {
+				if (len == -ESTRPIPE && stm->accum_len) {
 					len = -ENODATA;
-					strp->rx_unrecov_intr = 1;
+					strp->unrecov_intr = 1;
 				} else {
-					strp->rx_interrupted = 1;
+					strp->interrupted = 1;
 				}
 				strp_parser_err(strp, len, desc);
 				break;
-			} else if (len > strp->sk->sk_rcvbuf) {
+			} else if (len > max_msg_size) {
 				/* Message length exceeds maximum allowed */
-				STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+				STRP_STATS_INCR(strp->stats.msg_too_big);
 				strp_parser_err(strp, -EMSGSIZE, desc);
 				break;
 			} else if (len <= (ssize_t)head->len -
-					  skb->len - rxm->strp.offset) {
+					  skb->len - stm->strp.offset) {
 				/* Length must be into new skb (and also
 				 * greater than zero)
 				 */
-				STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+				STRP_STATS_INCR(strp->stats.bad_hdr_len);
 				strp_parser_err(strp, -EPROTO, desc);
 				break;
 			}
 
-			rxm->strp.full_len = len;
+			stm->strp.full_len = len;
 		}
 
-		extra = (ssize_t)(rxm->accum_len + cand_len) -
-			rxm->strp.full_len;
+		extra = (ssize_t)(stm->accum_len + cand_len) -
+			stm->strp.full_len;
 
 		if (extra < 0) {
 			/* Message not complete yet. */
-			if (rxm->strp.full_len - rxm->accum_len >
+			if (stm->strp.full_len - stm->accum_len >
 			    strp_peek_len(strp)) {
-				/* Don't have the whole messages in the socket
-				 * buffer. Set strp->rx_need_bytes to wait for
+				/* Don't have the whole message in the socket
+				 * buffer. Set strp->need_bytes to wait for
 				 * the rest of the message. Also, set "early
 				 * eaten" since we've already buffered the skb
 				 * but don't consume yet per strp_read_sock.
 				 */
 
-				if (!rxm->accum_len) {
+				if (!stm->accum_len) {
 					/* Start RX timer for new message */
-					strp_start_rx_timer(strp);
+					strp_start_timer(strp, timeo);
 				}
 
-				strp->rx_need_bytes = rxm->strp.full_len -
-						       rxm->accum_len;
-				rxm->accum_len += cand_len;
-				rxm->early_eaten = cand_len;
-				STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+				strp->need_bytes = stm->strp.full_len -
+						       stm->accum_len;
+				stm->accum_len += cand_len;
+				stm->early_eaten = cand_len;
+				STRP_STATS_ADD(strp->stats.bytes, cand_len);
 				desc->count = 0; /* Stop reading socket */
 				break;
 			}
-			rxm->accum_len += cand_len;
+			stm->accum_len += cand_len;
 			eaten += cand_len;
 			WARN_ON(eaten != orig_len);
 			break;
@@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 		eaten += (cand_len - extra);
 
 		/* Hurray, we have a new message! */
-		del_timer(&strp->rx_msg_timer);
-		strp->rx_skb_head = NULL;
-		STRP_STATS_INCR(strp->stats.rx_msgs);
+		del_timer(&strp->msg_timer);
+		strp->skb_head = NULL;
+		STRP_STATS_INCR(strp->stats.msgs);
 
 		/* Give skb to upper layer */
 		strp->cb.rcv_msg(strp, head);
 
-		if (unlikely(strp->rx_paused)) {
+		if (unlikely(strp->paused)) {
 			/* Upper layer paused strp */
 			break;
 		}
@@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 	if (cloned_orig)
 		kfree_skb(orig_skb);
 
-	STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+	STRP_STATS_ADD(strp->stats.bytes, eaten);
 
 	return eaten;
 }
 
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo)
+{
+	read_descriptor_t desc; /* Dummy arg to strp_recv */
+
+	desc.arg.data = strp;
+
+	return __strp_recv(&desc, orig_skb, orig_offset, orig_len,
+			   max_msg_size, timeo);
+}
+EXPORT_SYMBOL_GPL(strp_process);
+
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		     unsigned int orig_offset, size_t orig_len)
+{
+	struct strparser *strp = (struct strparser *)desc->arg.data;
+
+	return __strp_recv(desc, orig_skb, orig_offset, orig_len,
+			   strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo);
+}
+
 static int default_read_sock_done(struct strparser *strp, int err)
 {
 	return err;
@@ -355,101 +388,129 @@ static int strp_read_sock(struct strparser *strp)
 /* Lower sock lock held */
 void strp_data_ready(struct strparser *strp)
 {
-	if (unlikely(strp->rx_stopped))
+	if (unlikely(strp->stopped))
 		return;
 
-	/* This check is needed to synchronize with do_strp_rx_work.
-	 * do_strp_rx_work acquires a process lock (lock_sock) whereas
+	/* This check is needed to synchronize with do_strp_work.
+	 * do_strp_work acquires a process lock (lock_sock) whereas
 	 * the lock held here is bh_lock_sock. The two locks can be
 	 * held by different threads at the same time, but bh_lock_sock
 	 * allows a thread in BH context to safely check if the process
 	 * lock is held. In this case, if the lock is held, queue work.
 	 */
 	if (sock_owned_by_user(strp->sk)) {
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 		return;
 	}
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		return;
 
-	if (strp->rx_need_bytes) {
-		if (strp_peek_len(strp) >= strp->rx_need_bytes)
-			strp->rx_need_bytes = 0;
+	if (strp->need_bytes) {
+		if (strp_peek_len(strp) >= strp->need_bytes)
+			strp->need_bytes = 0;
 		else
 			return;
 	}
 
 	if (strp_read_sock(strp) == -ENOMEM)
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_data_ready);
 
-static void do_strp_rx_work(struct strparser *strp)
+static void do_strp_work(struct strparser *strp)
 {
 	read_descriptor_t rd_desc;
-	struct sock *csk = strp->sk;
 
 	/* We need the read lock to synchronize with strp_data_ready. We
 	 * need the socket lock for calling strp_read_sock.
 	 */
-	lock_sock(csk);
+	strp->cb.lock(strp);
 
-	if (unlikely(strp->rx_stopped))
+	if (unlikely(strp->stopped))
 		goto out;
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		goto out;
 
 	rd_desc.arg.data = strp;
 
 	if (strp_read_sock(strp) == -ENOMEM)
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 
 out:
-	release_sock(csk);
+	strp->cb.unlock(strp);
 }
 
-static void strp_rx_work(struct work_struct *w)
+static void strp_work(struct work_struct *w)
 {
-	do_strp_rx_work(container_of(w, struct strparser, rx_work));
+	do_strp_work(container_of(w, struct strparser, work));
 }
 
-static void strp_rx_msg_timeout(unsigned long arg)
+static void strp_msg_timeout(unsigned long arg)
 {
 	struct strparser *strp = (struct strparser *)arg;
 
 	/* Message assembly timed out */
-	STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
-	lock_sock(strp->sk);
+	STRP_STATS_INCR(strp->stats.msg_timeouts);
+	strp->cb.lock(strp);
 	strp->cb.abort_parser(strp, ETIMEDOUT);
+	strp->cb.unlock(strp);
+}
+
+static void strp_sock_lock(struct strparser *strp)
+{
+	lock_sock(strp->sk);
+}
+
+static void strp_sock_unlock(struct strparser *strp)
+{
 	release_sock(strp->sk);
 }
 
-int strp_init(struct strparser *strp, struct sock *csk,
+int strp_init(struct strparser *strp, struct sock *sk,
 	      struct strp_callbacks *cb)
 {
-	struct socket *sock = csk->sk_socket;
 
 	if (!cb || !cb->rcv_msg || !cb->parse_msg)
 		return -EINVAL;
 
-	if (!sock->ops->read_sock || !sock->ops->peek_len)
-		return -EAFNOSUPPORT;
+	/* The sk (sock) arg determines the mode of the stream parser.
+	 *
+	 * If the sock is set then the strparser is in receive callback mode.
+	 * The upper layer calls strp_data_ready to kick receive processing
+	 * and strparser calls the read_sock function on the socket to
+	 * get packets.
+	 *
+	 * If the sock is not set then the strparser is in general mode.
+	 * The upper layer calls strp_process for each skb to be parsed.
+	 */
 
-	memset(strp, 0, sizeof(*strp));
+	if (sk) {
+		struct socket *sock = sk->sk_socket;
 
-	strp->sk = csk;
+		if (!sock->ops->read_sock || !sock->ops->peek_len)
+			return -EAFNOSUPPORT;
+	} else {
+		if (!cb->lock || !cb->unlock)
+			return -EINVAL;
+	}
 
-	setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
-		    (unsigned long)strp);
+	memset(strp, 0, sizeof(*strp));
 
-	INIT_WORK(&strp->rx_work, strp_rx_work);
+	strp->sk = sk;
 
+	strp->cb.lock = cb->lock ? : strp_sock_lock;
+	strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
 	strp->cb.rcv_msg = cb->rcv_msg;
 	strp->cb.parse_msg = cb->parse_msg;
 	strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
-	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
+
+	setup_timer(&strp->msg_timer, strp_msg_timeout,
+		    (unsigned long)strp);
+
+	INIT_WORK(&strp->work, strp_work);
 
 	return 0;
 }
@@ -457,12 +518,12 @@ EXPORT_SYMBOL_GPL(strp_init);
 
 void strp_unpause(struct strparser *strp)
 {
-	strp->rx_paused = 0;
+	strp->paused = 0;
 
-	/* Sync setting rx_paused with RX work */
+	/* Sync setting paused with RX work */
 	smp_mb();
 
-	queue_work(strp_wq, &strp->rx_work);
+	queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_unpause);
 
@@ -471,27 +532,27 @@ EXPORT_SYMBOL_GPL(strp_unpause);
  */
 void strp_done(struct strparser *strp)
 {
-	WARN_ON(!strp->rx_stopped);
+	WARN_ON(!strp->stopped);
 
-	del_timer_sync(&strp->rx_msg_timer);
-	cancel_work_sync(&strp->rx_work);
+	del_timer_sync(&strp->msg_timer);
+	cancel_work_sync(&strp->work);
 
-	if (strp->rx_skb_head) {
-		kfree_skb(strp->rx_skb_head);
-		strp->rx_skb_head = NULL;
+	if (strp->skb_head) {
+		kfree_skb(strp->skb_head);
+		strp->skb_head = NULL;
 	}
 }
 EXPORT_SYMBOL_GPL(strp_done);
 
 void strp_stop(struct strparser *strp)
 {
-	strp->rx_stopped = 1;
+	strp->stopped = 1;
 }
 EXPORT_SYMBOL_GPL(strp_stop);
 
 void strp_check_rcv(struct strparser *strp)
 {
-	queue_work(strp_wq, &strp->rx_work);
+	queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_check_rcv);
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7b52a380d710..5c53f22d62e8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p)
 	return false;
 }
 
-#define MAX_RECURSION_LEVEL 4
-
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
-	unsigned char max_level = 0;
 
 	if (too_many_unix_fds(current))
 		return -ETOOMANYREFS;
 
-	for (i = scm->fp->count - 1; i >= 0; i--) {
-		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
-		if (sk)
-			max_level = max(max_level,
-					unix_sk(sk)->recursion_level);
-	}
-	if (unlikely(max_level > MAX_RECURSION_LEVEL))
-		return -ETOOMANYREFS;
-
 	/*
 	 * Need to duplicate file references for the sake of garbage
 	 * collection.  Otherwise a socket in the fps might become a
@@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count - 1; i >= 0; i--)
 		unix_inflight(scm->fp->user, scm->fp->fp[i]);
-	return max_level;
+	return 0;
 }
 
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	long timeo;
 	struct scm_cookie scm;
-	int max_level;
 	int data_len = 0;
 	int sk_locked;
 
@@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	err = unix_scm_to_skb(&scm, skb, true);
 	if (err < 0)
 		goto out_free;
-	max_level = err + 1;
 
 	skb_put(skb, len - data_len);
 	skb->data_len = data_len;
@@ -1819,8 +1804,6 @@ restart_locked:
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
 	skb_queue_tail(&other->sk_receive_queue, skb);
-	if (max_level > unix_sk(other)->recursion_level)
-		unix_sk(other)->recursion_level = max_level;
 	unix_state_unlock(other);
 	other->sk_data_ready(other);
 	sock_put(other);
@@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	int sent = 0;
 	struct scm_cookie scm;
 	bool fds_sent = false;
-	int max_level;
 	int data_len;
 
 	wait_for_unix_gc();
@@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			kfree_skb(skb);
 			goto out_err;
 		}
-		max_level = err + 1;
 		fds_sent = true;
 
 		skb_put(skb, size - data_len);
@@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 		maybe_add_creds(skb, sock, other);
 		skb_queue_tail(&other->sk_receive_queue, skb);
-		if (max_level > unix_sk(other)->recursion_level)
-			unix_sk(other)->recursion_level = max_level;
 		unix_state_unlock(other);
 		other->sk_data_ready(other);
 		sent += size;
@@ -2324,7 +2303,6 @@ redo:
 		last_len = last ? last->len : 0;
 again:
 		if (skb == NULL) {
-			unix_sk(sk)->recursion_level = 0;
 			if (copied >= target)
 				goto unlock;
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5a1a98df3499..ac095936552d 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -74,7 +74,7 @@ DEFINE_RWLOCK(x25_list_lock);
 
 static const struct proto_ops x25_proto_ops;
 
-static struct x25_address null_x25_address = {"               "};
+static const struct x25_address null_x25_address = {"               "};
 
 #ifdef CONFIG_COMPAT
 struct compat_x25_subscrip_struct {
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5f7e8bfa0c2d..5cd7a244e88d 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev)
 
 static int xfrm_dev_unregister(struct net_device *dev)
 {
+	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
@@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev)
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_garbage_collect(dev_net(dev));
-
+	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ff61d8557929..06c3bf7ab86b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/cpu.h>
 #include <linux/audit.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
 	u8 flags;
 };
 
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -246,36 +249,6 @@ expired:
 	xfrm_pol_put(xp);
 }
 
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	if (unlikely(pol->walk.dead))
-		flo = NULL;
-	else
-		xfrm_pol_hold(pol);
-
-	return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
-	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
-	.get = xfrm_policy_flo_get,
-	.check = xfrm_policy_flo_check,
-	.delete = xfrm_policy_flo_delete,
-};
-
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
  */
@@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 				(unsigned long)policy);
 		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
 			    (unsigned long)policy);
-		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
 }
@@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else
 		hlist_add_head(&policy->bydst, chain);
 	__xfrm_policy_link(policy, dir);
-	atomic_inc(&net->xfrm.flow_cache_genid);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
 	if (policy->family == AF_INET)
@@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 	}
 	if (!cnt)
 		err = -ESRCH;
+	else
+		xfrm_policy_cache_flush();
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return err;
@@ -1175,7 +1148,7 @@ fail:
 }
 
 static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_policy *pol;
@@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
-static int flow_to_policy_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-
-	switch (dir) {
-	default:
-	case FLOW_DIR_IN:
-		return XFRM_POLICY_IN;
-	case FLOW_DIR_OUT:
-		return XFRM_POLICY_OUT;
-	case FLOW_DIR_FWD:
-		return XFRM_POLICY_FWD;
-	}
-}
-
-static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
-		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
-{
-	struct xfrm_policy *pol;
-
-	if (old_obj)
-		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
-
-	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
-	if (IS_ERR_OR_NULL(pol))
-		return ERR_CAST(pol);
-
-	/* Resolver returns two references:
-	 * one for cache and one for caller of flow_cache_lookup() */
-	xfrm_pol_hold(pol);
-
-	return &pol->flo;
-}
-
-static inline int policy_to_flow_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-	switch (dir) {
-	default:
-	case XFRM_POLICY_IN:
-		return FLOW_DIR_IN;
-	case XFRM_POLICY_OUT:
-		return FLOW_DIR_OUT;
-	case XFRM_POLICY_FWD:
-		return FLOW_DIR_FWD;
-	}
-}
-
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 						 const struct flowi *fl, u16 family)
 {
@@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 			}
 			err = security_xfrm_policy_lookup(pol->security,
 						      fl->flowi_secid,
-						      policy_to_flow_dir(dir));
+						      dir);
 			if (!err) {
 				if (!xfrm_pol_hold_rcu(pol))
 					goto again;
@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
 	return tos;
 }
 
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (xdst->route == NULL) {
-		/* Dummy bundle - if it has xfrms we were not
-		 * able to build bundle as template resolution failed.
-		 * It means we need to try again resolving. */
-		if (xdst->num_xfrms > 0)
-			return NULL;
-	} else if (dst->flags & DST_XFRM_QUEUE) {
-		return NULL;
-	} else {
-		/* Real bundle */
-		if (stale_bundle(dst))
-			return NULL;
-	}
-
-	dst_hold(dst);
-	return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (!xdst->route)
-		return 0;
-	if (stale_bundle(dst))
-		return 0;
-
-	return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-	dst->obsolete = DST_OBSOLETE_DEAD;
-	dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
-	.get = xfrm_bundle_flo_get,
-	.check = xfrm_bundle_flo_check,
-	.delete = xfrm_bundle_flo_delete,
-};
-
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		struct dst_entry *dst = &xdst->u.dst;
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
-		xdst->flo.ops = &xfrm_bundle_fc_ops;
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
@@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+	this_cpu_write(xfrm_last_dst, xdst);
+	if (old)
+		dst_release(&old->u.dst);
+}
+
+static void __xfrm_pcpu_work_fn(void)
+{
+	struct xfrm_dst *old;
+
+	old = this_cpu_read(xfrm_last_dst);
+	if (old && !xfrm_bundle_ok(old))
+		xfrm_last_dst_update(NULL, old);
+}
+
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+	local_bh_disable();
+	rcu_read_lock();
+	__xfrm_pcpu_work_fn();
+	rcu_read_unlock();
+	local_bh_enable();
+}
+
+void xfrm_policy_cache_flush(void)
+{
+	struct xfrm_dst *old;
+	bool found = 0;
+	int cpu;
+
+	local_bh_disable();
+	rcu_read_lock();
+	for_each_possible_cpu(cpu) {
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (old && !xfrm_bundle_ok(old)) {
+			if (smp_processor_id() == cpu) {
+				__xfrm_pcpu_work_fn();
+				continue;
+			}
+			found = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	local_bh_enable();
+
+	if (!found)
+		return;
+
+	get_online_cpus();
+
+	for_each_possible_cpu(cpu) {
+		bool bundle_release;
+
+		rcu_read_lock();
+		old = per_cpu(xfrm_last_dst, cpu);
+		bundle_release = old && !xfrm_bundle_ok(old);
+		rcu_read_unlock();
+
+		if (!bundle_release)
+			continue;
+
+		if (cpu_online(cpu)) {
+			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+			continue;
+		}
+
+		rcu_read_lock();
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (old && !xfrm_bundle_ok(old)) {
+			per_cpu(xfrm_last_dst, cpu) = NULL;
+			dst_release(&old->u.dst);
+		}
+		rcu_read_unlock();
+	}
+
+	put_online_cpus();
+}
+
+static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+{
+	unsigned int num_pols = xdst->num_pols;
+	unsigned int pol_dead = 0, i;
+
+	for (i = 0; i < num_pols; i++)
+		pol_dead |= xdst->pols[i]->walk.dead;
+
+	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
+	if (pol_dead)
+		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+
+	return pol_dead;
+}
+
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1847,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 {
 	struct net *net = xp_net(pols[0]);
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct xfrm_dst *xdst, *old;
 	struct dst_entry *dst;
-	struct xfrm_dst *xdst;
 	int err;
 
+	xdst = this_cpu_read(xfrm_last_dst);
+	if (xdst &&
+	    xdst->u.dst.dev == dst_orig->dev &&
+	    xdst->num_pols == num_pols &&
+	    !xfrm_pol_dead(xdst) &&
+	    memcmp(xdst->pols, pols,
+		   sizeof(struct xfrm_policy *) * num_pols) == 0) {
+		dst_hold(&xdst->u.dst);
+		return xdst;
+	}
+
+	old = xdst;
 	/* Try to instantiate a bundle */
 	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
 	if (err <= 0) {
@@ -1871,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
+	atomic_set(&xdst->u.dst.__refcnt, 2);
+	xfrm_last_dst_update(xdst, old);
+
 	return xdst;
 }
 
@@ -2051,86 +2027,39 @@ free_dst:
 	goto out;
 }
 
-static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
-		   struct flow_cache_object *oldflo, void *ctx)
+static struct xfrm_dst *
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
 {
-	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	struct xfrm_dst *xdst, *new_xdst;
-	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
-
-	/* Check if the policies from old bundle are usable */
-	xdst = NULL;
-	if (oldflo) {
-		xdst = container_of(oldflo, struct xfrm_dst, flo);
-		num_pols = xdst->num_pols;
-		num_xfrms = xdst->num_xfrms;
-		pol_dead = 0;
-		for (i = 0; i < num_pols; i++) {
-			pols[i] = xdst->pols[i];
-			pol_dead |= pols[i]->walk.dead;
-		}
-		if (pol_dead) {
-			/* Mark DST_OBSOLETE_DEAD to fail the next
-			 * xfrm_dst_check()
-			 */
-			xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-			dst_release_immediate(&xdst->u.dst);
-			xdst = NULL;
-			num_pols = 0;
-			num_xfrms = 0;
-			oldflo = NULL;
-		}
-	}
+	int num_pols = 0, num_xfrms = 0, err;
+	struct xfrm_dst *xdst;
 
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
-	if (xdst == NULL) {
-		num_pols = 1;
-		pols[0] = __xfrm_policy_lookup(net, fl, family,
-					       flow_to_policy_dir(dir));
-		err = xfrm_expand_policies(fl, family, pols,
+	num_pols = 1;
+	pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+	err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
-		if (err < 0)
-			goto inc_error;
-		if (num_pols == 0)
-			return NULL;
-		if (num_xfrms <= 0)
-			goto make_dummy_bundle;
-	}
+	if (err < 0)
+		goto inc_error;
+	if (num_pols == 0)
+		return NULL;
+	if (num_xfrms <= 0)
+		goto make_dummy_bundle;
 
-	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 						  xflo->dst_orig);
-	if (IS_ERR(new_xdst)) {
-		err = PTR_ERR(new_xdst);
+	if (IS_ERR(xdst)) {
+		err = PTR_ERR(xdst);
 		if (err != -EAGAIN)
 			goto error;
-		if (oldflo == NULL)
-			goto make_dummy_bundle;
-		dst_hold(&xdst->u.dst);
-		return oldflo;
-	} else if (new_xdst == NULL) {
+		goto make_dummy_bundle;
+	} else if (xdst == NULL) {
 		num_xfrms = 0;
-		if (oldflo == NULL)
-			goto make_dummy_bundle;
-		xdst->num_xfrms = 0;
-		dst_hold(&xdst->u.dst);
-		return oldflo;
-	}
-
-	/* Kill the previous bundle */
-	if (xdst) {
-		/* The policies were stolen for newly generated bundle */
-		xdst->num_pols = 0;
-		/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-		dst_release_immediate(&xdst->u.dst);
+		goto make_dummy_bundle;
 	}
 
-	/* We do need to return one reference for original caller */
-	dst_hold(&new_xdst->u.dst);
-	return &new_xdst->flo;
+	return xdst;
 
 make_dummy_bundle:
 	/* We found policies, but there's no bundles to instantiate:
@@ -2146,17 +2075,12 @@ make_dummy_bundle:
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 
 	dst_hold(&xdst->u.dst);
-	return &xdst->flo;
+	return xdst;
 
 inc_error:
 	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 error:
-	if (xdst != NULL) {
-		/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-		dst_release_immediate(&xdst->u.dst);
-	} else
-		xfrm_pols_put(pols, num_pols);
+	xfrm_pols_put(pols, num_pols);
 	return ERR_PTR(err);
 }
 
@@ -2187,11 +2111,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct sock *sk, int flags)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	struct flow_cache_object *flo;
 	struct xfrm_dst *xdst;
 	struct dst_entry *dst, *route;
 	u16 family = dst_orig->ops->family;
-	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	u8 dir = XFRM_POLICY_OUT;
 	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
 
 	dst = NULL;
@@ -2242,15 +2165,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		flo = flow_cache_lookup(net, fl, family, dir,
-					xfrm_bundle_lookup, &xflo);
-		if (flo == NULL)
+		xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+		if (xdst == NULL)
 			goto nopol;
-		if (IS_ERR(flo)) {
-			err = PTR_ERR(flo);
+		if (IS_ERR(xdst)) {
+			err = PTR_ERR(xdst);
 			goto dropdst;
 		}
-		xdst = container_of(flo, struct xfrm_dst, flo);
 
 		num_pols = xdst->num_pols;
 		num_xfrms = xdst->num_xfrms;
@@ -2449,12 +2370,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	int pi;
 	int reverse;
 	struct flowi fl;
-	u8 fl_dir;
 	int xerr_idx = -1;
 
 	reverse = dir & ~XFRM_POLICY_MASK;
 	dir &= XFRM_POLICY_MASK;
-	fl_dir = policy_to_flow_dir(dir);
 
 	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -2486,16 +2405,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol) {
-		struct flow_cache_object *flo;
-
-		flo = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup, NULL);
-		if (IS_ERR_OR_NULL(flo))
-			pol = ERR_CAST(flo);
-		else
-			pol = container_of(flo, struct xfrm_policy, flo);
-	}
+	if (!pol)
+		pol = xfrm_policy_lookup(net, &fl, family, dir);
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2641,11 +2552,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 	 * notice.  That's what we are validating here via the
 	 * stale_bundle() check.
 	 *
-	 * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
-	 * be marked on it.
 	 * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
 	 * be marked on it.
-	 * Both will force stable_bundle() to fail on any xdst bundle with
+	 * This will force stale_bundle() to fail on any xdst bundle with
 	 * this dst linked in it.
 	 */
 	if (dst->obsolete < 0 && !stale_bundle(dst))
@@ -2685,18 +2594,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-void xfrm_garbage_collect(struct net *net)
-{
-	flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
-	flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -3034,14 +2931,9 @@ static int __net_init xfrm_net_init(struct net *net)
 	rv = xfrm_sysctl_init(net);
 	if (rv < 0)
 		goto out_sysctl;
-	rv = flow_cache_init(net);
-	if (rv < 0)
-		goto out;
 
 	return 0;
 
-out:
-	xfrm_sysctl_fini(net);
 out_sysctl:
 	xfrm_policy_fini(net);
 out_policy:
@@ -3054,7 +2946,6 @@ out_statistics:
 
 static void __net_exit xfrm_net_exit(struct net *net)
 {
-	flow_cache_fini(net);
 	xfrm_sysctl_fini(net);
 	xfrm_policy_fini(net);
 	xfrm_state_fini(net);
@@ -3068,7 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-	flow_cache_hp_init();
+	int i;
+
+	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+				       GFP_KERNEL);
+	BUG_ON(!xfrm_pcpu_work);
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 6c0956d10db6..82cbbce69b79 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -724,9 +724,10 @@ restart:
 			}
 		}
 	}
-	if (cnt)
+	if (cnt) {
 		err = 0;
-
+		xfrm_policy_cache_flush();
+	}
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	return err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2be4c6af008a..1b539b7dcfab 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			return 0;
 		return err;
 	}
-	xfrm_garbage_collect(net);
 
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 87246be6feb8..770d46cdf9f4 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -37,6 +37,8 @@ hostprogs-y += xdp_tx_iptunnel
 hostprogs-y += test_map_in_map
 hostprogs-y += per_socket_stats_example
 hostprogs-y += load_sock_ops
+hostprogs-y += xdp_redirect
+hostprogs-y += xdp_redirect_map
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -78,6 +80,8 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
 xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
 test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
 per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
+xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
+xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -119,6 +123,8 @@ always += tcp_bufs_kern.o
 always += tcp_cong_kern.o
 always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
+always += xdp_redirect_kern.o
+always += xdp_redirect_map_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -155,6 +161,8 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
 HOSTLOADLIBES_lwt_len_hist += -l elf
 HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
 HOSTLOADLIBES_test_map_in_map += -lelf
+HOSTLOADLIBES_xdp_redirect += -lelf
+HOSTLOADLIBES_xdp_redirect_map += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
new file mode 100644
index 000000000000..a34ad457a684
--- /dev/null
+++ b/samples/bpf/xdp_redirect_kern.c
@@ -0,0 +1,81 @@
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 1,
+};
+
+
+static void swap_src_dst_mac(void *data)
+{
+	unsigned short *p = data;
+	unsigned short dst[3];
+
+	dst[0] = p[0];
+	dst[1] = p[1];
+	dst[2] = p[2];
+	p[0] = p[3];
+	p[1] = p[4];
+	p[2] = p[5];
+	p[3] = dst[0];
+	p[4] = dst[1];
+	p[5] = dst[2];
+}
+
+SEC("xdp_redirect")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	int rc = XDP_DROP;
+	int *ifindex, port = 0;
+	long *value;
+	u32 key = 0;
+	u64 nh_off;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return rc;
+
+	ifindex = bpf_map_lookup_elem(&tx_port, &port);
+	if (!ifindex)
+		return rc;
+
+	value = bpf_map_lookup_elem(&rxcnt, &key);
+	if (value)
+		*value += 1;
+
+	swap_src_dst_mac(data);
+	return bpf_redirect(*ifindex, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
new file mode 100644
index 000000000000..2faf196e17ea
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_kern.c
@@ -0,0 +1,83 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+	.type = BPF_MAP_TYPE_DEVMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 100,
+};
+
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 1,
+};
+
+
+static void swap_src_dst_mac(void *data)
+{
+	unsigned short *p = data;
+	unsigned short dst[3];
+
+	dst[0] = p[0];
+	dst[1] = p[1];
+	dst[2] = p[2];
+	p[0] = p[3];
+	p[1] = p[4];
+	p[2] = p[5];
+	p[3] = dst[0];
+	p[4] = dst[1];
+	p[5] = dst[2];
+}
+
+SEC("xdp_redirect_map")
+int xdp_redirect_map_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	int rc = XDP_DROP;
+	int vport, port = 0, m = 0;
+	long *value;
+	u32 key = 0;
+	u64 nh_off;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return rc;
+
+	/* constant virtual port */
+	vport = 0;
+
+	/* count packet in global counter */
+	value = bpf_map_lookup_elem(&rxcnt, &key);
+	if (value)
+		*value += 1;
+
+	swap_src_dst_mac(data);
+
+	/* send packet out physical port */
+	return bpf_redirect_map(&tx_port, vport, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
new file mode 100644
index 000000000000..a1ad00fdaa8a
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -0,0 +1,137 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static int ifindex_in;
+static int ifindex_out;
+
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+	set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+	exit(0);
+}
+
+/* simple per-protocol drop counter
+ */
+static void poll_stats(int interval, int ifindex)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus], prev[nr_cpus];
+
+	memset(prev, 0, sizeof(prev));
+
+	while (1) {
+		__u64 sum = 0;
+		__u32 key = 0;
+		int i;
+
+		sleep(interval);
+		assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+		for (i = 0; i < nr_cpus; i++)
+			sum += (values[i] - prev[i]);
+		if (sum)
+			printf("ifindex %i: %10llu pkt/s\n",
+			       ifindex, sum / interval);
+		memcpy(prev, values, sizeof(values));
+	}
+}
+
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+		"OPTS:\n"
+		"    -S    use skb-mode\n"
+		"    -N    enforce native mode\n",
+		prog);
+}
+
+
+int main(int argc, char **argv)
+{
+	const char *optstr = "SN";
+	char filename[256];
+	int ret, opt, key = 0;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == argc) {
+		printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+		return 1;
+	}
+
+	ifindex_in = strtoul(argv[optind], NULL, 0);
+	ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+	printf("input: %d output: %d\n", ifindex_in, ifindex_out);
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+		printf("link set xdp fd failed\n");
+		return 1;
+	}
+
+	printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n",
+		map_fd[0], map_fd[1], map_fd[2]);
+
+	/* populate virtual to physical port map */
+	ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+	if (ret) {
+		perror("bpf_update_elem");
+		goto out;
+	}
+
+	poll_stats(2, ifindex_out);
+
+out:
+	return 0;
+}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
new file mode 100644
index 000000000000..f705a1905d2d
--- /dev/null
+++ b/samples/bpf/xdp_redirect_user.c
@@ -0,0 +1,134 @@
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static int ifindex_in;
+static int ifindex_out;
+
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+	set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+	exit(0);
+}
+
+/* simple per-protocol drop counter
+ */
+static void poll_stats(int interval, int ifindex)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus], prev[nr_cpus];
+
+	memset(prev, 0, sizeof(prev));
+
+	while (1) {
+		__u64 sum = 0;
+		__u32 key = 0;
+		int i;
+
+		sleep(interval);
+		assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+		for (i = 0; i < nr_cpus; i++)
+			sum += (values[i] - prev[i]);
+		if (sum)
+			printf("ifindex %i: %10llu pkt/s\n",
+			       ifindex, sum / interval);
+		memcpy(prev, values, sizeof(values));
+	}
+}
+
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+		"OPTS:\n"
+		"    -S    use skb-mode\n"
+		"    -N    enforce native mode\n",
+		prog);
+}
+
+
+int main(int argc, char **argv)
+{
+	const char *optstr = "SN";
+	char filename[256];
+	int ret, opt, key = 0;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == argc) {
+		printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+		return 1;
+	}
+
+	ifindex_in = strtoul(argv[optind], NULL, 0);
+	ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+	printf("input: %d output: %d\n", ifindex_in, ifindex_out);
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+		printf("link set xdp fd failed\n");
+		return 1;
+	}
+
+	/* bpf redirect port */
+	ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+	if (ret) {
+		perror("bpf_update_elem");
+		goto out;
+	}
+
+	poll_stats(2, ifindex_out);
+
+out:
+	return 0;
+}
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1450f85b946d..36a7ce9e11ff 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
 	struct net *net;
 
 	rtnl_lock();
-	for_each_net(net) {
-		atomic_inc(&net->xfrm.flow_cache_genid);
+	for_each_net(net)
 		rt_genid_bump_all(net);
-	}
 	rtnl_unlock();
 }
 #else
diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
deleted file mode 100755
index 89b25068cd98..000000000000
--- a/tools/hv/bondvf.sh
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/bin/bash
-
-# This example script creates bonding network devices based on synthetic NIC
-# (the virtual network adapter usually provided by Hyper-V) and the matching
-# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
-# function as one network device, and fail over to the synthetic NIC if VF is
-# down.
-#
-# Usage:
-# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
-#   machine (VM)
-# - Run this scripts on the VM. It will create configuration files in
-#   distro specific directory.
-# - Reboot the VM, so that the bonding config are enabled.
-#
-# The config files are DHCP by default. You may edit them if you need to change
-# to Static IP or change other settings.
-#
-
-sysdir=/sys/class/net
-netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
-bondcnt=0
-
-# Detect Distro
-if [ -f /etc/redhat-release ];
-then
-	cfgdir=/etc/sysconfig/network-scripts
-	distro=redhat
-elif grep -q 'Ubuntu' /etc/issue
-then
-	cfgdir=/etc/network
-	distro=ubuntu
-elif grep -q 'SUSE' /etc/issue
-then
-	cfgdir=/etc/sysconfig/network
-	distro=suse
-else
-	echo "Unsupported Distro"
-	exit 1
-fi
-
-echo Detected Distro: $distro, or compatible
-
-# Get a list of ethernet names
-list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
-eth_cnt=${#list_eth[@]}
-
-echo List of net devices:
-
-# Get the MAC addresses
-for (( i=0; i < $eth_cnt; i++ ))
-do
-	list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
-	echo ${list_eth[$i]}, ${list_mac[$i]}
-done
-
-# Find NIC with matching MAC
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
-	for (( j=i+1; j < $eth_cnt; j++ ))
-	do
-		if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
-		then
-			list_match[$i]=${list_eth[$j]}
-			break
-		fi
-	done
-done
-
-function create_eth_cfg_redhat {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo DEVICE=$1 >>$fn
-	echo TYPE=Ethernet >>$fn
-	echo BOOTPROTO=none >>$fn
-	echo UUID=`uuidgen` >>$fn
-	echo ONBOOT=yes >>$fn
-	echo PEERDNS=yes >>$fn
-	echo IPV6INIT=yes >>$fn
-	echo MASTER=$2 >>$fn
-	echo SLAVE=yes >>$fn
-}
-
-function create_eth_cfg_pri_redhat {
-	create_eth_cfg_redhat $1 $2
-}
-
-function create_bond_cfg_redhat {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo DEVICE=$1 >>$fn
-	echo TYPE=Bond >>$fn
-	echo BOOTPROTO=dhcp >>$fn
-	echo UUID=`uuidgen` >>$fn
-	echo ONBOOT=yes >>$fn
-	echo PEERDNS=yes >>$fn
-	echo IPV6INIT=yes >>$fn
-	echo BONDING_MASTER=yes >>$fn
-	echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
-}
-
-function del_eth_cfg_ubuntu {
-	local mainfn=$cfgdir/interfaces
-	local fnlist=( $mainfn )
-
-	local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`)
-
-	local i
-	for i in "${dirlist[@]}"
-	do
-		fnlist+=(`ls $i 2>/dev/null`)
-	done
-
-	local tmpfl=$(mktemp)
-
-	local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
-	local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
-
-	local fn
-	for fn in "${fnlist[@]}"
-	do
-		awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \
-			$fn >$tmpfl
-
-		cp $tmpfl $fn
-	done
-
-	rm $tmpfl
-}
-
-function create_eth_cfg_ubuntu {
-	local fn=$cfgdir/interfaces
-
-	del_eth_cfg_ubuntu $1
-	echo $'\n'auto $1 >>$fn
-	echo iface $1 inet manual >>$fn
-	echo bond-master $2 >>$fn
-}
-
-function create_eth_cfg_pri_ubuntu {
-	local fn=$cfgdir/interfaces
-
-	del_eth_cfg_ubuntu $1
-	echo $'\n'allow-hotplug $1 >>$fn
-	echo iface $1 inet manual >>$fn
-	echo bond-master $2 >>$fn
-	echo bond-primary $1 >>$fn
-}
-
-function create_bond_cfg_ubuntu {
-	local fn=$cfgdir/interfaces
-
-	del_eth_cfg_ubuntu $1
-
-	echo $'\n'auto $1 >>$fn
-	echo iface $1 inet dhcp >>$fn
-	echo bond-mode active-backup >>$fn
-	echo bond-miimon 100 >>$fn
-	echo bond-slaves none >>$fn
-}
-
-function create_eth_cfg_suse {
-        local fn=$cfgdir/ifcfg-$1
-
-        rm -f $fn
-	echo BOOTPROTO=none >>$fn
-	echo STARTMODE=auto >>$fn
-}
-
-function create_eth_cfg_pri_suse {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo BOOTPROTO=none >>$fn
-	echo STARTMODE=hotplug >>$fn
-}
-
-function create_bond_cfg_suse {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo BOOTPROTO=dhcp >>$fn
-	echo STARTMODE=auto >>$fn
-	echo BONDING_MASTER=yes >>$fn
-	echo BONDING_SLAVE_0=$2 >>$fn
-	echo BONDING_SLAVE_1=$3 >>$fn
-	echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
-}
-
-function create_bond {
-	local bondname=bond$bondcnt
-	local primary
-	local secondary
-
-	local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
-	local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
-
-	if [ "$class_id1" = "$netvsc_cls" ]
-	then
-		primary=$2
-		secondary=$1
-	elif [ "$class_id2" = "$netvsc_cls" ]
-	then
-		primary=$1
-		secondary=$2
-	else
-		return 0
-	fi
-
-	echo $'\nBond name:' $bondname
-
-	echo configuring $primary
-	create_eth_cfg_pri_$distro $primary $bondname
-
-	echo configuring $secondary
-	create_eth_cfg_$distro $secondary $bondname
-
-	echo creating: $bondname with primary slave: $primary
-	create_bond_cfg_$distro $bondname $primary $secondary
-
-	let bondcnt=bondcnt+1
-}
-
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
-        if [ -n "${list_match[$i]}" ]
-        then
-		create_bond ${list_eth[$i]} ${list_match[$i]}
-        fi
-done
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ce2988be4f0e..1579cab49717 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -104,6 +104,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LPM_TRIE,
 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
 	BPF_MAP_TYPE_HASH_OF_MAPS,
+	BPF_MAP_TYPE_DEVMAP,
 };
 
 enum bpf_prog_type {
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 1f5300e56b44..445289555487 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -189,6 +189,10 @@ install_lib: all_cmd
 	$(call QUIET_INSTALL, $(LIB_FILE)) \
 		$(call do_install,$(LIB_FILE),$(libdir_SQ))
 
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,bpf.h,$(prefix)/include/bpf,644)
+
 install: install_lib
 
 ### Cleaning rules
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d50ac342dc92..acbd60519467 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
 	(void *) BPF_FUNC_clone_redirect;
 static int (*bpf_redirect)(int ifindex, int flags) =
 	(void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+	(void *) BPF_FUNC_redirect_map;
 static int (*bpf_perf_event_output)(void *ctx, void *map,
 				    unsigned long long flags, void *data,
 				    int size) =
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 79601c81e169..c991ab69a720 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -438,6 +438,21 @@ static void test_arraymap_percpu_many_keys(void)
 	close(fd);
 }
 
+static void test_devmap(int task, void *data)
+{
+	int fd;
+	__u32 key, value;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+			    2, 0);
+	if (fd < 0) {
+		printf("Failed to create arraymap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	close(fd);
+}
+
 #define MAP_SIZE (32 * 1024)
 
 static void test_map_large(void)
@@ -605,6 +620,8 @@ static void run_all_tests(void)
 
 	test_arraymap_percpu_many_keys();
 
+	test_devmap(0, NULL);
+
 	test_map_large();
 	test_map_parallel();
 	test_map_stress();
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index afe109e5508a..9801253e4802 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,4 @@
+msg_zerocopy
 socket
 psock_fanout
 psock_tpacket
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f6c9dbf478f8..6135a8448900 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket
 TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack
+TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
new file mode 100644
index 000000000000..448c69a8af74
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -0,0 +1,697 @@
+/* Evaluate MSG_ZEROCOPY
+ *
+ * Send traffic between two processes over one of the supported
+ * protocols and modes:
+ *
+ * PF_INET/PF_INET6
+ * - SOCK_STREAM
+ * - SOCK_DGRAM
+ * - SOCK_DGRAM with UDP_CORK
+ * - SOCK_RAW
+ * - SOCK_RAW with IP_HDRINCL
+ *
+ * PF_PACKET
+ * - SOCK_DGRAM
+ * - SOCK_RAW
+ *
+ * Start this program on two connected hosts, one in send mode and
+ * the other with option '-r' to put it in receiver mode.
+ *
+ * If zerocopy mode ('-z') is enabled, the sender will verify that
+ * the kernel queues completions on the error queue for all zerocopy
+ * transfers.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY		SO_EE_ORIGIN_UPAGE
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY	59
+#endif
+
+#ifndef SO_EE_CODE_ZEROCOPY_COPIED
+#define SO_EE_CODE_ZEROCOPY_COPIED	1
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY	0x4000000
+#endif
+
+static int  cfg_cork;
+static bool cfg_cork_mixed;
+static int  cfg_cpu		= -1;		/* default: pin to last cpu */
+static int  cfg_family		= PF_UNSPEC;
+static int  cfg_ifindex		= 1;
+static int  cfg_payload_len;
+static int  cfg_port		= 8000;
+static bool cfg_rx;
+static int  cfg_runtime_ms	= 4200;
+static int  cfg_verbose;
+static int  cfg_waittime_ms	= 500;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+
+static char payload[IP_MAXPACKET];
+static long packets, bytes, completions, expected_completions;
+static int  zerocopied = -1;
+static uint32_t next_completion;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_words; i++)
+		sum += start[i];
+
+	while (sum >> 16)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int do_setcpu(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		error(1, 0, "setaffinity %d", cpu);
+
+	if (cfg_verbose)
+		fprintf(stderr, "cpu: %u\n", cpu);
+
+	return 0;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+	if (setsockopt(fd, level, optname, &val, sizeof(val)))
+		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_poll(int fd, int events)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = events;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	ret = poll(&pfd, 1, cfg_waittime_ms);
+	if (ret == -1)
+		error(1, errno, "poll");
+
+	return ret && (pfd.revents & events);
+}
+
+static int do_accept(int fd)
+{
+	int fda = fd;
+
+	fd = accept(fda, NULL, NULL);
+	if (fd == -1)
+		error(1, errno, "accept");
+	if (close(fda))
+		error(1, errno, "close listen sock");
+
+	return fd;
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+{
+	int ret, len, i, flags;
+
+	len = 0;
+	for (i = 0; i < msg->msg_iovlen; i++)
+		len += msg->msg_iov[i].iov_len;
+
+	flags = MSG_DONTWAIT;
+	if (do_zerocopy)
+		flags |= MSG_ZEROCOPY;
+
+	ret = sendmsg(fd, msg, flags);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "send");
+	if (cfg_verbose && ret != len)
+		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+
+	if (len) {
+		packets++;
+		bytes += ret;
+		if (do_zerocopy && ret)
+			expected_completions++;
+	}
+
+	return true;
+}
+
+static void do_sendmsg_corked(int fd, struct msghdr *msg)
+{
+	bool do_zerocopy = cfg_zerocopy;
+	int i, payload_len, extra_len;
+
+	/* split up the packet. for non-multiple, make first buffer longer */
+	payload_len = cfg_payload_len / cfg_cork;
+	extra_len = cfg_payload_len - (cfg_cork * payload_len);
+
+	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+	for (i = 0; i < cfg_cork; i++) {
+
+		/* in mixed-frags mode, alternate zerocopy and copy frags
+		 * start with non-zerocopy, to ensure attach later works
+		 */
+		if (cfg_cork_mixed)
+			do_zerocopy = (i & 1);
+
+		msg->msg_iov[0].iov_len = payload_len + extra_len;
+		extra_len = 0;
+
+		do_sendmsg(fd, msg, do_zerocopy);
+	}
+
+	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+}
+
+static int setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+	struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
+	struct sockaddr_in *saddr = (void *) &cfg_src_addr;
+
+	memset(iph, 0, sizeof(*iph));
+
+	iph->version	= 4;
+	iph->tos	= 0;
+	iph->ihl	= 5;
+	iph->ttl	= 2;
+	iph->saddr	= saddr->sin_addr.s_addr;
+	iph->daddr	= daddr->sin_addr.s_addr;
+	iph->protocol	= IPPROTO_EGP;
+	iph->tot_len	= htons(sizeof(*iph) + payload_len);
+	iph->check	= get_ip_csum((void *) iph, iph->ihl << 1);
+
+	return sizeof(*iph);
+}
+
+static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
+{
+	struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
+	struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
+
+	memset(ip6h, 0, sizeof(*ip6h));
+
+	ip6h->version		= 6;
+	ip6h->payload_len	= htons(payload_len);
+	ip6h->nexthdr		= IPPROTO_EGP;
+	ip6h->hop_limit		= 2;
+	ip6h->saddr		= saddr->sin6_addr;
+	ip6h->daddr		= daddr->sin6_addr;
+
+	return sizeof(*ip6h);
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+	int fd;
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+	if (cfg_zerocopy)
+		do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
+
+	if (domain != PF_PACKET)
+		if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+			error(1, errno, "connect");
+
+	return fd;
+}
+
+static bool do_recv_completion(int fd)
+{
+	struct sock_extended_err *serr;
+	struct msghdr msg = {};
+	struct cmsghdr *cm;
+	uint32_t hi, lo, range;
+	int ret, zerocopy;
+	char control[100];
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "recvmsg notification");
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, errno, "recvmsg notification: truncated");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (!cm)
+		error(1, 0, "cmsg: no cmsg");
+	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+		error(1, 0, "serr: wrong type: %d.%d",
+		      cm->cmsg_level, cm->cmsg_type);
+
+	serr = (void *) CMSG_DATA(cm);
+	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
+	if (serr->ee_errno != 0)
+		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+
+	hi = serr->ee_data;
+	lo = serr->ee_info;
+	range = hi - lo + 1;
+
+	/* Detect notification gaps. These should not happen often, if at all.
+	 * Gaps can occur due to drops, reordering and retransmissions.
+	 */
+	if (lo != next_completion)
+		fprintf(stderr, "gap: %u..%u does not append to %u\n",
+			lo, hi, next_completion);
+	next_completion = hi + 1;
+
+	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+	if (zerocopied == -1)
+		zerocopied = zerocopy;
+	else if (zerocopied != zerocopy) {
+		fprintf(stderr, "serr: inconsistent\n");
+		zerocopied = zerocopy;
+	}
+
+	if (cfg_verbose >= 2)
+		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+			range, hi, lo);
+
+	completions += range;
+	return true;
+}
+
+/* Read all outstanding messages on the errqueue */
+static void do_recv_completions(int fd)
+{
+	while (do_recv_completion(fd)) {}
+}
+
+/* Wait for all remaining completions on the errqueue */
+static void do_recv_remaining_completions(int fd)
+{
+	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+
+	while (completions < expected_completions &&
+	       gettimeofday_ms() < tstop) {
+		if (do_poll(fd, POLLERR))
+			do_recv_completions(fd);
+	}
+
+	if (completions < expected_completions)
+		error(1, 0, "missing notifications: %lu < %lu\n",
+		      completions, expected_completions);
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+	struct iovec iov[3] = { {0} };
+	struct sockaddr_ll laddr;
+	struct msghdr msg = {0};
+	struct ethhdr eth;
+	union {
+		struct ipv6hdr ip6h;
+		struct iphdr iph;
+	} nh;
+	uint64_t tstop;
+	int fd;
+
+	fd = do_setup_tx(domain, type, protocol);
+
+	if (domain == PF_PACKET) {
+		uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
+
+		/* sock_raw passes ll header as data */
+		if (type == SOCK_RAW) {
+			memset(eth.h_dest, 0x06, ETH_ALEN);
+			memset(eth.h_source, 0x02, ETH_ALEN);
+			eth.h_proto = htons(proto);
+			iov[0].iov_base = &eth;
+			iov[0].iov_len = sizeof(eth);
+			msg.msg_iovlen++;
+		}
+
+		/* both sock_raw and sock_dgram expect name */
+		memset(&laddr, 0, sizeof(laddr));
+		laddr.sll_family	= AF_PACKET;
+		laddr.sll_ifindex	= cfg_ifindex;
+		laddr.sll_protocol	= htons(proto);
+		laddr.sll_halen		= ETH_ALEN;
+
+		memset(laddr.sll_addr, 0x06, ETH_ALEN);
+
+		msg.msg_name		= &laddr;
+		msg.msg_namelen		= sizeof(laddr);
+	}
+
+	/* packet and raw sockets with hdrincl must pass network header */
+	if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+		if (cfg_family == PF_INET)
+			iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
+		else
+			iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
+
+		iov[1].iov_base = (void *) &nh;
+		msg.msg_iovlen++;
+	}
+
+	iov[2].iov_base = payload;
+	iov[2].iov_len = cfg_payload_len;
+	msg.msg_iovlen++;
+	msg.msg_iov = &iov[3 - msg.msg_iovlen];
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (cfg_cork)
+			do_sendmsg_corked(fd, &msg);
+		else
+			do_sendmsg(fd, &msg, cfg_zerocopy);
+
+		while (!do_poll(fd, POLLOUT)) {
+			if (cfg_zerocopy)
+				do_recv_completions(fd);
+		}
+
+	} while (gettimeofday_ms() < tstop);
+
+	if (cfg_zerocopy)
+		do_recv_remaining_completions(fd);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
+		packets, bytes >> 20, completions,
+		zerocopied == 1 ? 'y' : 'n');
+}
+
+static int do_setup_rx(int domain, int type, int protocol)
+{
+	int fd;
+
+	/* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
+	 * to recv the only copy of the packet, not a clone
+	 */
+	if (domain == PF_PACKET)
+		error(1, 0, "Use PF_INET/SOCK_RAW to read");
+
+	if (type == SOCK_RAW && protocol == IPPROTO_RAW)
+		error(1, 0, "IPPROTO_RAW: not supported on Rx");
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
+	do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
+	do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+	if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
+		error(1, errno, "bind");
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 1))
+			error(1, errno, "listen");
+		fd = do_accept(fd);
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+	int ret;
+
+	/* MSG_TRUNC flushes up to len bytes */
+	ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+	if (ret == -1 && errno == EAGAIN)
+		return;
+	if (ret == -1)
+		error(1, errno, "flush");
+	if (!ret)
+		return;
+
+	packets++;
+	bytes += ret;
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_datagram(int fd, int type)
+{
+	int ret, off = 0;
+	char buf[64];
+
+	/* MSG_TRUNC will return full datagram length */
+	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+	if (ret == -1 && errno == EAGAIN)
+		return;
+
+	/* raw ipv4 return with header, raw ipv6 without */
+	if (cfg_family == PF_INET && type == SOCK_RAW) {
+		off += sizeof(struct iphdr);
+		ret -= sizeof(struct iphdr);
+	}
+
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != cfg_payload_len)
+		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+	if (ret > sizeof(buf) - off)
+		ret = sizeof(buf) - off;
+	if (memcmp(buf + off, payload, ret))
+		error(1, 0, "recv: data mismatch");
+
+	packets++;
+	bytes += cfg_payload_len;
+}
+
+static void do_rx(int domain, int type, int protocol)
+{
+	uint64_t tstop;
+	int fd;
+
+	fd = do_setup_rx(domain, type, protocol);
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (type == SOCK_STREAM)
+			do_flush_tcp(fd);
+		else
+			do_flush_datagram(fd, type);
+
+		do_poll(fd, POLLIN);
+
+	} while (gettimeofday_ms() < tstop);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+	int i;
+
+	if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
+		error(1, 0, "can only cork udp sockets");
+
+	do_setcpu(cfg_cpu);
+
+	for (i = 0; i < IP_MAXPACKET; i++)
+		payload[i] = 'a' + (i % 26);
+
+	if (cfg_rx)
+		do_rx(domain, type, protocol);
+	else
+		do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [options] <test>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = sizeof(payload) -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	int c;
+
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'c':
+			cfg_cork = strtol(optarg, NULL, 0);
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'D':
+			setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+			break;
+		case 'i':
+			cfg_ifindex = if_nametoindex(optarg);
+			if (cfg_ifindex == 0)
+				error(1, errno, "invalid iface: %s", optarg);
+			break;
+		case 'm':
+			cfg_cork_mixed = true;
+			break;
+		case 'p':
+			cfg_port = htons(strtoul(optarg, NULL, 0));
+			break;
+		case 'r':
+			cfg_rx = true;
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			setup_sockaddr(cfg_family, optarg, &cfg_src_addr);
+			break;
+		case 't':
+			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'v':
+			cfg_verbose++;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (cfg_payload_len > max_payload_len)
+		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+	if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
+		error(1, 0, "-m: cork_mixed requires corking and zerocopy");
+
+	if (optind != argc - 1)
+		usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+	const char *cfg_test;
+
+	parse_opts(argc, argv);
+
+	cfg_test = argv[argc - 1];
+
+	if (!strcmp(cfg_test, "packet"))
+		do_test(PF_PACKET, SOCK_RAW, 0);
+	else if (!strcmp(cfg_test, "packet_dgram"))
+		do_test(PF_PACKET, SOCK_DGRAM, 0);
+	else if (!strcmp(cfg_test, "raw"))
+		do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
+	else if (!strcmp(cfg_test, "raw_hdrincl"))
+		do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
+	else if (!strcmp(cfg_test, "tcp"))
+		do_test(cfg_family, SOCK_STREAM, 0);
+	else if (!strcmp(cfg_test, "udp"))
+		do_test(cfg_family, SOCK_DGRAM, 0);
+	else
+		error(1, 0, "unknown cfg_test %s", cfg_test);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
new file mode 100755
index 000000000000..d571d213418d
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+	exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+	readonly SADDR="${SADDR4}"
+	readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+	readonly SADDR="${SADDR6}"
+	readonly DADDR="${DADDR6}"
+else
+	echo "Invalid IP version ${IP}"
+	exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet:	use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+	RXMODE='raw'
+	;;
+*)
+	RXMODE="${TXMODE}"
+	;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+	ip netns del "${NS2}"
+	ip netns del "${NS1}"
+	sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+	local readonly ARGS="$1"
+
+	echo "ipv${IP} ${TXMODE} ${ARGS}"
+	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+	sleep 0.2
+	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+	wait
+}
+
+do_test "${EXTRA_ARGS}"
+do_test "-z ${EXTRA_ARGS}"
+echo ok