From 1de3cd4fb49f3463679c49afe0aa9ceb133f3e49 Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Wed, 8 Mar 2017 16:10:19 +0000
Subject: drm: hdlcd: Fix the calculation of the scanout start address

The calculation of the framebuffer's start address was wrongly using
the CRTC's x and y position rather than the one of the source
framebuffer. To fix that we need to update the plane_check code to
call drm_plane_helper_check_state() to clip the src and dst coordinates.
While there so some minor cleanup of redundant freeing of
devm_alloc-ated memory.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
---
 drivers/gpu/drm/arm/hdlcd_crtc.c | 47 ++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
index 20ebfb4fbdfa..c65116348281 100644
--- a/drivers/gpu/drm/arm/hdlcd_crtc.c
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -10,6 +10,7 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
@@ -206,16 +207,33 @@ static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = {
 static int hdlcd_plane_atomic_check(struct drm_plane *plane,
 				    struct drm_plane_state *state)
 {
-	u32 src_w, src_h;
+	struct drm_rect clip = { 0 };
+	struct drm_crtc_state *crtc_state;
+	u32 src_h = state->src_h >> 16;
 
-	src_w = state->src_w >> 16;
-	src_h = state->src_h >> 16;
+	/* only the HDLCD_REG_FB_LINE_COUNT register has a limit */
+	if (src_h >= HDLCD_MAX_YRES) {
+		DRM_DEBUG_KMS("Invalid source width: %d\n", src_h);
+		return -EINVAL;
+	}
+
+	if (!state->fb || !state->crtc)
+		return 0;
 
-	/* we can't do any scaling of the plane source */
-	if ((src_w != state->crtc_w) || (src_h != state->crtc_h))
+	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
+							state->crtc);
+	if (!crtc_state) {
+		DRM_DEBUG_KMS("Invalid crtc state\n");
 		return -EINVAL;
+	}
 
-	return 0;
+	clip.x2 = crtc_state->adjusted_mode.hdisplay;
+	clip.y2 = crtc_state->adjusted_mode.vdisplay;
+
+	return drm_plane_helper_check_state(state, &clip,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    false, true);
 }
 
 static void hdlcd_plane_atomic_update(struct drm_plane *plane,
@@ -224,21 +242,20 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane,
 	struct drm_framebuffer *fb = plane->state->fb;
 	struct hdlcd_drm_private *hdlcd;
 	struct drm_gem_cma_object *gem;
-	u32 src_w, src_h, dest_w, dest_h;
+	u32 src_x, src_y, dest_h;
 	dma_addr_t scanout_start;
 
 	if (!fb)
 		return;
 
-	src_w = plane->state->src_w >> 16;
-	src_h = plane->state->src_h >> 16;
-	dest_w = plane->state->crtc_w;
-	dest_h = plane->state->crtc_h;
+	src_x = plane->state->src.x1 >> 16;
+	src_y = plane->state->src.y1 >> 16;
+	dest_h = drm_rect_height(&plane->state->dst);
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
+
 	scanout_start = gem->paddr + fb->offsets[0] +
-		plane->state->crtc_y * fb->pitches[0] +
-		plane->state->crtc_x *
-		fb->format->cpp[0];
+			src_y * fb->pitches[0] +
+			src_x *	fb->format->cpp[0];
 
 	hdlcd = plane->dev->dev_private;
 	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, fb->pitches[0]);
@@ -285,7 +302,6 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)
 				       formats, ARRAY_SIZE(formats),
 				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret) {
-		devm_kfree(drm->dev, plane);
 		return ERR_PTR(ret);
 	}
 
@@ -309,7 +325,6 @@ int hdlcd_setup_crtc(struct drm_device *drm)
 					&hdlcd_crtc_funcs, NULL);
 	if (ret) {
 		hdlcd_plane_destroy(primary);
-		devm_kfree(drm->dev, primary);
 		return ret;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From ec8542cb8a628fd10522f8421182207277f90185 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 29 Jan 2017 20:40:59 +0200
Subject: ARM: dts: bcm2835: fix uart0 pinctrl node names

Downstream kernel uses pins 32, 33 as UART0 (PL011) Rx/Tx to communicate with
the Bluetooth chip. So ALT3 of these pins is most likely not CTS/RTS. Change
the node name to reflect that. This matches section 6.2 "Alternative Function
Assignments" in the BCM2835 ARM Peripherals document.

With this change in place, adding

  &uart0 {
	 pinctrl-names = "default";
	 pinctrl-0 = <&uart0_gpio32 &gpclk2_gpio43>;
	 status = "okay";
  };

to bcm2837-rpi-3-b.dts does the right thing on my Raspberry Pi 3.

Pins 30, 31 are CTS/RTS of UART0 in alternate function 3. Rename uart0_gpio30
as well.

While at it, fix a little typo in a nearby comment.

Fixes: 21ff843931b ("ARM: dts: bcm283x: Define standard pinctrl groups in the gpio node.")
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm283x.dtsi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 35cea3fcaf5c..e9623af3ad76 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -295,17 +295,17 @@
 			/* Separate from the uart0_gpio14 group
 			 * because it conflicts with spi1_gpio16, and
 			 * people often run uart0 on the two pins
-			 * without flow contrl.
+			 * without flow control.
 			 */
 			uart0_ctsrts_gpio16: uart0_ctsrts_gpio16 {
 				brcm,pins = <16 17>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
-			uart0_gpio30: uart0_gpio30 {
+			uart0_ctsrts_gpio30: uart0_ctsrts_gpio30 {
 				brcm,pins = <30 31>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
-			uart0_ctsrts_gpio32: uart0_ctsrts_gpio32 {
+			uart0_gpio32: uart0_gpio32 {
 				brcm,pins = <32 33>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
-- 
cgit v1.2.3-59-g8ed1b


From 843b2287fb77f0b1966fa90912e093a0d417b27b Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 29 Jan 2017 21:53:10 +0200
Subject: ARM: dts: bcm2835: fix i2c0 pins

According to the BCM2835 ARM Peripherals document i2c0 doesn't map to pins 32,
34 but to 28, 29.

Fixes: 21ff843931b ("ARM: dts: bcm283x: Define standard pinctrl groups in the gpio node.")
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm283x.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index e9623af3ad76..7b4880dc291a 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -198,8 +198,8 @@
 				brcm,pins = <0 1>;
 				brcm,function = <BCM2835_FSEL_ALT0>;
 			};
-			i2c0_gpio32: i2c0_gpio32 {
-				brcm,pins = <32 34>;
+			i2c0_gpio28: i2c0_gpio28 {
+				brcm,pins = <28 29>;
 				brcm,function = <BCM2835_FSEL_ALT0>;
 			};
 			i2c0_gpio44: i2c0_gpio44 {
-- 
cgit v1.2.3-59-g8ed1b


From e1be65a5e426460ab567076107311e83532904e9 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 29 Jan 2017 21:53:11 +0200
Subject: ARM: dts: bcm2835: fix uart0/uart1 pins

According to the BCM2835 ARM Peripherals document uart1 doesn't map to pins
36-39, but uart0 does.

Also, split into separate Rx/Tx and CST/RTS groups to match other uart nodes.

Fixes: 21ff843931b ("ARM: dts: bcm283x: Define standard pinctrl groups in the gpio node.")
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm283x.dtsi | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 7b4880dc291a..561f27d8d922 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -309,6 +309,14 @@
 				brcm,pins = <32 33>;
 				brcm,function = <BCM2835_FSEL_ALT3>;
 			};
+			uart0_gpio36: uart0_gpio36 {
+				brcm,pins = <36 37>;
+				brcm,function = <BCM2835_FSEL_ALT2>;
+			};
+			uart0_ctsrts_gpio38: uart0_ctsrts_gpio38 {
+				brcm,pins = <38 39>;
+				brcm,function = <BCM2835_FSEL_ALT2>;
+			};
 
 			uart1_gpio14: uart1_gpio14 {
 				brcm,pins = <14 15>;
@@ -326,10 +334,6 @@
 				brcm,pins = <30 31>;
 				brcm,function = <BCM2835_FSEL_ALT5>;
 			};
-			uart1_gpio36: uart1_gpio36 {
-				brcm,pins = <36 37 38 39>;
-				brcm,function = <BCM2835_FSEL_ALT2>;
-			};
 			uart1_gpio40: uart1_gpio40 {
 				brcm,pins = <40 41>;
 				brcm,function = <BCM2835_FSEL_ALT5>;
-- 
cgit v1.2.3-59-g8ed1b


From 10b6c0c2e2bb8cd1be682f8d36ef597e3419cb88 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Mon, 30 Jan 2017 20:44:39 +0200
Subject: ARM: dts: bcm2835: add index to the ethernet alias

An alias name should have an index number even when it is the only of its type.
This allows U-Boot to add the local-mac-address property. Otherwise U-Boot
skips the alias.

Fixes: 6a93792774 ("ARM: bcm2835: dt: Add the ethernet to the device trees")
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Acked-by: Lubomir Rintel <lkundrak@v3.sk>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi | 2 +-
 arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi b/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi
index 12c981e51134..9a0599f711ff 100644
--- a/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi
+++ b/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi
@@ -1,6 +1,6 @@
 / {
 	aliases {
-		ethernet = &ethernet;
+		ethernet0 = &ethernet;
 	};
 };
 
diff --git a/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi b/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
index 3f0a56ebcf1f..dc7ae776db5f 100644
--- a/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
+++ b/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
@@ -1,6 +1,6 @@
 / {
 	aliases {
-		ethernet = &ethernet;
+		ethernet0 = &ethernet;
 	};
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From d2718d1365f7fce624fd7ed163f60532f92ed016 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Wed, 5 Apr 2017 17:18:03 +0200
Subject: arm64: marvell: enable the Armada 37xx pinctrl driver

This commit makes sure the driver for the Armada 37xx pin controller is
enabled.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm64/Kconfig.platforms | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 129cc5ae4091..9aa71a3f3f50 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -103,8 +103,13 @@ config ARCH_MVEBU
 	select ARMADA_AP806_SYSCON
 	select ARMADA_CP110_SYSCON
 	select ARMADA_37XX_CLK
+	select GPIOLIB
+	select GPIOLIB_IRQCHIP
 	select MVEBU_ODMI
 	select MVEBU_PIC
+	select OF_GPIO
+	select PINCTRL
+	select PINCTRL_ARMADA_37XX
 	help
 	  This enables support for Marvell EBU familly, including:
 	   - Armada 3700 SoC Family
-- 
cgit v1.2.3-59-g8ed1b


From 7b4ccb3c466f62bbf2f4dd5d6a143d945a6f3051 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 25 Apr 2017 19:36:25 +0200
Subject: soc: renesas: Provide dummy rcar_rst_read_mode_pins() for
 compile-testing

If the R-Car RST driver is not included, compile-testing R-Car clock
drivers fails with a link error:

    undefined reference to `rcar_rst_read_mode_pins'

To fix this, provide a dummy version.  Use the exact same test logic as
in drivers/soc/renesas/Makefile, as there is no Kconfig symbol (yet) to
control compilation of the R-Car RST driver.

Fixes: 527c02f66d263d2e ("soc: renesas: Add R-Car RST driver")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 include/linux/soc/renesas/rcar-rst.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h
index a18e0783946b..787e7ad53d45 100644
--- a/include/linux/soc/renesas/rcar-rst.h
+++ b/include/linux/soc/renesas/rcar-rst.h
@@ -1,6 +1,11 @@
 #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__
 #define __LINUX_SOC_RENESAS_RCAR_RST_H__
 
+#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \
+    defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796)
 int rcar_rst_read_mode_pins(u32 *mode);
+#else
+static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; }
+#endif
 
 #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From afda007feda5cfe7463f3281dfeee703a5dc7ca3 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Wed, 5 Apr 2017 17:18:07 +0200
Subject: ARM64: dts: marvell: Add pinctrl nodes for Armada 3700

Add the nodes for the two pin controller present in the Armada 37xx SoCs.

Initially the node was named gpio1 using the same name that for the
register range in the datasheet. However renaming it pinctr_nb (nb for
North Bridge) makes more sens.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 42 ++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index 311b97c80c7b..e6216cbd4b38 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -161,10 +161,29 @@
 				#clock-cells = <1>;
 			};
 
-			gpio1: gpio@13800 {
-				compatible = "marvell,mvebu-gpio-3700",
+			pinctrl_nb: pinctrl@13800 {
+				compatible = "marvell,armada3710-nb-pinctrl",
 				"syscon", "simple-mfd";
-				reg = <0x13800 0x500>;
+				reg = <0x13800 0x100>, <0x13C00 0x20>;
+				gpionb: gpio {
+					#gpio-cells = <2>;
+					gpio-ranges = <&pinctrl_nb 0 0 36>;
+					gpio-controller;
+					interrupts =
+					<GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
+
+				};
 
 				xtalclk: xtal-clk {
 					compatible = "marvell,armada-3700-xtal-clock";
@@ -173,6 +192,23 @@
 				};
 			};
 
+			pinctrl_sb: pinctrl@18800 {
+				compatible = "marvell,armada3710-sb-pinctrl",
+				"syscon", "simple-mfd";
+				reg = <0x18800 0x100>, <0x18C00 0x20>;
+				gpiosb: gpio {
+					#gpio-cells = <2>;
+					gpio-ranges = <&pinctrl_sb 0 0 29>;
+					gpio-controller;
+					interrupts =
+					<GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
+				};
+			};
+
 			eth0: ethernet@30000 {
 				   compatible = "marvell,armada-3700-neta";
 				   reg = <0x30000 0x4000>;
-- 
cgit v1.2.3-59-g8ed1b


From 6a680783aaadd168557eec695374929ac066536f Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Wed, 14 Dec 2016 17:43:48 +0100
Subject: ARM64: dts: marvell: armada37xx: add pinctrl definition

Start to populate the device tree of the Armada 37xx with the pincontrol
configuration used on the board providing a dts.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm64/boot/dts/marvell/armada-3720-db.dts |  8 +++++++
 arch/arm64/boot/dts/marvell/armada-37xx.dtsi   | 31 ++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
index 950cbd23a5bd..01cdcb98416f 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
@@ -79,6 +79,8 @@
 };
 
 &i2c0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c1_pins>;
 	status = "okay";
 
 	gpio_exp: pca9555@22 {
@@ -113,6 +115,8 @@
 
 &spi0 {
 	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi_quad_pins>;
 
 	m25p80@0 {
 		compatible = "jedec,spi-nor";
@@ -143,6 +147,8 @@
 
 /* Exported on the micro USB connector CON32 through an FTDI */
 &uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart1_pins>;
 	status = "okay";
 };
 
@@ -178,6 +184,8 @@
 };
 
 &eth0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&rgmii_pins>;
 	phy-mode = "rgmii-id";
 	phy = <&phy0>;
 	status = "okay";
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index e6216cbd4b38..f581c74c0bb2 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -190,6 +190,31 @@
 					clock-output-names = "xtal";
 					#clock-cells = <0>;
 				};
+
+				spi_quad_pins: spi-quad-pins {
+					groups = "spi_quad";
+					function = "spi";
+				};
+
+				i2c1_pins: i2c1-pins {
+					groups = "i2c1";
+					function = "i2c";
+				};
+
+				i2c2_pins: i2c2-pins {
+					groups = "i2c2";
+					function = "i2c";
+				};
+
+				uart1_pins: uart1-pins {
+					groups = "uart1";
+					function = "uart";
+				};
+
+				uart2_pins: uart2-pins {
+					groups = "uart2";
+					function = "uart";
+				};
 			};
 
 			pinctrl_sb: pinctrl@18800 {
@@ -207,6 +232,12 @@
 					<GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
 				};
+
+				rgmii_pins: mii-pins {
+					groups = "rgmii";
+					function = "mii";
+				};
+
 			};
 
 			eth0: ethernet@30000 {
-- 
cgit v1.2.3-59-g8ed1b


From 07a63cbe8bcb6ba72fb989dcab1ec55ec6c36c7e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 2 May 2017 13:36:00 +0200
Subject: s390/cputime: fix incorrect system time

git commit c5328901aa1db134 "[S390] entry[64].S improvements" removed
the update of the exit_timer lowcore field from the critical section
cleanup of the .Lsysc_restore/.Lsysc_done and .Lio_restore/.Lio_done
blocks. If the PSW is updated by the critical section cleanup to point to
user space again, the interrupt entry code will do a vtime calculation
after the cleanup completed with an exit_timer value which has *not* been
updated. Due to this incorrect system time deltas are calculated.

If an interrupt occured with an old PSW between .Lsysc_restore/.Lsysc_done
or .Lio_restore/.Lio_done update __LC_EXIT_TIMER with the system entry
time of the interrupt.

Cc: stable@vger.kernel.org # 3.3+
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a5f5d3bb3dbc..e408d9cc5b96 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -312,6 +312,7 @@ ENTRY(system_call)
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lsysc_exit_timer:
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
@@ -623,6 +624,7 @@ ENTRY(io_int_handler)
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lio_exit_timer:
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
@@ -1174,15 +1176,23 @@ cleanup_critical:
 	br	%r14
 
 .Lcleanup_sysc_restore:
+	# check if stpt has been executed
 	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
+	jh	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	cghi	%r11,__LC_SAVE_AREA_ASYNC
 	je	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0:	clg	%r9,BASED(.Lcleanup_sysc_restore_insn+8)
+	je	1f
 	lg	%r9,24(%r11)		# get saved pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
-0:	lmg	%r8,%r9,__LC_RETURN_PSW
+1:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
 .Lcleanup_sysc_restore_insn:
+	.quad	.Lsysc_exit_timer
 	.quad	.Lsysc_done - 4
 
 .Lcleanup_io_tif:
@@ -1190,15 +1200,20 @@ cleanup_critical:
 	br	%r14
 
 .Lcleanup_io_restore:
+	# check if stpt has been executed
 	clg	%r9,BASED(.Lcleanup_io_restore_insn)
-	je	0f
+	jh	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0:	clg	%r9,BASED(.Lcleanup_io_restore_insn+8)
+	je	1f
 	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
-0:	lmg	%r8,%r9,__LC_RETURN_PSW
+1:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
 .Lcleanup_io_restore_insn:
+	.quad	.Lio_exit_timer
 	.quad	.Lio_done - 4
 
 .Lcleanup_idle:
-- 
cgit v1.2.3-59-g8ed1b


From 085b6ba0f7971d18fc3078b25e8309e9e75659cb Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 2 May 2017 12:38:57 +0200
Subject: s390/ftrace: fix compile for !MODULES

Fix this compile error if CONFIG_MODULES is disabled:

arch/s390/built-in.o: In function `ftrace_plt_init':
arch/s390/kernel/ftrace.o:(.init.text+0x34cc): undefined reference to `module_alloc'

Reported-by: Rob Landley <rob@landley.net>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ftrace.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 60a8a4e207ed..68f2b8a15eab 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -172,6 +172,8 @@ int __init ftrace_dyn_arch_init(void)
 	return 0;
 }
 
+#ifdef CONFIG_MODULES
+
 static int __init ftrace_plt_init(void)
 {
 	unsigned int *ip;
@@ -190,6 +192,8 @@ static int __init ftrace_plt_init(void)
 }
 device_initcall(ftrace_plt_init);
 
+#endif /* CONFIG_MODULES */
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * Hook the return address and push it in the stack of return addresses
-- 
cgit v1.2.3-59-g8ed1b


From db55947dd2d09cd3e6f722d1205934fec793ee63 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 2 May 2017 13:20:11 +0200
Subject: s390/uprobes: fix compile for !KPROBES

Fix the following compile error(s) if CONFIG_KPROBES is disabled:

arch/s390/kernel/uprobes.c:79:14:
 error: implicit declaration of function 'probe_get_fixup_type'
arch/s390/kernel/uprobes.c:87:14:
 error: 'FIXUP_PSW_NORMAL' undeclared (first use in this function)

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/dis.h     |  2 ++
 arch/s390/include/asm/kprobes.h | 20 ++++++++++----------
 arch/s390/lib/probes.c          |  1 +
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
index 60323c21938b..37f617dfbede 100644
--- a/arch/s390/include/asm/dis.h
+++ b/arch/s390/include/asm/dis.h
@@ -40,6 +40,8 @@ static inline int insn_length(unsigned char code)
 	return ((((int) code + 64) >> 7) + 1) << 1;
 }
 
+struct pt_regs;
+
 void show_code(struct pt_regs *regs);
 void print_fn_code(unsigned char *code, unsigned long len);
 int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len);
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 1293c4066cfc..28792ef82c83 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -27,12 +27,21 @@
  * 2005-Dec	Used as a template for s390 by Mike Grundy
  *		<grundym@us.ibm.com>
  */
+#include <linux/types.h>
 #include <asm-generic/kprobes.h>
 
 #define BREAKPOINT_INSTRUCTION	0x0002
 
+#define FIXUP_PSW_NORMAL	0x08
+#define FIXUP_BRANCH_NOT_TAKEN	0x04
+#define FIXUP_RETURN_REGISTER	0x02
+#define FIXUP_NOT_REQUIRED	0x01
+
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
 #ifdef CONFIG_KPROBES
-#include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 #include <linux/sched/task_stack.h>
@@ -56,11 +65,6 @@ typedef u16 kprobe_opcode_t;
 
 #define KPROBE_SWAP_INST	0x10
 
-#define FIXUP_PSW_NORMAL	0x08
-#define FIXUP_BRANCH_NOT_TAKEN	0x04
-#define FIXUP_RETURN_REGISTER	0x02
-#define FIXUP_NOT_REQUIRED	0x01
-
 /* Architecture specific copy of original instruction */
 struct arch_specific_insn {
 	/* copy of original instruction */
@@ -90,10 +94,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 int kprobe_exceptions_notify(struct notifier_block *self,
 	unsigned long val, void *data);
 
-int probe_is_prohibited_opcode(u16 *insn);
-int probe_get_fixup_type(u16 *insn);
-int probe_is_insn_relative_long(u16 *insn);
-
 #define flush_insn_slot(p)	do { } while (0)
 
 #endif /* CONFIG_KPROBES */
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
index ae90e1ae3607..1963ddbf4ab3 100644
--- a/arch/s390/lib/probes.c
+++ b/arch/s390/lib/probes.c
@@ -4,6 +4,7 @@
  *    Copyright IBM Corp. 2014
  */
 
+#include <linux/errno.h>
 #include <asm/kprobes.h>
 #include <asm/dis.h>
 
-- 
cgit v1.2.3-59-g8ed1b


From eb77e6b80f3bed262c7773236f0fb84649fd3091 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Thu, 27 Apr 2017 12:11:54 -0500
Subject: EDAC, amd64: Fix reporting of Chip Select sizes on Fam17h

The wrong index into the csbases/csmasks arrays was being passed to
the function to compute the chip select sizes, which resulted in the
wrong size being computed. Address that so that the correct values are
computed and printed.

Also, redo how we calculate the number of pages in a CS row.

Reported-by: Benjamin Bennett <benbennett@gmail.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: <stable@vger.kernel.org> # 4.10.x
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1493313114-11260-1-git-send-email-Yazen.Ghannam@amd.com
[ Remove unneeded integer math comment, minor cleanups. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/edac/amd64_edac.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 82dab1692264..3aea55698165 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -782,24 +782,26 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
 
 static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
 {
-	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
-	int dimm, size0, size1;
+	int dimm, size0, size1, cs0, cs1;
 
 	edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
 
 	for (dimm = 0; dimm < 4; dimm++) {
 		size0 = 0;
+		cs0 = dimm * 2;
 
-		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
-			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm);
+		if (csrow_enabled(cs0, ctrl, pvt))
+			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);
 
 		size1 = 0;
-		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
-			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm);
+		cs1 = dimm * 2 + 1;
+
+		if (csrow_enabled(cs1, ctrl, pvt))
+			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
 
 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
-				dimm * 2,     size0,
-				dimm * 2 + 1, size1);
+				cs0,	size0,
+				cs1,	size1);
 	}
 }
 
@@ -2756,26 +2758,22 @@ skip:
  *	encompasses
  *
  */
-static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
 {
-	u32 cs_mode, nr_pages;
 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
+	int csrow_nr = csrow_nr_orig;
+	u32 cs_mode, nr_pages;
 
+	if (!pvt->umc)
+		csrow_nr >>= 1;
 
-	/*
-	 * The math on this doesn't look right on the surface because x/2*4 can
-	 * be simplified to x*2 but this expression makes use of the fact that
-	 * it is integral math where 1/2=0. This intermediate value becomes the
-	 * number of bits to shift the DBAM register to extract the proper CSROW
-	 * field.
-	 */
-	cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
+	cs_mode = DBAM_DIMM(csrow_nr, dbam);
 
-	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2))
-							   << (20 - PAGE_SHIFT);
+	nr_pages   = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
+	nr_pages <<= 20 - PAGE_SHIFT;
 
 	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
-		    csrow_nr, dct,  cs_mode);
+		    csrow_nr_orig, dct,  cs_mode);
 	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
 
 	return nr_pages;
-- 
cgit v1.2.3-59-g8ed1b


From 0e78a87306a6f55b1c7bbafad1de62c3975953ca Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 3 May 2017 08:44:27 +0200
Subject: esp4: Fix udpencap for local TCP packets.

Locally generated TCP packets are usually cloned, so we
do skb_cow_data() on this packets. After that we need to
reload the pointer to the esp header. On udpencap this
header has an offset to skb_transport_header, so take this
offset into account.

Fixes: 67d349ed603 ("net/esp4: Fix invalid esph pointer crash")
Fixes: fca11ebde3f0 ("esp4: Reorganize esp_output")
Reported-by: Don Bowman <db@donbowman.ca>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 65cc02bd82bc..93322f895eab 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -248,6 +248,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	u8 *tail;
 	u8 *vaddr;
 	int nfrags;
+	int esph_offset;
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
@@ -313,11 +314,13 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	}
 
 cow:
+	esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb);
+
 	nfrags = skb_cow_data(skb, tailen, &trailer);
 	if (nfrags < 0)
 		goto out;
 	tail = skb_tail_pointer(trailer);
-	esp->esph = ip_esp_hdr(skb);
+	esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset);
 
 skip_cow:
 	esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
-- 
cgit v1.2.3-59-g8ed1b


From 9b3eb54106cf6acd03f07cf0ab01c13676a226c2 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 3 May 2017 16:43:19 +0200
Subject: xfrm: fix stack access out of bounds with CONFIG_XFRM_SUB_POLICY

When CONFIG_XFRM_SUB_POLICY=y, xfrm_dst stores a copy of the flowi for
that dst. Unfortunately, the code that allocates and fills this copy
doesn't care about what type of flowi (flowi, flowi4, flowi6) gets
passed. In multiple code paths (from raw_sendmsg, from TCP when
replying to a FIN, in vxlan, geneve, and gre), the flowi that gets
passed to xfrm is actually an on-stack flowi4, so we end up reading
stuff from the stack past the end of the flowi4 struct.

Since xfrm_dst->origin isn't used anywhere following commit
ca116922afa8 ("xfrm: Eliminate "fl" and "pol" args to
xfrm_bundle_ok()."), just get rid of it.  xfrm_dst->partner isn't used
either, so get rid of that too.

Fixes: 9d6ec938019c ("ipv4: Use flowi4 in public route lookup interfaces.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     | 10 ----------
 net/xfrm/xfrm_policy.c | 47 -----------------------------------------------
 2 files changed, 57 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6793a30c66b1..7e7e2b0d2915 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -979,10 +979,6 @@ struct xfrm_dst {
 	struct flow_cache_object flo;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct flowi *origin;
-	struct xfrm_selector *partner;
-#endif
 	u32 xfrm_genid;
 	u32 policy_genid;
 	u32 route_mtu_cached;
@@ -998,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 	dst_release(xdst->route);
 	if (likely(xdst->u.dst.xfrm))
 		xfrm_state_put(xdst->u.dst.xfrm);
-#ifdef CONFIG_XFRM_SUB_POLICY
-	kfree(xdst->origin);
-	xdst->origin = NULL;
-	kfree(xdst->partner);
-	xdst->partner = NULL;
-#endif
 }
 #endif
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b00a1d5a7f52..ed4e52d95172 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1797,43 +1797,6 @@ free_dst:
 	goto out;
 }
 
-#ifdef CONFIG_XFRM_SUB_POLICY
-static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
-{
-	if (!*target) {
-		*target = kmalloc(size, GFP_ATOMIC);
-		if (!*target)
-			return -ENOMEM;
-	}
-
-	memcpy(*target, src, size);
-	return 0;
-}
-#endif
-
-static int xfrm_dst_update_parent(struct dst_entry *dst,
-				  const struct xfrm_selector *sel)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
-				   sel, sizeof(*sel));
-#else
-	return 0;
-#endif
-}
-
-static int xfrm_dst_update_origin(struct dst_entry *dst,
-				  const struct flowi *fl)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
-#else
-	return 0;
-#endif
-}
-
 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 				struct xfrm_policy **pols,
 				int *num_pols, int *num_xfrms)
@@ -1905,16 +1868,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 
 	xdst = (struct xfrm_dst *)dst;
 	xdst->num_xfrms = err;
-	if (num_pols > 1)
-		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-	else
-		err = xfrm_dst_update_origin(dst, fl);
-	if (unlikely(err)) {
-		dst_free(dst);
-		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-		return ERR_PTR(err);
-	}
-
 	xdst->num_pols = num_pols;
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
-- 
cgit v1.2.3-59-g8ed1b


From 3d05f3aed5d721c2c77d20288c29ab26c6193ed5 Mon Sep 17 00:00:00 2001
From: Julia Cartwright <julia@ni.com>
Date: Fri, 28 Apr 2017 12:41:02 -0500
Subject: md/raid5: make use of spin_lock_irq over local_irq_disable +
 spin_lock

On mainline, there is no functional difference, just less code, and
symmetric lock/unlock paths.

On PREEMPT_RT builds, this fixes the following warning, seen by
Alexander GQ Gerasiov, due to the sleeping nature of spinlocks.

   BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:993
   in_atomic(): 0, irqs_disabled(): 1, pid: 58, name: kworker/u12:1
   CPU: 5 PID: 58 Comm: kworker/u12:1 Tainted: G        W       4.9.20-rt16-stand6-686 #1
   Hardware name: Supermicro SYS-5027R-WRF/X9SRW-F, BIOS 3.2a 10/28/2015
   Workqueue: writeback wb_workfn (flush-253:0)
   Call Trace:
    dump_stack+0x47/0x68
    ? migrate_enable+0x4a/0xf0
    ___might_sleep+0x101/0x180
    rt_spin_lock+0x17/0x40
    add_stripe_bio+0x4e3/0x6c0 [raid456]
    ? preempt_count_add+0x42/0xb0
    raid5_make_request+0x737/0xdd0 [raid456]

Reported-by: Alexander GQ Gerasiov <gq@redlab-i.ru>
Tested-by: Alexander GQ Gerasiov <gq@redlab-i.ru>
Signed-off-by: Julia Cartwright <julia@ni.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2e38cfac5b1d..3809a2192132 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
-	local_irq_disable();
-	spin_lock(conf->hash_locks);
+	spin_lock_irq(conf->hash_locks);
 	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
 		spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
 	spin_lock(&conf->device_lock);
@@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
 	spin_unlock(&conf->device_lock);
-	for (i = NR_STRIPE_HASH_LOCKS; i; i--)
-		spin_unlock(conf->hash_locks + i - 1);
-	local_irq_enable();
+	for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+		spin_unlock(conf->hash_locks + i);
+	spin_unlock_irq(conf->hash_locks);
 }
 
 /* Find first data disk in a raid6 stripe */
@@ -714,12 +713,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
 
 static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
-	local_irq_disable();
 	if (sh1 > sh2) {
-		spin_lock(&sh2->stripe_lock);
+		spin_lock_irq(&sh2->stripe_lock);
 		spin_lock_nested(&sh1->stripe_lock, 1);
 	} else {
-		spin_lock(&sh1->stripe_lock);
+		spin_lock_irq(&sh1->stripe_lock);
 		spin_lock_nested(&sh2->stripe_lock, 1);
 	}
 }
@@ -727,8 +725,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
 	spin_unlock(&sh1->stripe_lock);
-	spin_unlock(&sh2->stripe_lock);
-	local_irq_enable();
+	spin_unlock_irq(&sh2->stripe_lock);
 }
 
 /* Only freshly new full stripe normal write stripe can be added to a batch list */
-- 
cgit v1.2.3-59-g8ed1b


From 7f48d0b48cba2ddc03d09353ba4ef6ae680da520 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 25 Apr 2017 10:05:12 +0100
Subject: drm/i915/gvt: fix typo: "supporte" -> "support"

trivial fix to typo in WARN_ONCE message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 6da9ae1618e3..31624f1df893 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1240,7 +1240,7 @@ static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset,
 	mode = vgpu_vreg(vgpu, offset);
 
 	if (GFX_MODE_BIT_SET_IN_MASK(mode, START_DMA)) {
-		WARN_ONCE(1, "VM(%d): iGVT-g doesn't supporte GuC\n",
+		WARN_ONCE(1, "VM(%d): iGVT-g doesn't support GuC\n",
 				vgpu->id);
 		return 0;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 657314b7a5d16961e7e0ecdae4a59d28123e74c0 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 28 Apr 2017 17:30:52 +0200
Subject: drm/etnaviv: don't put fence in case of submit failure

If we bail out of the submit before actually adding the cmdstream
to the kernel ring there is no valid fence to put. Make sure to skip
the fence_put in that case.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index e1909429837e..de80ee1b71df 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -44,6 +44,7 @@ static struct etnaviv_gem_submit *submit_create(struct drm_device *dev,
 
 		/* initially, until copy_from_user() and bo lookup succeeds: */
 		submit->nr_bos = 0;
+		submit->fence = NULL;
 
 		ww_acquire_init(&submit->ticket, &reservation_ww_class);
 	}
@@ -294,7 +295,8 @@ static void submit_cleanup(struct etnaviv_gem_submit *submit)
 	}
 
 	ww_acquire_fini(&submit->ticket);
-	dma_fence_put(submit->fence);
+	if (submit->fence)
+		dma_fence_put(submit->fence);
 	kfree(submit);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d86ab9cff8b936aadde444d0e263a8db5ff0349b Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@arm.com>
Date: Wed, 3 May 2017 14:30:48 +0100
Subject: cpufreq: schedutil: use now as reference when aggregating shared
 policy requests

Currently, sugov_next_freq_shared() uses last_freq_update_time as a
reference to decide when to start considering CPU contributions as
stale.

However, since last_freq_update_time is set by the last CPU that issued
a frequency transition, this might cause problems in certain cases. In
practice, the detection of stale utilization values fails whenever the
CPU with such values was the last to update the policy. For example (and
please note again that the SCHED_CPUFREQ_RT flag is not the problem
here, but only the detection of after how much time that flag has to be
considered stale), suppose a policy with 2 CPUs:

               CPU0                |               CPU1
                                   |
                                   |     RT task scheduled
                                   |     SCHED_CPUFREQ_RT is set
                                   |     CPU1->last_update = now
                                   |     freq transition to max
                                   |     last_freq_update_time = now
                                   |

                        more than TICK_NSEC nsecs

                                   |
     a small CFS wakes up          |
     CPU0->last_update = now1      |
     delta_ns(CPU0) < TICK_NSEC*   |
     CPU0's util is considered     |
     delta_ns(CPU1) =              |
      last_freq_update_time -      |
      CPU1->last_update = 0        |
      < TICK_NSEC                  |
     CPU1 is still considered      |
     CPU1->SCHED_CPUFREQ_RT is set |
     we stay at max (until CPU1    |
     exits from idle)              |

* delta_ns is actually negative as now1 > last_freq_update_time

While last_freq_update_time is a sensible reference for rate limiting,
it doesn't seem to be useful for working around stale CPU states.

Fix the problem by always considering now (time) as the reference for
deciding when CPUs have stale contributions.

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 76877a62b5fa..622eed1b7658 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -245,11 +245,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	u64 last_freq_update_time = sg_policy->last_freq_update_time;
 	unsigned long util = 0, max = 1;
 	unsigned int j;
 
@@ -265,7 +264,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 		 * enough, don't take the CPU into account as it probably is
 		 * idle now (and clear iowait_boost for it).
 		 */
-		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+		delta_ns = time - j_sg_cpu->last_update;
 		if (delta_ns > TICK_NSEC) {
 			j_sg_cpu->iowait_boost = 0;
 			continue;
@@ -309,7 +308,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 		if (flags & SCHED_CPUFREQ_RT_DL)
 			next_f = sg_policy->policy->cpuinfo.max_freq;
 		else
-			next_f = sugov_next_freq_shared(sg_cpu);
+			next_f = sugov_next_freq_shared(sg_cpu, time);
 
 		sugov_update_commit(sg_policy, time, next_f);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From d90c902449a7561f1b1d58ba5a0d11728ce8b0b2 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 5 May 2017 07:40:42 +0200
Subject: af_key: Fix slab-out-of-bounds in pfkey_compile_policy.

The sadb_x_sec_len is stored in the unit 'byte divided by eight'.
So we have to multiply this value by eight before we can do
size checks. Otherwise we may get a slab-out-of-bounds when
we memcpy the user sec_ctx.

Fixes: df71837d502 ("[LSM-IPSec]: Security association restriction.")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index c1950bb14735..512dc43d0ce6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3285,7 +3285,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len) {
+		    sec_ctx->sadb_x_sec_len*8) {
 			*dir = -EINVAL;
 			goto out;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 2c1497bbc8fdee897341ab48ee9c9209b421b8c0 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Mon, 8 May 2017 10:30:18 +0300
Subject: xfrm: Fix NETDEV_DOWN with IPSec offload

Upon NETDEV_DOWN event, all xfrm_state objects which are bound to
the device are flushed.

The condition for this is wrong, though, testing dev->hw_features
instead of dev->features. If a device has non-user-modifiable
NETIF_F_HW_ESP, then its xfrm_state objects are not flushed,
causing a crash later on after the device is deleted.

Check dev->features instead of dev->hw_features.

Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 8ec8a3fcf8d4..574e6f32f94f 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -170,7 +170,7 @@ static int xfrm_dev_feat_change(struct net_device *dev)
 
 static int xfrm_dev_down(struct net_device *dev)
 {
-	if (dev->hw_features & NETIF_F_HW_ESP)
+	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
 	xfrm_garbage_collect(dev_net(dev));
-- 
cgit v1.2.3-59-g8ed1b


From 2345ab1df8a9aa3cdca942142b48eb141faeb1c3 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Mon, 8 May 2017 09:27:39 +0800
Subject: drm/i915/gvt: not to restore in-context mmio

Needn't to restore the in-context MMIO when SCHEDULE_OUT. Sometimes
with restoring the in-context MMIO, some GPU hang can be observed. So
remove the in-context MMIO restore

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/render.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 0beb83563b08..05b75b9f852a 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -333,6 +333,9 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 		} else
 			v = mmio->value;
 
+		if (mmio->in_context)
+			continue;
+
 		I915_WRITE(mmio->reg, v);
 		POSTING_READ(mmio->reg);
 
-- 
cgit v1.2.3-59-g8ed1b


From 3c5ab3f395d66a9e4e937fcfdf6ebc63894f028b Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 29 Apr 2017 20:33:09 +0300
Subject: ipvs: SNAT packet replies only for NATed connections

We do not check if packet from real server is for NAT
connection before performing SNAT. This causes problems
for setups that use DR/TUN and allow local clients to
access the real server directly, for example:

- local client in director creates IPVS-DR/TUN connection
CIP->VIP and the request packets are routed to RIP.
Talks are finished but IPVS connection is not expired yet.

- second local client creates non-IPVS connection CIP->RIP
with same reply tuple RIP->CIP and when replies are received
on LOCAL_IN we wrongly assign them for the first client
connection because RIP->CIP matches the reply direction.
As result, IPVS SNATs replies for non-IPVS connections.

The problem is more visible to local UDP clients but in rare
cases it can happen also for TCP or remote clients when the
real server sends the reply traffic via the director.

So, better to be more precise for the reply traffic.
As replies are not expected for DR/TUN connections, better
to not touch them.

Reported-by: Nick Moriarty <nick.moriarty@york.ac.uk>
Tested-by: Nick Moriarty <nick.moriarty@york.ac.uk>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d2d7bdf1d510..ad99c1ceea6f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 {
 	unsigned int verdict = NF_DROP;
 
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		pr_err("shouldn't reach here, because the box is on the "
-		       "half connection in the tun/dr module.\n");
-	}
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		goto ignore_cp;
 
 	/* Ensure the checksum is correct */
 	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
@@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 		ip_vs_notrack(skb);
 	else
 		ip_vs_update_conntrack(skb, cp, 0);
+
+ignore_cp:
 	verdict = NF_ACCEPT;
 
 out:
@@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	 */
 	cp = pp->conn_out_get(ipvs, af, skb, &iph);
 
-	if (likely(cp))
+	if (likely(cp)) {
+		if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+			goto ignore_cp;
 		return handle_response(af, skb, pd, cp, &iph, hooknum);
+	}
 
 	/* Check for real-server-started requests */
 	if (atomic_read(&ipvs->conn_out_counter)) {
@@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 			}
 		}
 	}
+
+out:
 	IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
 		      "ip_vs_out: packet continues traversal as normal");
 	return NF_ACCEPT;
+
+ignore_cp:
+	__ip_vs_conn_put(cp);
+	goto out;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 2214c260c72b0bd94e6c1c19bf451686212025d3 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Mon, 8 May 2017 11:56:55 +0200
Subject: md: don't return -EAGAIN in md_allow_write for external metadata
 arrays

This essentially reverts commit b5470dc5fc18 ("md: resolve external
metadata handling deadlock in md_allow_write") with some adjustments.

Since commit 6791875e2e53 ("md: make reconfig_mutex optional for writes
to md sysfs files.") changing array_state to 'active' does not use
mddev_lock() and will not cause a deadlock with md_allow_write(). This
revert simplifies userspace tools that write to sysfs attributes like
"stripe_cache_size" or "consistency_policy" because it removes the need
for special handling for external metadata arrays, checking for EAGAIN
and retrying the write.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c    | 20 ++++++++------------
 drivers/md/md.h    |  2 +-
 drivers/md/raid1.c |  9 +++------
 drivers/md/raid5.c | 12 +++---------
 4 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82f798be964f..10367ffe92e3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8022,18 +8022,15 @@ EXPORT_SYMBOL(md_write_end);
  * may proceed without blocking.  It is important to call this before
  * attempting a GFP_KERNEL allocation while holding the mddev lock.
  * Must be called with mddev_lock held.
- *
- * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock
- * is dropped, so return -EAGAIN after notifying userspace.
  */
-int md_allow_write(struct mddev *mddev)
+void md_allow_write(struct mddev *mddev)
 {
 	if (!mddev->pers)
-		return 0;
+		return;
 	if (mddev->ro)
-		return 0;
+		return;
 	if (!mddev->pers->sync_request)
-		return 0;
+		return;
 
 	spin_lock(&mddev->lock);
 	if (mddev->in_sync) {
@@ -8046,13 +8043,12 @@ int md_allow_write(struct mddev *mddev)
 		spin_unlock(&mddev->lock);
 		md_update_sb(mddev, 0);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
+		/* wait for the dirty state to be recorded in the metadata */
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
+			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	} else
 		spin_unlock(&mddev->lock);
-
-	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
-		return -EAGAIN;
-	else
-		return 0;
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4e75d121bfcc..11f15146ce51 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -665,7 +665,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 			bool metadata_op);
 extern void md_do_sync(struct md_thread *thread);
 extern void md_new_event(struct mddev *mddev);
-extern int md_allow_write(struct mddev *mddev);
+extern void md_allow_write(struct mddev *mddev);
 extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
 extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
 extern int md_check_no_bitmap(struct mddev *mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7ed59351fe97..7c1f73398800 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3197,7 +3197,7 @@ static int raid1_reshape(struct mddev *mddev)
 	struct r1conf *conf = mddev->private;
 	int cnt, raid_disks;
 	unsigned long flags;
-	int d, d2, err;
+	int d, d2;
 
 	/* Cannot change chunk_size, layout, or level */
 	if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
@@ -3209,11 +3209,8 @@ static int raid1_reshape(struct mddev *mddev)
 		return -EINVAL;
 	}
 
-	if (!mddev_is_clustered(mddev)) {
-		err = md_allow_write(mddev);
-		if (err)
-			return err;
-	}
+	if (!mddev_is_clustered(mddev))
+		md_allow_write(mddev);
 
 	raid_disks = mddev->raid_disks + mddev->delta_disks;
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3809a2192132..f8055a7abb4b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2309,14 +2309,12 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
-	int err;
+	int err = 0;
 	struct kmem_cache *sc;
 	int i;
 	int hash, cnt;
 
-	err = md_allow_write(conf->mddev);
-	if (err)
-		return err;
+	md_allow_write(conf->mddev);
 
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
@@ -6310,7 +6308,6 @@ int
 raid5_set_cache_size(struct mddev *mddev, int size)
 {
 	struct r5conf *conf = mddev->private;
-	int err;
 
 	if (size <= 16 || size > 32768)
 		return -EINVAL;
@@ -6322,10 +6319,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 		;
 	mutex_unlock(&conf->cache_size_mutex);
 
-
-	err = md_allow_write(mddev);
-	if (err)
-		return err;
+	md_allow_write(mddev);
 
 	mutex_lock(&conf->cache_size_mutex);
 	while (size > conf->max_nr_stripes)
-- 
cgit v1.2.3-59-g8ed1b


From c1061255031dea76ee9e082d081b88ac6d6c8265 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 27 Apr 2017 16:25:03 +0200
Subject: scsi: libfc: do not flood console with messages 'libfc: queue full
 ...'

When the FCoE sending side becomes congested libfc tries to reduce the
queue depth on the host; however due to the built-in lag before
attempting to ramp down the queue depth _again_ the message log is
flooded with the following message:

	libfc: queue full, reducing can_queue to 512

With this patch the message is printed only once (ie when it's
actually changed).

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Acked-by: Johannes Thumshirn <jth@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libfc/fc_fcp.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index a808e8ef1d08..234352da5c3c 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -407,11 +407,12 @@ unlock:
  * can_queue. Eventually we will hit the point where we run
  * on all reserved structs.
  */
-static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
+static bool fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
 {
 	struct fc_fcp_internal *si = fc_get_scsi_internal(lport);
 	unsigned long flags;
 	int can_queue;
+	bool changed = false;
 
 	spin_lock_irqsave(lport->host->host_lock, flags);
 
@@ -427,9 +428,11 @@ static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
 	if (!can_queue)
 		can_queue = 1;
 	lport->host->can_queue = can_queue;
+	changed = true;
 
 unlock:
 	spin_unlock_irqrestore(lport->host->host_lock, flags);
+	return changed;
 }
 
 /*
@@ -1896,11 +1899,11 @@ int fc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc_cmd)
 
 	if (!fc_fcp_lport_queue_ready(lport)) {
 		if (lport->qfull) {
-			fc_fcp_can_queue_ramp_down(lport);
-			shost_printk(KERN_ERR, lport->host,
-				     "libfc: queue full, "
-				     "reducing can_queue to %d.\n",
-				     lport->host->can_queue);
+			if (fc_fcp_can_queue_ramp_down(lport))
+				shost_printk(KERN_ERR, lport->host,
+					     "libfc: queue full, "
+					     "reducing can_queue to %d.\n",
+					     lport->host->can_queue);
 		}
 		rc = SCSI_MLQUEUE_HOST_BUSY;
 		goto out;
-- 
cgit v1.2.3-59-g8ed1b


From 4492b739c9ccfaf828bd7c02dc779ec2a5e55ff4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 27 Apr 2017 15:08:26 -0700
Subject: scsi: lpfc: Fix panic on BFS configuration

To select the appropriate shost template, the driver is issuing a
mailbox command to retrieve the wwn. Turns out the sending of the
command precedes the reset of the function.  On SLI-4 adapters, this is
inconsequential as the mailbox command location is specified by dma via
the BMBX register. However, on SLI-3 adapters, the location of the
mailbox command submission area changes. When the function is first
powered on or reset, the cmd is submitted via PCI bar memory. Later the
driver changes the function config to use host memory and DMA. The
request to start a mailbox command is the same, a simple doorbell write,
regardless of submission area.  So.. if there has not been a boot driver
run against the adapter, the mailbox command works as defaults are
ok. But, if the boot driver has configured the card and, and if no
platform pci function/slot reset occurs as the os starts, the mailbox
command will fail. The SLI-3 device will use the stale boot driver dma
location. This can cause PCI eeh errors.

Fix is to reset the sli-3 function before sending the mailbox command,
thus synchronizing the function/driver on mailbox location.

Note: The fix uses routines that are typically invoked later in the call
flow to reset the sli-3 device. The issue in using those routines is
that the normal (non-fix) flow does additional initialization, namely
the allocation of the pport structure. So, rather than significantly
reworking the initialization flow so that the pport is alloc'd first,
pointer checks are added to work around it. Checks are limited to the
routines invoked by a sli-3 adapter (s3 routines) as this fix/early call
is only invoked on a sli3 adapter. Nothing changes post the
fix. Subsequent initialization, and another adapter reset, still occur -
both on sli-3 and sli-4 adapters.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Fixes: 96418b5e2c88 ("scsi: lpfc: Fix eh_deadline setting for sli3 adapters.")
Cc: stable@vger.kernel.org # v4.11+
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_crtn.h |  1 +
 drivers/scsi/lpfc/lpfc_init.c |  7 +++++++
 drivers/scsi/lpfc/lpfc_sli.c  | 19 ++++++++++++-------
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 944b32ca4931..1c55408ac718 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -294,6 +294,7 @@ int lpfc_selective_reset(struct lpfc_hba *);
 void lpfc_reset_barrier(struct lpfc_hba *);
 int lpfc_sli_brdready(struct lpfc_hba *, uint32_t);
 int lpfc_sli_brdkill(struct lpfc_hba *);
+int lpfc_sli_chipset_init(struct lpfc_hba *phba);
 int lpfc_sli_brdreset(struct lpfc_hba *);
 int lpfc_sli_brdrestart(struct lpfc_hba *);
 int lpfc_sli_hba_setup(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 90ae354a9c45..e85f273e34ac 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3602,6 +3602,13 @@ lpfc_get_wwpn(struct lpfc_hba *phba)
 	LPFC_MBOXQ_t *mboxq;
 	MAILBOX_t *mb;
 
+	if (phba->sli_rev < LPFC_SLI_REV4) {
+		/* Reset the port first */
+		lpfc_sli_brdrestart(phba);
+		rc = lpfc_sli_chipset_init(phba);
+		if (rc)
+			return (uint64_t)-1;
+	}
 
 	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
 						GFP_KERNEL);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index cf19f4976f5f..2a4fc00dfa9b 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -4204,13 +4204,16 @@ lpfc_sli_brdreset(struct lpfc_hba *phba)
 	/* Reset HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
 			"0325 Reset HBA Data: x%x x%x\n",
-			phba->pport->port_state, psli->sli_flag);
+			(phba->pport) ? phba->pport->port_state : 0,
+			psli->sli_flag);
 
 	/* perform board reset */
 	phba->fc_eventTag = 0;
 	phba->link_events = 0;
-	phba->pport->fc_myDID = 0;
-	phba->pport->fc_prevDID = 0;
+	if (phba->pport) {
+		phba->pport->fc_myDID = 0;
+		phba->pport->fc_prevDID = 0;
+	}
 
 	/* Turn off parity checking and serr during the physical reset */
 	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
@@ -4336,7 +4339,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 	/* Restart HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
 			"0337 Restart HBA Data: x%x x%x\n",
-			phba->pport->port_state, psli->sli_flag);
+			(phba->pport) ? phba->pport->port_state : 0,
+			psli->sli_flag);
 
 	word0 = 0;
 	mb = (MAILBOX_t *) &word0;
@@ -4350,7 +4354,7 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 	readl(to_slim); /* flush */
 
 	/* Only skip post after fc_ffinit is completed */
-	if (phba->pport->port_state)
+	if (phba->pport && phba->pport->port_state)
 		word0 = 1;	/* This is really setting up word1 */
 	else
 		word0 = 0;	/* This is really setting up word1 */
@@ -4359,7 +4363,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 	readl(to_slim); /* flush */
 
 	lpfc_sli_brdreset(phba);
-	phba->pport->stopped = 0;
+	if (phba->pport)
+		phba->pport->stopped = 0;
 	phba->link_state = LPFC_INIT_START;
 	phba->hba_flag = 0;
 	spin_unlock_irq(&phba->hbalock);
@@ -4446,7 +4451,7 @@ lpfc_sli_brdrestart(struct lpfc_hba *phba)
  * iteration, the function will restart the HBA again. The function returns
  * zero if HBA successfully restarted else returns negative error code.
  **/
-static int
+int
 lpfc_sli_chipset_init(struct lpfc_hba *phba)
 {
 	uint32_t status, i = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 4ff7adc8c7886bcf6e48f09c49d3f339f33d7e79 Mon Sep 17 00:00:00 2001
From: Zhou Zhengping <johnzzpcrystal@gmail.com>
Date: Fri, 28 Apr 2017 17:43:04 +0800
Subject: scsi: Skip deleted devices in __scsi_device_lookup

When a device is unplugged from a SCSI controller, if the scsi_device is
still in use by application layer, it won't get released until users
close it.

In this case, scsi_device_remove just set the scsi_device's state to be
SDEV_DEL. But if you plug the disk just before the old scsi_device is
released, then there will be two scsi_device structures in
scsi_host->__devices. When the next unplug event happens, some low-level
drivers will check whether the scsi_device has been added to host (for
example the MegaRAID SAS series controller) by calling
scsi_device_lookup(call __scsi_device_lookup) in function
megasas_aen_polling. __scsi_device_lookup will return the first
scsi_device. Because its state is SDEV_DEL, the scsi_device_lookup will
return NULL, making the low-level driver assume that the scsi_device has
been removed, and won't call scsi_device_remove which will lead to hot
swap failure.

Signed-off-by: Zhou Zhengping <johnzzpcrystal@gmail.com>
Tested-by: Zeng Rujia <ZengRujia@sangfor.com.cn>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195607
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7bfbcfa7af40..61cdd99ae41e 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -763,6 +763,8 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost,
 	struct scsi_device *sdev;
 
 	list_for_each_entry(sdev, &shost->__devices, siblings) {
+		if (sdev->sdev_state == SDEV_DEL)
+			continue;
 		if (sdev->channel == channel && sdev->id == id &&
 				sdev->lun ==lun)
 			return sdev;
-- 
cgit v1.2.3-59-g8ed1b


From 7cc49bbd005a5447afafc285dd302cbb8065eb28 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 2 May 2017 09:57:20 +0200
Subject: scsi: MAINTAINERS: update OSD entries

The open-osd domain doesn't exist anymore, and mails to the list lead to
really annoying bounced that repeat every day.

Also the primarydata address for Benny bounces, and while I have a new
one for him he doesn't seem to be maintaining the OSD code any more.

Which beggs the question: should we really leave the Supported status in
MAINTAINERS given that the code is barely maintained?

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Boaz Harrosh <ooo@electrozaur.com>
Acked-by: Benny Halevy <bhalevy@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 MAINTAINERS | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 08360bb0468b..0e174d4b86a3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9530,10 +9530,6 @@ F:	drivers/net/wireless/intersil/orinoco/
 
 OSD LIBRARY and FILESYSTEM
 M:	Boaz Harrosh <ooo@electrozaur.com>
-M:	Benny Halevy <bhalevy@primarydata.com>
-L:	osd-dev@open-osd.org
-W:	http://open-osd.org
-T:	git git://git.open-osd.org/open-osd.git
 S:	Maintained
 F:	drivers/scsi/osd/
 F:	include/scsi/osd_*
-- 
cgit v1.2.3-59-g8ed1b


From 7aa686d35598cbe58b3f7ce32e4ec17649190f3d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 2 May 2017 10:45:03 -0700
Subject: scsi: scsi_lib: Add #include <scsi/scsi_transport.h>

This patch avoids that when building with W=1 the compiler complains
that __scsi_init_queue() has not been declared.  See also commit
d48777a633d6 ("scsi: remove __scsi_alloc_queue").

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 814a4bd8405d..e31f1cc90b81 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -30,6 +30,7 @@
 #include <scsi/scsi_driver.h>
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h> /* __scsi_init_queue() */
 #include <scsi/scsi_dh.h>
 
 #include <trace/events/scsi.h>
-- 
cgit v1.2.3-59-g8ed1b


From 87ea6bdd8da88842a8247921f58ca495b4da79ef Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Wed, 3 May 2017 14:17:55 -0500
Subject: scsi: qedf: properly update arguments position in function call

Properly update the position of the arguments in function call.

Addresses-Coverity-ID: 1402010
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_els.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c
index c505d41f6dc8..90627033bde6 100644
--- a/drivers/scsi/qedf/qedf_els.c
+++ b/drivers/scsi/qedf/qedf_els.c
@@ -109,7 +109,7 @@ retry_els:
 	did = fcport->rdata->ids.port_id;
 	sid = fcport->sid;
 
-	__fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, sid, did,
+	__fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, did, sid,
 			   FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ |
 			   FC_FC_SEQ_INIT, 0);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0d618cf4f24cebb822eaf57c27985266e4110176 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 4 May 2017 00:22:16 +0300
Subject: scsi: lpfc: double lock typo in lpfc_ns_rsp()

There is a double lock bug here so this will deadlock instead of
unlocking.

Fixes: 1c5b12f76301 ("Fix implicit logo and RSCN handling for NVMET")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 1487406aea77..c7962dae4dab 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -630,7 +630,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
 						NLP_EVT_DEVICE_RECOVERY);
 			spin_lock_irq(shost->host_lock);
 			ndlp->nlp_flag &= ~NLP_NVMET_RECOV;
-			spin_lock_irq(shost->host_lock);
+			spin_unlock_irq(shost->host_lock);
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 0d2fc3b48ba0e267962c861c5258564c335dd1f2 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 4 May 2017 00:23:57 +0300
Subject: scsi: qedf: Cleanup the type of io_log->op

We store sc_cmd->cmnd[0] which is an unsigned char in io_log->op so
this should also be unsigned char.  The other thing is that this is
displayed in the debugfs:

	seq_printf(s, "0x%02x:", io_log->op);

Smatch complains that the formatting won't work for negative values so
changing it to unsigned silences that warning as well.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index 40aeb6bb96a2..07ee88200e91 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -259,7 +259,7 @@ struct qedf_io_log {
 	uint16_t task_id;
 	uint32_t port_id; /* Remote port fabric ID */
 	int lun;
-	char op; /* SCSI CDB */
+	unsigned char op; /* SCSI CDB */
 	uint8_t lba[4];
 	unsigned int bufflen; /* SCSI buffer length */
 	unsigned int sg_count; /* Number of SG elements */
-- 
cgit v1.2.3-59-g8ed1b


From cd22874fec8fbecd4999b55e67baf3d71775f6f3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 May 2017 15:42:55 -0700
Subject: scsi: qedf: Avoid reading past end of buffer

Using memcpy() from a string that is shorter than the length copied
means the destination buffer is being filled with arbitrary data from
the kernel rodata segment. Instead, use strncpy() which will fill the
trailing bytes with zeros.

This was found with the future CONFIG_FORTIFY_SOURCE feature.

Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index cceddd995a4b..a5c97342fd5d 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2895,7 +2895,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 	slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER;
 	slowpath_params.drv_rev = QEDF_DRIVER_REV_VER;
 	slowpath_params.drv_eng = QEDF_DRIVER_ENG_VER;
-	memcpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE);
+	strncpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE);
 	rc = qed_ops->common->slowpath_start(qedf->cdev, &slowpath_params);
 	if (rc) {
 		QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n");
-- 
cgit v1.2.3-59-g8ed1b


From 07cc1ccfb84320582c9ac389a21cd81df82bc123 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 5 May 2017 16:31:47 -0700
Subject: scsi: cxlflash: Select IRQ_POLL

The driver now uses IRQ_POLL and needs to select it to avoid the
following build error.

ERROR: ".irq_poll_complete" [drivers/scsi/cxlflash/cxlflash.ko] undefined!
ERROR: ".irq_poll_sched" [drivers/scsi/cxlflash/cxlflash.ko] undefined!
ERROR: ".irq_poll_disable" [drivers/scsi/cxlflash/cxlflash.ko] undefined!
ERROR: ".irq_poll_init" [drivers/scsi/cxlflash/cxlflash.ko] undefined!

Fixes: cba06e6de403 ("scsi: cxlflash: Implement IRQ polling for RRQ processing")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/cxlflash/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig
index c052104e523e..a011c5dbf214 100644
--- a/drivers/scsi/cxlflash/Kconfig
+++ b/drivers/scsi/cxlflash/Kconfig
@@ -5,6 +5,7 @@
 config CXLFLASH
 	tristate "Support for IBM CAPI Flash"
 	depends on PCI && SCSI && CXL && EEH
+	select IRQ_POLL
 	default m
 	help
 	  Allows CAPI Accelerated IO to Flash
-- 
cgit v1.2.3-59-g8ed1b


From 019c0d66f66a8612bb867caf05e865a4766238a4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 6 May 2017 23:13:55 +0100
Subject: scsi: lpfc: ensure els_wq is being checked before destroying it

I believe there is a typo on the wq destroy of els_wq, currently the
driver is checking if els_cq is not null and I think this should be a
check on els_wq instead.

Detected by CoverityScan, CID#1411629 ("Copy-paste error")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index e85f273e34ac..4b1eb98c228d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -8854,7 +8854,7 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 		lpfc_wq_destroy(phba, phba->sli4_hba.nvmels_wq);
 
 	/* Unset ELS work queue */
-	if (phba->sli4_hba.els_cq)
+	if (phba->sli4_hba.els_wq)
 		lpfc_wq_destroy(phba, phba->sli4_hba.els_wq);
 
 	/* Unset unsolicited receive queue */
-- 
cgit v1.2.3-59-g8ed1b


From b77b36cb7272ec5b9fb000e2ff18e947d9586a22 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 3 May 2017 17:29:01 +0100
Subject: scsi: pmcraid: remove redundant check to see if request_size is less
 than zero

The 2nd check to see if request_size is less than zero is redundant
because the first check takes error exit path on this condition. So,
since it is redundant, remove it.

Detected by CoverityScan, CID#146149 ("Logically Dead Code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pmcraid.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index a4aadf5f4dc6..1cc814f1505a 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3770,9 +3770,6 @@ static long pmcraid_ioctl_passthrough(
 			pmcraid_err("couldn't build passthrough ioadls\n");
 			goto out_free_cmd;
 		}
-	} else if (request_size < 0) {
-		rc = -EINVAL;
-		goto out_free_cmd;
 	}
 
 	/* If data is being written into the device, copy the data from user
-- 
cgit v1.2.3-59-g8ed1b


From 29efc390b9462582ae95eb9a0b8cd17ab956afc0 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Sun, 7 May 2017 17:36:24 -0700
Subject: md/md0: optimize raid0 discard handling

There are complaints that raid0 discard handling is slow. Currently we
divide discard request into chunks and dispatch to underlayer disks. The
block layer will do merge to form big requests. This causes a lot of
request split/merge and uses significant CPU time.

A simple idea is to calculate the range for each raid disk for an IO
request and send a discard request to raid disks, which will avoid the
split/merge completely. Previously Coly tried the approach, but the
implementation was too complex because of raid0 zones. This patch always
split bio in zone boundary and handle bio within one zone. It simplifies
the implementation a lot.

Reviewed-by: NeilBrown <neilb@suse.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid0.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 102 insertions(+), 14 deletions(-)

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 84e58596594d..d6c0bc76e837 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
-		blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+		blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
 
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 		blk_queue_io_opt(mddev->queue,
@@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
 	}
 }
 
+static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+{
+	struct r0conf *conf = mddev->private;
+	struct strip_zone *zone;
+	sector_t start = bio->bi_iter.bi_sector;
+	sector_t end;
+	unsigned int stripe_size;
+	sector_t first_stripe_index, last_stripe_index;
+	sector_t start_disk_offset;
+	unsigned int start_disk_index;
+	sector_t end_disk_offset;
+	unsigned int end_disk_index;
+	unsigned int disk;
+
+	zone = find_zone(conf, &start);
+
+	if (bio_end_sector(bio) > zone->zone_end) {
+		struct bio *split = bio_split(bio,
+			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+			mddev->bio_set);
+		bio_chain(split, bio);
+		generic_make_request(bio);
+		bio = split;
+		end = zone->zone_end;
+	} else
+		end = bio_end_sector(bio);
+
+	if (zone != conf->strip_zone)
+		end = end - zone[-1].zone_end;
+
+	/* Now start and end is the offset in zone */
+	stripe_size = zone->nb_dev * mddev->chunk_sectors;
+
+	first_stripe_index = start;
+	sector_div(first_stripe_index, stripe_size);
+	last_stripe_index = end;
+	sector_div(last_stripe_index, stripe_size);
+
+	start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+		mddev->chunk_sectors;
+	start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
+		mddev->chunk_sectors) +
+		first_stripe_index * mddev->chunk_sectors;
+	end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+		mddev->chunk_sectors;
+	end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
+		mddev->chunk_sectors) +
+		last_stripe_index * mddev->chunk_sectors;
+
+	for (disk = 0; disk < zone->nb_dev; disk++) {
+		sector_t dev_start, dev_end;
+		struct bio *discard_bio = NULL;
+		struct md_rdev *rdev;
+
+		if (disk < start_disk_index)
+			dev_start = (first_stripe_index + 1) *
+				mddev->chunk_sectors;
+		else if (disk > start_disk_index)
+			dev_start = first_stripe_index * mddev->chunk_sectors;
+		else
+			dev_start = start_disk_offset;
+
+		if (disk < end_disk_index)
+			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+		else if (disk > end_disk_index)
+			dev_end = last_stripe_index * mddev->chunk_sectors;
+		else
+			dev_end = end_disk_offset;
+
+		if (dev_end <= dev_start)
+			continue;
+
+		rdev = conf->devlist[(zone - conf->strip_zone) *
+			conf->strip_zone[0].nb_dev + disk];
+		if (__blkdev_issue_discard(rdev->bdev,
+			dev_start + zone->dev_start + rdev->data_offset,
+			dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+		    !discard_bio)
+			continue;
+		bio_chain(discard_bio, bio);
+		if (mddev->gendisk)
+			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
+				discard_bio, disk_devt(mddev->gendisk),
+				bio->bi_iter.bi_sector);
+		generic_make_request(discard_bio);
+	}
+	bio_endio(bio);
+}
+
 static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct strip_zone *zone;
@@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 		return;
 	}
 
+	if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
+		raid0_handle_discard(mddev, bio);
+		return;
+	}
+
 	bio_sector = bio->bi_iter.bi_sector;
 	sector = bio_sector;
 	chunk_sects = mddev->chunk_sectors;
@@ -498,19 +592,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 	bio->bi_iter.bi_sector = sector + zone->dev_start +
 		tmp_dev->data_offset;
 
-	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio);
-	} else {
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
-					      bio, disk_devt(mddev->gendisk),
-					      bio_sector);
-		mddev_check_writesame(mddev, bio);
-		mddev_check_write_zeroes(mddev, bio);
-		generic_make_request(bio);
-	}
+	if (mddev->gendisk)
+		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+				      bio, disk_devt(mddev->gendisk),
+				      bio_sector);
+	mddev_check_writesame(mddev, bio);
+	mddev_check_write_zeroes(mddev, bio);
+	generic_make_request(bio);
 }
 
 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
-- 
cgit v1.2.3-59-g8ed1b


From f5c8b9601036869e162cb278aaafbf003dc4e5a0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 3 May 2017 09:15:07 +0200
Subject: s390/uaccess: use sane length for __strncpy_from_user()

The average string that is copied from user space to kernel space is
rather short. E.g. booting a system involves about 50.000
strncpy_from_user() calls where the NULL terminated string has an
average size of 27 bytes.

By default our s390 specific strncpy_from_user() implementation
however copies up to 4096 bytes, which is a waste of cpu cycles and
cache lines. Therefore reduce the default length to L1_CACHE_BYTES
(256 bytes), which also reduces the average execution time of
strncpy_from_user() by 30-40%.

Alternatively we could have switched to the generic
strncpy_from_user() implementation, however it turned out that that
variant would be slower than the now optimized s390 variant.

Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/lib/uaccess.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 1e5bb2b86c42..b3bd3f23b8e8 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -337,8 +337,8 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 		return 0;
 	done = 0;
 	do {
-		offset = (size_t)src & ~PAGE_MASK;
-		len = min(size - done, PAGE_SIZE - offset);
+		offset = (size_t)src & (L1_CACHE_BYTES - 1);
+		len = min(size - done, L1_CACHE_BYTES - offset);
 		if (copy_from_user(dst, src, len))
 			return -EFAULT;
 		len_str = strnlen(dst, len);
-- 
cgit v1.2.3-59-g8ed1b


From 80ba38469aa28bbcfc7a31e5b41adfc42120465e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 4 May 2017 13:06:58 +0200
Subject: s390/topology: let topology_mnest_limit() return unsigned char

Fixes a couple of compile warnings with gcc 7.1.0 :

arch/s390/kernel/sysinfo.c:578:20:
  note: directive argument in the range [-2147483648, 4]
   sprintf(link_to, "15_1_%d", topology_mnest_limit());

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/sysinfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 73bff45ced55..e784bed6ed7f 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -146,7 +146,7 @@ extern int topology_max_mnest;
  * Returns the maximum nesting level supported by the cpu topology code.
  * The current maximum level is 4 which is the drawer level.
  */
-static inline int topology_mnest_limit(void)
+static inline unsigned char topology_mnest_limit(void)
 {
 	return min(topology_max_mnest, 4);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2685df6776b2c69967eaead48f694869f7dc91ca Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 4 May 2017 13:35:33 +0200
Subject: s390/ccwgroup: increase string buffer size

Avoid false positive warnings like this with gcc 7.1:

drivers/s390/cio/ccwgroup.c:41:21:
 warning: '%d' directive writing between 1 and 10 bytes into a region of size 4
   sprintf(str, "cdev%d", i);

and simply increase the size of the string buffer.

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/ccwgroup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index e443b0d0b236..34b9ad6b3143 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -35,7 +35,7 @@ static struct bus_type ccwgroup_bus_type;
 static void __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev)
 {
 	int i;
-	char str[8];
+	char str[16];
 
 	for (i = 0; i < gdev->count; i++) {
 		sprintf(str, "cdev%d", i);
@@ -238,7 +238,7 @@ static void ccwgroup_release(struct device *dev)
 
 static int __ccwgroup_create_symlinks(struct ccwgroup_device *gdev)
 {
-	char str[8];
+	char str[16];
 	int i, rc;
 
 	for (i = 0; i < gdev->count; i++) {
-- 
cgit v1.2.3-59-g8ed1b


From 6423ef691c50568e56bb7e33a35bf9f8d6142d23 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 4 May 2017 13:39:18 +0200
Subject: s390/qdio: increase string buffer size

Avoid false positive warnings like this with gcc 7.1:

drivers/s390/cio/qdio_debug.h:63:4:
 note: 'snprintf' output between 8 and 17 bytes into a destination of size 16
    snprintf(debug_buffer, QDIO_DBF_LEN, text);

and simply increase the size of the string buffer.

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/qdio_debug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h
index f33ce8577619..1d595d17bf11 100644
--- a/drivers/s390/cio/qdio_debug.h
+++ b/drivers/s390/cio/qdio_debug.h
@@ -11,7 +11,7 @@
 #include "qdio.h"
 
 /* that gives us 15 characters in the text event views */
-#define QDIO_DBF_LEN	16
+#define QDIO_DBF_LEN	32
 
 extern debug_info_t *qdio_dbf_setup;
 extern debug_info_t *qdio_dbf_error;
-- 
cgit v1.2.3-59-g8ed1b


From d04a4c76f71dd5335f8e499b59617382d84e2b8d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 4 May 2017 09:42:22 +0200
Subject: s390: move _text symbol to address higher than zero

The perf tool assumes that kernel symbols are never present at address
zero. In fact it assumes if functions that map symbols to addresses
return zero, that the symbol was not found.

Given that s390's _text symbol historically is located at address zero
this yields at least a couple of false errors and warnings in one of
perf's test cases about not present symbols ("perf test 1").

To fix this simply move the _text symbol to address 0x200, just behind
the initial psw and channel program located at the beginning of the
kernel image. This is now hard coded within the linker script.

I tried a nicer solution which moves the initial psw and channel
program into an own section. However that would move the symbols
within the "real" head.text section to different addresses, since the
".org" statements within head.S are relative to the head.text
section. If there is a new section in front, everything else will be
moved. Alternatively I could have adjusted all ".org" statements. But
this current solution seems to be the easiest one, since nobody really
cares where the _text symbol is actually located.

Reported-by: Zvonko Kosic <zkosic@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/vmlinux.lds.S | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 72307f108c40..6e2c42bd1c3b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -31,8 +31,14 @@ SECTIONS
 {
 	. = 0x00000000;
 	.text : {
-	_text = .;		/* Text and read-only data */
+		/* Text and read-only data */
 		HEAD_TEXT
+		/*
+		 * E.g. perf doesn't like symbols starting at address zero,
+		 * therefore skip the initial PSW and channel program located
+		 * at address zero and let _text start at 0x200.
+		 */
+	_text = 0x200;
 		TEXT_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT
-- 
cgit v1.2.3-59-g8ed1b


From 7e6c7fe95e3d0be80d0d05a2efde5c8fb97602ba Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Wed, 26 Apr 2017 17:25:50 +0800
Subject: arm: dts: mt7623: add clock-frequency to the a7 timer node to
 mt7623.dtsi

We need to tell the driver what the timers frequency is and that the core
has not be configured by the bootrom. Not doing so makes the unit not
boot.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm/boot/dts/mt7623.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi
index 402579ab70d2..3a9e9b6aea68 100644
--- a/arch/arm/boot/dts/mt7623.dtsi
+++ b/arch/arm/boot/dts/mt7623.dtsi
@@ -72,6 +72,8 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		clock-frequency = <13000000>;
+		arm,cpu-registers-not-fw-configured;
 	};
 
 	watchdog: watchdog@10007000 {
-- 
cgit v1.2.3-59-g8ed1b


From 1c0803652d82b9a62980886e019c9f70576028a5 Mon Sep 17 00:00:00 2001
From: yong mao <yong.mao@mediatek.com>
Date: Wed, 15 Mar 2017 15:26:39 +0800
Subject: ARM64: dts: mediatek: configure some fixed mmc parameters

configure some fixed mmc parameters

Signed-off-by: Yong Mao <yong.mao@mediatek.com>
Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 0ecaad4333a7..1c3634fa94bf 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -134,6 +134,9 @@
 	bus-width = <8>;
 	max-frequency = <50000000>;
 	cap-mmc-highspeed;
+	mediatek,hs200-cmd-int-delay=<26>;
+	mediatek,hs400-cmd-int-delay=<14>;
+	mediatek,hs400-cmd-resp-sel-rising;
 	vmmc-supply = <&mt6397_vemc_3v3_reg>;
 	vqmmc-supply = <&mt6397_vio18_reg>;
 	non-removable;
-- 
cgit v1.2.3-59-g8ed1b


From 6f92120892d94ef6c551da5aa2cf7bc9401c9903 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 21 Apr 2017 23:55:28 +0200
Subject: ARM: omap2+: make omap4_get_cpu1_ns_pa_addr declaration usable

When CONFIG_PM is disabled, we get a build error:

arch/arm/mach-omap2/omap-smp.c: In function 'omap4_smp_maybe_reset_cpu1':
arch/arm/mach-omap2/omap-smp.c:309:20: error: implicit declaration of function 'omap4_get_cpu1_ns_pa_addr'; did you mean 'omap4_get_scu_base'? [-Werror=implicit-function-declaration]

We need to fix this in multiple files, to ensure the declaration is visible,
to actually build the function without CONFIG_PM, and to only call it
when OMAP4 and/or OMAP5 are enabled.

Fixes: 351b7c490700 ("ARM: omap2+: Revert omap-smp.c changes resetting CPU1 during boot")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/common.h              |  3 ++-
 arch/arm/mach-omap2/omap-mpuss-lowpower.c | 10 +++++-----
 arch/arm/mach-omap2/omap-smp.c            | 11 +++++++----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 3089d3bfa19b..8cc6338fcb12 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -266,11 +266,12 @@ extern int omap4_cpu_kill(unsigned int cpu);
 extern const struct smp_operations omap4_smp_ops;
 #endif
 
+extern u32 omap4_get_cpu1_ns_pa_addr(void);
+
 #if defined(CONFIG_SMP) && defined(CONFIG_PM)
 extern int omap4_mpuss_init(void);
 extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state);
 extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state);
-extern u32 omap4_get_cpu1_ns_pa_addr(void);
 #else
 static inline int omap4_enter_lowpower(unsigned int cpu,
 					unsigned int power_state)
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 03ec6d307c82..4cfc4f9b2c69 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -213,11 +213,6 @@ static void __init save_l2x0_context(void)
 {}
 #endif
 
-u32 omap4_get_cpu1_ns_pa_addr(void)
-{
-	return old_cpu1_ns_pa_addr;
-}
-
 /**
  * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function
  * The purpose of this function is to manage low power programming
@@ -457,6 +452,11 @@ int __init omap4_mpuss_init(void)
 
 #endif
 
+u32 omap4_get_cpu1_ns_pa_addr(void)
+{
+	return old_cpu1_ns_pa_addr;
+}
+
 /*
  * For kexec, we must set CPU1_WAKEUP_NS_PA_ADDR to point to
  * current kernel's secondary_startup() early before
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 3faf454ba487..33e4953c61a8 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -306,7 +306,6 @@ static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
 
 	cpu1_startup_pa = readl_relaxed(cfg.wakeupgen_base +
 					OMAP_AUX_CORE_BOOT_1);
-	cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
 
 	/* Did the configured secondary_startup() get overwritten? */
 	if (!omap4_smp_cpu1_startup_valid(cpu1_startup_pa))
@@ -316,9 +315,13 @@ static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
 	 * If omap4 or 5 has NS_PA_ADDR configured, CPU1 may be in a
 	 * deeper idle state in WFI and will wake to an invalid address.
 	 */
-	if ((soc_is_omap44xx() || soc_is_omap54xx()) &&
-	    !omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
-		needs_reset = true;
+	if ((soc_is_omap44xx() || soc_is_omap54xx())) {
+		cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
+		if (!omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
+			needs_reset = true;
+	} else {
+		cpu1_ns_pa_addr = 0;
+	}
 
 	if (!needs_reset || !c->cpu1_rstctrl_va)
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 560d388950ceda5e7c7cdef7f3d9a8ff297bbf9d Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Wed, 3 May 2017 17:17:21 +0200
Subject: CIFS: silence lockdep splat in cifs_relock_file()

cifs_relock_file() can perform a down_write() on the inode's lock_sem even
though it was already performed in cifs_strict_readv().  Lockdep complains
about this.  AFAICS, there is no problem here, and lockdep just needs to be
told that this nesting is OK.

 =============================================
 [ INFO: possible recursive locking detected ]
 4.11.0+ #20 Not tainted
 ---------------------------------------------
 cat/701 is trying to acquire lock:
  (&cifsi->lock_sem){++++.+}, at: cifs_reopen_file+0x7a7/0xc00

 but task is already holding lock:
  (&cifsi->lock_sem){++++.+}, at: cifs_strict_readv+0x177/0x310

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&cifsi->lock_sem);
   lock(&cifsi->lock_sem);

  *** DEADLOCK ***

  May be due to missing lock nesting notation

 1 lock held by cat/701:
  #0:  (&cifsi->lock_sem){++++.+}, at: cifs_strict_readv+0x177/0x310

 stack backtrace:
 CPU: 0 PID: 701 Comm: cat Not tainted 4.11.0+ #20
 Call Trace:
  dump_stack+0x85/0xc2
  __lock_acquire+0x17dd/0x2260
  ? trace_hardirqs_on_thunk+0x1a/0x1c
  ? preempt_schedule_irq+0x6b/0x80
  lock_acquire+0xcc/0x260
  ? lock_acquire+0xcc/0x260
  ? cifs_reopen_file+0x7a7/0xc00
  down_read+0x2d/0x70
  ? cifs_reopen_file+0x7a7/0xc00
  cifs_reopen_file+0x7a7/0xc00
  ? printk+0x43/0x4b
  cifs_readpage_worker+0x327/0x8a0
  cifs_readpage+0x8c/0x2a0
  generic_file_read_iter+0x692/0xd00
  cifs_strict_readv+0x29f/0x310
  generic_file_splice_read+0x11c/0x1c0
  do_splice_to+0xa5/0xc0
  splice_direct_to_actor+0xfa/0x350
  ? generic_pipe_buf_nosteal+0x10/0x10
  do_splice_direct+0xb5/0xe0
  do_sendfile+0x278/0x3a0
  SyS_sendfile64+0xc4/0xe0
  entry_SYSCALL_64_fastpath+0x1f/0xbe

Signed-off-by: Rabin Vincent <rabinv@axis.com>
Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6ef78ad838e6..0fd081bd2a2f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -582,7 +582,7 @@ cifs_relock_file(struct cifsFileInfo *cfile)
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	int rc = 0;
 
-	down_read(&cinode->lock_sem);
+	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
 	if (cinode->can_cache_brlcks) {
 		/* can cache locks - no need to relock */
 		up_read(&cinode->lock_sem);
-- 
cgit v1.2.3-59-g8ed1b


From de1892b887eeb85ce458a93979c2108e6f329618 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Thu, 4 May 2017 07:54:04 -0500
Subject: Don't delay freeing mids when blocked on slow socket write of request

When processing responses, and in particular freeing mids (DeleteMidQEntry),
which is very important since it also frees the associated buffers (cifs_buf_release),
we can block a long time if (writes to) socket is slow due to low memory or networking
issues.

We can block in send (smb request) waiting for memory, and be blocked in processing
responess (which could free memory if we let it) - since they both grab the
server->srv_mutex.

In practice, in the DeleteMidQEntry case - there is no reason we need to
grab the srv_mutex so remove these around DeleteMidQEntry, and it allows
us to free memory faster.

Signed-off-by: Steve French <steve.french@primarydata.com>
Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifssmb.c   | 7 -------
 fs/cifs/smb2pdu.c   | 7 -------
 fs/cifs/transport.c | 2 --
 3 files changed, 16 deletions(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4c01b3f9abf0..4de3186d8a71 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -697,9 +697,7 @@ cifs_echo_callback(struct mid_q_entry *mid)
 {
 	struct TCP_Server_Info *server = mid->callback_data;
 
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, 1, CIFS_ECHO_OP);
 }
 
@@ -1599,9 +1597,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
 	}
 
 	queue_work(cifsiod_wq, &rdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, 1, 0);
 }
 
@@ -2058,7 +2054,6 @@ cifs_writev_callback(struct mid_q_entry *mid)
 {
 	struct cifs_writedata *wdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
-	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int written;
 	WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf;
 
@@ -2095,9 +2090,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
 	}
 
 	queue_work(cifsiod_wq, &wdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(tcon->ses->server, 1, 0);
 }
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 48ff7703b919..e0517f499c38 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2173,9 +2173,7 @@ smb2_echo_callback(struct mid_q_entry *mid)
 	if (mid->mid_state == MID_RESPONSE_RECEIVED)
 		credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
 
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, credits_received, CIFS_ECHO_OP);
 }
 
@@ -2433,9 +2431,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
 		cifs_stats_fail_inc(tcon, SMB2_READ_HE);
 
 	queue_work(cifsiod_wq, &rdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(server, credits_received, 0);
 }
 
@@ -2594,7 +2590,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
 {
 	struct cifs_writedata *wdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
-	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int written;
 	struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
 	unsigned int credits_received = 1;
@@ -2634,9 +2629,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
 		cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
 
 	queue_work(cifsiod_wq, &wdata->work);
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	add_credits(tcon->ses->server, credits_received, 0);
 }
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 4d64b5b8fc9c..de589d0d3739 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -613,9 +613,7 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
 	}
 	spin_unlock(&GlobalMid_Lock);
 
-	mutex_lock(&server->srv_mutex);
 	DeleteMidQEntry(mid);
-	mutex_unlock(&server->srv_mutex);
 	return rc;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ae157902aea13926e9596d091cad6bd87b3be084 Mon Sep 17 00:00:00 2001
From: Ping Gao <ping.a.gao@intel.com>
Date: Wed, 19 Apr 2017 14:02:31 +0800
Subject: drm/i915/gvt: avoid unnecessary vgpu switch

It's no need to switch vgpu if next vgpu is the same with current
vgpu, otherwise it will make performance drop in some case.

v2: correct the comments.

Signed-off-by: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/sched_policy.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 34b9acdf3479..6ac77f8cc405 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -53,9 +53,13 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 	enum intel_engine_id i;
 	struct intel_engine_cs *engine;
 
-	/* no target to schedule */
-	if (!scheduler->next_vgpu)
+	/* no need to schedule if next_vgpu is the same with current_vgpu,
+	 * let scheduler chose next_vgpu again by setting it to NULL.
+	 */
+	if (scheduler->next_vgpu == scheduler->current_vgpu) {
+		scheduler->next_vgpu = NULL;
 		return;
+	}
 
 	gvt_dbg_sched("try to schedule next vgpu %d\n",
 			scheduler->next_vgpu->id);
-- 
cgit v1.2.3-59-g8ed1b


From bb3338d3474e0329918fda9dae2c52751731eb58 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 8 May 2017 17:39:24 -0700
Subject: md/raid5-cache: in r5l_do_submit_io(), submit io->split_bio first

In r5l_do_submit_io(), it is necessary to check io->split_bio before
submit io->current_bio. This is because, endio of current_bio may
free the whole IO unit, and thus change io->split_bio.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 26ba09282e7c..a6a62e212cd3 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -622,20 +622,30 @@ static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io)
 	__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
 
+	/*
+	 * In case of journal device failures, submit_bio will get error
+	 * and calls endio, then active stripes will continue write
+	 * process. Therefore, it is not necessary to check Faulty bit
+	 * of journal device here.
+	 *
+	 * We can't check split_bio after current_bio is submitted. If
+	 * io->split_bio is null, after current_bio is submitted, current_bio
+	 * might already be completed and the io_unit is freed. We submit
+	 * split_bio first to avoid the issue.
+	 */
+	if (io->split_bio) {
+		if (io->has_flush)
+			io->split_bio->bi_opf |= REQ_PREFLUSH;
+		if (io->has_fua)
+			io->split_bio->bi_opf |= REQ_FUA;
+		submit_bio(io->split_bio);
+	}
+
 	if (io->has_flush)
 		io->current_bio->bi_opf |= REQ_PREFLUSH;
 	if (io->has_fua)
 		io->current_bio->bi_opf |= REQ_FUA;
 	submit_bio(io->current_bio);
-
-	if (!io->split_bio)
-		return;
-
-	if (io->has_flush)
-		io->split_bio->bi_opf |= REQ_PREFLUSH;
-	if (io->has_fua)
-		io->split_bio->bi_opf |= REQ_FUA;
-	submit_bio(io->split_bio);
 }
 
 /* deferred io_unit will be dispatched here */
-- 
cgit v1.2.3-59-g8ed1b


From e84188852a7239d7a144af12f7e5dac8fa88600b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 10 May 2017 21:05:16 +0200
Subject: tee: add ARM_SMCCC dependency

For the moment, the tee subsystem only makes sense in combination with
the op-tee driver that depends on ARM_SMCCC, so let's hide the subsystem
from users that can't select that.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/tee/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig
index 2330a4eb4e8b..a6df12d88f90 100644
--- a/drivers/tee/Kconfig
+++ b/drivers/tee/Kconfig
@@ -1,6 +1,7 @@
 # Generic Trusted Execution Environment Configuration
 config TEE
 	tristate "Trusted Execution Environment support"
+	depends on HAVE_ARM_SMCCC || COMPILE_TEST
 	select DMA_SHARED_BUFFER
 	select GENERIC_ALLOCATOR
 	help
-- 
cgit v1.2.3-59-g8ed1b


From efc0c21c9ea786d6f019d7df7b4e3932f3578d90 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Thu, 2 Mar 2017 12:23:45 +0100
Subject: s390: convert debug_info.ref_count from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/debug.h | 3 ++-
 arch/s390/kernel/debug.c      | 8 ++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 0206c8052328..df7b54ea956d 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -10,6 +10,7 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/time.h>
+#include <linux/refcount.h>
 #include <uapi/asm/debug.h>
 
 #define DEBUG_MAX_LEVEL            6  /* debug levels range from 0 to 6 */
@@ -31,7 +32,7 @@ struct debug_view;
 typedef struct debug_info {	
 	struct debug_info* next;
 	struct debug_info* prev;
-	atomic_t ref_count;
+	refcount_t ref_count;
 	spinlock_t lock;			
 	int level;
 	int nr_areas;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 530226b6cb19..86b3e74f569e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -277,7 +277,7 @@ debug_info_alloc(const char *name, int pages_per_area, int nr_areas,
 	memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
 	memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS *
 		sizeof(struct dentry*));
-	atomic_set(&(rc->ref_count), 0);
+	refcount_set(&(rc->ref_count), 0);
 
 	return rc;
 
@@ -361,7 +361,7 @@ debug_info_create(const char *name, int pages_per_area, int nr_areas,
         debug_area_last = rc;
         rc->next = NULL;
 
-	debug_info_get(rc);
+	refcount_set(&rc->ref_count, 1);
 out:
 	return rc;
 }
@@ -416,7 +416,7 @@ static void
 debug_info_get(debug_info_t * db_info)
 {
 	if (db_info)
-		atomic_inc(&db_info->ref_count);
+		refcount_inc(&db_info->ref_count);
 }
 
 /*
@@ -431,7 +431,7 @@ debug_info_put(debug_info_t *db_info)
 
 	if (!db_info)
 		return;
-	if (atomic_dec_and_test(&db_info->ref_count)) {
+	if (refcount_dec_and_test(&db_info->ref_count)) {
 		for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
 			if (!db_info->views[i])
 				continue;
-- 
cgit v1.2.3-59-g8ed1b


From df3ed932394488e57e72dd0e73c224d1804fdc8f Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 11 May 2017 10:15:10 -0500
Subject: Partially Revert "of: fix sparse warnings in fdt, irq, reserved mem,
 and resolver code"

A change to function pointers that was meant to address a sparse warning
turned out to cause hundreds of new gcc-7 warnings:

include/linux/of_irq.h:11:13: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers]
drivers/of/of_reserved_mem.c: In function '__reserved_mem_init_node':
drivers/of/of_reserved_mem.c:200:7: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers]
   int const (*initfn)(struct reserved_mem *rmem) = i->data;

Turns out the sparse warnings were spurious and have been fixed in
upstream sparse since 0.5.0 in commit "sparse: treat function pointers
as pointers to const data".

This partially reverts commit 17a70355ea576843a7ac851f1db26872a50b2850.

Fixes: 17a70355ea57 ("of: fix sparse warnings in fdt, irq, reserved mem, and resolver code")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/of_reserved_mem.c | 2 +-
 include/linux/of_irq.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 4dec07ea510f..d507c3569a88 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -197,7 +197,7 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
 	const struct of_device_id *i;
 
 	for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
-		int const (*initfn)(struct reserved_mem *rmem) = i->data;
+		reservedmem_of_init_fn initfn = i->data;
 		const char *compat = i->compatible;
 
 		if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index ec6b11deb773..1e0deb8e8494 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -8,7 +8,7 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-typedef int const (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
+typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 
 /*
  * Workarounds only applied to 32bit powermac machines
-- 
cgit v1.2.3-59-g8ed1b


From a00ebd1cf12c378a1d4f7a1d6daf1d76c1eaad82 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Thu, 11 May 2017 10:21:46 +0200
Subject: drbd: fix request leak introduced by locking/atomic, kref: Kill
 kref_sub()

When killing kref_sub(), the unconditional additional kref_get()
was not properly paired with the necessary kref_put(), causing
a leak of struct drbd_requests (~ 224 Bytes) per submitted bio,
and breaking DRBD in general, as the destructor of those "drbd_requests"
does more than just the mempoll_free().

Fixes: bdfafc4ffdd2 ("locking/atomic, kref: Kill kref_sub()")
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Cc: stable@vger.kernel.org # v4.11
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/drbd/drbd_req.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index b5730e17b455..656624314f0d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -315,24 +315,32 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 }
 
 /* still holds resource->req_lock */
-static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
+static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
 {
 	struct drbd_device *device = req->device;
 	D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED));
 
+	if (!put)
+		return;
+
 	if (!atomic_sub_and_test(put, &req->completion_ref))
-		return 0;
+		return;
 
 	drbd_req_complete(req, m);
 
+	/* local completion may still come in later,
+	 * we need to keep the req object around. */
+	if (req->rq_state & RQ_LOCAL_ABORTED)
+		return;
+
 	if (req->rq_state & RQ_POSTPONED) {
 		/* don't destroy the req object just yet,
 		 * but queue it for retry */
 		drbd_restart_request(req);
-		return 0;
+		return;
 	}
 
-	return 1;
+	kref_put(&req->kref, drbd_req_destroy);
 }
 
 static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
@@ -519,12 +527,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 	if (req->i.waiting)
 		wake_up(&device->misc_wait);
 
-	if (c_put) {
-		if (drbd_req_put_completion_ref(req, m, c_put))
-			kref_put(&req->kref, drbd_req_destroy);
-	} else {
-		kref_put(&req->kref, drbd_req_destroy);
-	}
+	drbd_req_put_completion_ref(req, m, c_put);
+	kref_put(&req->kref, drbd_req_destroy);
 }
 
 static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
@@ -1366,8 +1370,7 @@ nodata:
 	}
 
 out:
-	if (drbd_req_put_completion_ref(req, &m, 1))
-		kref_put(&req->kref, drbd_req_destroy);
+	drbd_req_put_completion_ref(req, &m, 1);
 	spin_unlock_irq(&resource->req_lock);
 
 	/* Even though above is a kref_put(), this is safe.
-- 
cgit v1.2.3-59-g8ed1b


From 1a4a5bf52a4adb477adb075e5afce925824ad132 Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Tue, 9 May 2017 18:27:33 +0800
Subject: driver: vrf: Fix one possible use-after-free issue

The current codes only deal with the case that the skb is dropped, it
may meet one use-after-free issue when NF_HOOK returns 0 that means
the skb is stolen by one netfilter rule or hook.

When one netfilter rule or hook stoles the skb and return NF_STOLEN,
it means the skb is taken by the rule, and other modules should not
touch this skb ever. Maybe the skb is queued or freed directly by the
rule.

Now uses the nf_hook instead of NF_HOOK to get the result of netfilter,
and check the return value of nf_hook. Only when its value equals 1, it
means the skb could go ahead. Or reset the skb as NULL.

BTW, because vrf_rcv_finish is empty function, so needn't invoke it
even though nf_hook returns 1. But we need to modify vrf_rcv_finish
to deal with the NF_STOLEN case.

There are two cases when skb is stolen.
1. The skb is stolen and freed directly.
   There is nothing we need to do, and vrf_rcv_finish isn't invoked.
2. The skb is queued and reinjected again.
   The vrf_rcv_finish would be invoked as okfn, so need to free the
   skb in it.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index ceda5861da78..db882493875c 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -989,6 +989,7 @@ static u32 vrf_fib_table(const struct net_device *dev)
 
 static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	kfree_skb(skb);
 	return 0;
 }
 
@@ -998,7 +999,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
 {
 	struct net *net = dev_net(dev);
 
-	if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0)
+	if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
 		skb = NULL;    /* kfree_skb(skb) handled by nf code */
 
 	return skb;
-- 
cgit v1.2.3-59-g8ed1b


From 8d66c30b12ed3cb533696dea8b9a9eadd5da426a Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Tue, 9 May 2017 15:40:38 +0200
Subject: net: qca_spi: Fix alignment issues in rx path

The qca_spi driver causes alignment issues on ARM devices.
So fix this by using netdev_alloc_skb_ip_align().

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 513e6c74e199..24ca7df15d07 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -296,8 +296,9 @@ qcaspi_receive(struct qcaspi *qca)
 
 	/* Allocate rx SKB if we don't have one available. */
 	if (!qca->rx_skb) {
-		qca->rx_skb = netdev_alloc_skb(net_dev,
-					       net_dev->mtu + VLAN_ETH_HLEN);
+		qca->rx_skb = netdev_alloc_skb_ip_align(net_dev,
+							net_dev->mtu +
+							VLAN_ETH_HLEN);
 		if (!qca->rx_skb) {
 			netdev_dbg(net_dev, "out of RX resources\n");
 			qca->stats.out_of_mem++;
@@ -377,7 +378,7 @@ qcaspi_receive(struct qcaspi *qca)
 					qca->rx_skb, qca->rx_skb->dev);
 				qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
 				netif_rx_ni(qca->rx_skb);
-				qca->rx_skb = netdev_alloc_skb(net_dev,
+				qca->rx_skb = netdev_alloc_skb_ip_align(net_dev,
 					net_dev->mtu + VLAN_ETH_HLEN);
 				if (!qca->rx_skb) {
 					netdev_dbg(net_dev, "out of RX resources\n");
@@ -759,7 +760,8 @@ qcaspi_netdev_init(struct net_device *dev)
 	if (!qca->rx_buffer)
 		return -ENOBUFS;
 
-	qca->rx_skb = netdev_alloc_skb(dev, qca->net_dev->mtu + VLAN_ETH_HLEN);
+	qca->rx_skb = netdev_alloc_skb_ip_align(dev, qca->net_dev->mtu +
+						VLAN_ETH_HLEN);
 	if (!qca->rx_skb) {
 		kfree(qca->rx_buffer);
 		netdev_info(qca->net_dev, "Failed to allocate RX sk_buff.\n");
-- 
cgit v1.2.3-59-g8ed1b


From 0fe20fafd1791f993806d417048213ec57b81045 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 9 May 2017 17:19:42 +0100
Subject: netxen_nic: set rcode to the return status from the call to
 netxen_issue_cmd

Currently rcode is being initialized to NX_RCODE_SUCCESS and later it
is checked to see if it is not NX_RCODE_SUCCESS which is never true. It
appears that there is an unintentional missing assignment of rcode from
the return of the call to netxen_issue_cmd() that was dropped in
an earlier fix, so add it in.

Detected by CoverityScan, CID#401900 ("Logically dead code")

Fixes: 2dcd5d95ad6b2 ("netxen_nic: fix cdrp race condition")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index b8d5270359cd..e30676515529 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -247,7 +247,7 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu)
 	cmd.req.arg3 = 0;
 
 	if (recv_ctx->state == NX_HOST_CTX_STATE_ACTIVE)
-		netxen_issue_cmd(adapter, &cmd);
+		rcode = netxen_issue_cmd(adapter, &cmd);
 
 	if (rcode != NX_RCODE_SUCCESS)
 		return -EIO;
-- 
cgit v1.2.3-59-g8ed1b


From 83eaddab4378db256d00d295bda6ca997cd13a52 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 9 May 2017 16:59:54 -0700
Subject: ipv6/dccp: do not inherit ipv6_mc_list from parent

Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent")
we should clear ipv6_mc_list etc. for IPv6 sockets too.

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv6.c     | 6 ++++++
 net/ipv6/tcp_ipv6.c | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d9b6a4e403e7..b6bbb71e713e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
+		newnp->ipv6_mc_list = NULL;
+		newnp->ipv6_ac_list = NULL;
+		newnp->ipv6_fl_list = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 
@@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 	/* Clone RX bits */
 	newnp->rxopt.all = np->rxopt.all;
 
+	newnp->ipv6_mc_list = NULL;
+	newnp->ipv6_ac_list = NULL;
+	newnp->ipv6_fl_list = NULL;
 	newnp->pktoptions = NULL;
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index aeb9497b5bb7..df5a9ff71f3f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1062,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
 #endif
 
+		newnp->ipv6_mc_list = NULL;
 		newnp->ipv6_ac_list = NULL;
 		newnp->ipv6_fl_list = NULL;
 		newnp->pktoptions  = NULL;
@@ -1131,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	   First: no IPv4 options.
 	 */
 	newinet->inet_opt = NULL;
+	newnp->ipv6_mc_list = NULL;
 	newnp->ipv6_ac_list = NULL;
 	newnp->ipv6_fl_list = NULL;
 
-- 
cgit v1.2.3-59-g8ed1b


From b60161668199ac62011c024adc9e66713b9554e7 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Wed, 10 May 2017 11:20:27 -0400
Subject: mdio: mux: Correct mdio_mux_init error path issues

There is a potential unnecessary refcount decrement on error path of
put_device(&pb->mii_bus->dev), as it is possible to avoid the
of_mdio_find_bus() call if mux_bus is specified by the calling function.

The same put_device() is not called in the error path if the
devm_kzalloc of pb fails.  This caused the variable used in the
put_device() to be changed, as the pb pointer was obviously not set up.

There is an unnecessary of_node_get() on child_bus_node if the
of_mdiobus_register() is successful, as the
for_each_available_child_of_node() automatically increments this.
Thus the refcount on this node will always be +1 more than it should be.

There is no of_node_put() on child_bus_node if the of_mdiobus_register()
call fails.

Finally, it is lacking devm_kfree() of pb in the error path.  While this
might not be technically necessary, it was present in other parts of the
function.  So, I am adding it where necessary to make it uniform.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: f20e6657a875 ("mdio: mux: Enhanced MDIO mux framework for integrated multiplexers")
Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mux.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 963838d4fac1..6943c5ece44a 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -122,10 +122,9 @@ int mdio_mux_init(struct device *dev,
 	pb = devm_kzalloc(dev, sizeof(*pb), GFP_KERNEL);
 	if (pb == NULL) {
 		ret_val = -ENOMEM;
-		goto err_parent_bus;
+		goto err_pb_kz;
 	}
 
-
 	pb->switch_data = data;
 	pb->switch_fn = switch_fn;
 	pb->current_child = -1;
@@ -154,6 +153,7 @@ int mdio_mux_init(struct device *dev,
 		cb->mii_bus = mdiobus_alloc();
 		if (!cb->mii_bus) {
 			ret_val = -ENOMEM;
+			devm_kfree(dev, cb);
 			of_node_put(child_bus_node);
 			break;
 		}
@@ -169,8 +169,8 @@ int mdio_mux_init(struct device *dev,
 		if (r) {
 			mdiobus_free(cb->mii_bus);
 			devm_kfree(dev, cb);
+			of_node_put(child_bus_node);
 		} else {
-			of_node_get(child_bus_node);
 			cb->next = pb->children;
 			pb->children = cb;
 		}
@@ -181,9 +181,11 @@ int mdio_mux_init(struct device *dev,
 		return 0;
 	}
 
+	devm_kfree(dev, pb);
+err_pb_kz:
 	/* balance the reference of_mdio_find_bus() took */
-	put_device(&pb->mii_bus->dev);
-
+	if (!mux_bus)
+		put_device(&parent_bus->dev);
 err_parent_bus:
 	of_node_put(parent_bus_node);
 	return ret_val;
-- 
cgit v1.2.3-59-g8ed1b


From 9111e7880ccf419548c7b0887df020b08eadb075 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Wed, 10 May 2017 19:07:51 +0200
Subject: s390/qeth: handle sysfs error during initialization

When setting up the device from within the layer discipline's
probe routine, creating the layer-specific sysfs attributes can fail.
Report this error back to the caller, and handle it by
releasing the layer discipline.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
[jwi: updated commit msg, moved an OSN change to a subsequent patch]
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 4 +++-
 drivers/s390/net/qeth_core_sys.c  | 2 ++
 drivers/s390/net/qeth_l2_main.c   | 5 ++++-
 drivers/s390/net/qeth_l3_main.c   | 5 ++++-
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 38114a8d56e0..5bfd7c15d6a9 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5731,8 +5731,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
 		if (rc)
 			goto err;
 		rc = card->discipline->setup(card->gdev);
-		if (rc)
+		if (rc) {
+			qeth_core_free_discipline(card);
 			goto err;
+		}
 	}
 	rc = card->discipline->set_online(gdev);
 err:
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 75b29fd2fcf4..412ff61891ac 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -426,6 +426,8 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
 		goto out;
 
 	rc = card->discipline->setup(card->gdev);
+	if (rc)
+		qeth_core_free_discipline(card);
 out:
 	mutex_unlock(&card->discipline_mutex);
 	return rc ? rc : count;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 1b07f382d74c..2d2623182abf 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -883,8 +883,11 @@ static int qeth_l2_stop(struct net_device *dev)
 static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+	int rc;
 
-	qeth_l2_create_device_attributes(&gdev->dev);
+	rc = qeth_l2_create_device_attributes(&gdev->dev);
+	if (rc)
+		return rc;
 	INIT_LIST_HEAD(&card->vid_list);
 	hash_init(card->mac_htable);
 	card->options.layer2 = 1;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 6e0354ef4b86..ae70daf33bd5 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3039,8 +3039,11 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+	int rc;
 
-	qeth_l3_create_device_attributes(&gdev->dev);
+	rc = qeth_l3_create_device_attributes(&gdev->dev);
+	if (rc)
+		return rc;
 	card->options.layer2 = 0;
 	card->info.hwtrap = 0;
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 2d2ebb3ed0c6acfb014f98e427298673a5d07b82 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Wed, 10 May 2017 19:07:52 +0200
Subject: s390/qeth: unbreak OSM and OSN support

commit b4d72c08b358 ("qeth: bridgeport support - basic control")
broke the support for OSM and OSN devices as follows:

As OSM and OSN are L2 only, qeth_core_probe_device() does an early
setup by loading the l2 discipline and calling qeth_l2_probe_device().
In this context, adding the l2-specific bridgeport sysfs attributes
via qeth_l2_create_device_attributes() hits a BUG_ON in fs/sysfs/group.c,
since the basic sysfs infrastructure for the device hasn't been
established yet.

Note that OSN actually has its own unique sysfs attributes
(qeth_osn_devtype), so the additional attributes shouldn't be created
at all.
For OSM, add a new qeth_l2_devtype that contains all the common
and l2-specific sysfs attributes.
When qeth_core_probe_device() does early setup for OSM or OSN, assign
the corresponding devtype so that the ccwgroup probe code creates the
full set of sysfs attributes.
This allows us to skip qeth_l2_create_device_attributes() in case
of an early setup.

Any device that can't do early setup will initially have only the
generic sysfs attributes, and when it's probed later
qeth_l2_probe_device() adds the l2-specific attributes.

If an early-setup device is removed (by calling ccwgroup_ungroup()),
device_unregister() will - using the devtype - delete the
l2-specific attributes before qeth_l2_remove_device() is called.
So make sure to not remove them twice.

What complicates the issue is that qeth_l2_probe_device() and
qeth_l2_remove_device() is also called on a device when its
layer2 attribute changes (ie. its layer mode is switched).
For early-setup devices this wouldn't work properly - we wouldn't
remove the l2-specific attributes when switching to L3.
But switching the layer mode doesn't actually make any sense;
we already decided that the device can only operate in L2!
So just refuse to switch the layer mode on such devices. Note that
OSN doesn't have a layer2 attribute, so we only need to special-case
OSM.

Based on an initial patch by Ursula Braun.

Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control")
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  4 ++++
 drivers/s390/net/qeth_core_main.c | 17 +++++++++--------
 drivers/s390/net/qeth_core_sys.c  | 22 ++++++++++++++--------
 drivers/s390/net/qeth_l2.h        |  2 ++
 drivers/s390/net/qeth_l2_main.c   | 17 +++++++++++++----
 drivers/s390/net/qeth_l2_sys.c    |  8 ++++++++
 drivers/s390/net/qeth_l3_main.c   |  1 +
 7 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index f6aa21176d89..30bc6105aac3 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -701,6 +701,7 @@ enum qeth_discipline_id {
 };
 
 struct qeth_discipline {
+	const struct device_type *devtype;
 	void (*start_poll)(struct ccw_device *, int, unsigned long);
 	qdio_handler_t *input_handler;
 	qdio_handler_t *output_handler;
@@ -875,6 +876,9 @@ extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
 extern const struct attribute_group *qeth_generic_attr_groups[];
 extern const struct attribute_group *qeth_osn_attr_groups[];
+extern const struct attribute_group qeth_device_attr_group;
+extern const struct attribute_group qeth_device_blkt_group;
+extern const struct device_type qeth_generic_devtype;
 extern struct workqueue_struct *qeth_wq;
 
 int qeth_card_hw_is_reachable(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 5bfd7c15d6a9..fc6d85f2b38d 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5530,10 +5530,12 @@ void qeth_core_free_discipline(struct qeth_card *card)
 	card->discipline = NULL;
 }
 
-static const struct device_type qeth_generic_devtype = {
+const struct device_type qeth_generic_devtype = {
 	.name = "qeth_generic",
 	.groups = qeth_generic_attr_groups,
 };
+EXPORT_SYMBOL_GPL(qeth_generic_devtype);
+
 static const struct device_type qeth_osn_devtype = {
 	.name = "qeth_osn",
 	.groups = qeth_osn_attr_groups,
@@ -5659,23 +5661,22 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
 		goto err_card;
 	}
 
-	if (card->info.type == QETH_CARD_TYPE_OSN)
-		gdev->dev.type = &qeth_osn_devtype;
-	else
-		gdev->dev.type = &qeth_generic_devtype;
-
 	switch (card->info.type) {
 	case QETH_CARD_TYPE_OSN:
 	case QETH_CARD_TYPE_OSM:
 		rc = qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2);
 		if (rc)
 			goto err_card;
+
+		gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN)
+					? card->discipline->devtype
+					: &qeth_osn_devtype;
 		rc = card->discipline->setup(card->gdev);
 		if (rc)
 			goto err_disc;
-	case QETH_CARD_TYPE_OSD:
-	case QETH_CARD_TYPE_OSX:
+		break;
 	default:
+		gdev->dev.type = &qeth_generic_devtype;
 		break;
 	}
 
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 412ff61891ac..db6a285d41e0 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -413,12 +413,16 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
 
 	if (card->options.layer2 == newdis)
 		goto out;
-	else {
-		card->info.mac_bits  = 0;
-		if (card->discipline) {
-			card->discipline->remove(card->gdev);
-			qeth_core_free_discipline(card);
-		}
+	if (card->info.type == QETH_CARD_TYPE_OSM) {
+		/* fixed layer, can't switch */
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	card->info.mac_bits = 0;
+	if (card->discipline) {
+		card->discipline->remove(card->gdev);
+		qeth_core_free_discipline(card);
 	}
 
 	rc = qeth_core_load_discipline(card, newdis);
@@ -705,10 +709,11 @@ static struct attribute *qeth_blkt_device_attrs[] = {
 	&dev_attr_inter_jumbo.attr,
 	NULL,
 };
-static struct attribute_group qeth_device_blkt_group = {
+const struct attribute_group qeth_device_blkt_group = {
 	.name = "blkt",
 	.attrs = qeth_blkt_device_attrs,
 };
+EXPORT_SYMBOL_GPL(qeth_device_blkt_group);
 
 static struct attribute *qeth_device_attrs[] = {
 	&dev_attr_state.attr,
@@ -728,9 +733,10 @@ static struct attribute *qeth_device_attrs[] = {
 	&dev_attr_switch_attrs.attr,
 	NULL,
 };
-static struct attribute_group qeth_device_attr_group = {
+const struct attribute_group qeth_device_attr_group = {
 	.attrs = qeth_device_attrs,
 };
+EXPORT_SYMBOL_GPL(qeth_device_attr_group);
 
 const struct attribute_group *qeth_generic_attr_groups[] = {
 	&qeth_device_attr_group,
diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h
index 29d9fb3890ad..0d59f9a45ea9 100644
--- a/drivers/s390/net/qeth_l2.h
+++ b/drivers/s390/net/qeth_l2.h
@@ -8,6 +8,8 @@
 
 #include "qeth_core.h"
 
+extern const struct attribute_group *qeth_l2_attr_groups[];
+
 int qeth_l2_create_device_attributes(struct device *);
 void qeth_l2_remove_device_attributes(struct device *);
 void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2d2623182abf..04666fe231aa 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -880,14 +880,21 @@ static int qeth_l2_stop(struct net_device *dev)
 	return 0;
 }
 
+static const struct device_type qeth_l2_devtype = {
+	.name = "qeth_layer2",
+	.groups = qeth_l2_attr_groups,
+};
+
 static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 	int rc;
 
-	rc = qeth_l2_create_device_attributes(&gdev->dev);
-	if (rc)
-		return rc;
+	if (gdev->dev.type == &qeth_generic_devtype) {
+		rc = qeth_l2_create_device_attributes(&gdev->dev);
+		if (rc)
+			return rc;
+	}
 	INIT_LIST_HEAD(&card->vid_list);
 	hash_init(card->mac_htable);
 	card->options.layer2 = 1;
@@ -899,7 +906,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&cgdev->dev);
 
-	qeth_l2_remove_device_attributes(&cgdev->dev);
+	if (cgdev->dev.type == &qeth_generic_devtype)
+		qeth_l2_remove_device_attributes(&cgdev->dev);
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
@@ -1272,6 +1280,7 @@ static int qeth_l2_control_event(struct qeth_card *card,
 }
 
 struct qeth_discipline qeth_l2_discipline = {
+	.devtype = &qeth_l2_devtype,
 	.start_poll = qeth_qdio_start_poll,
 	.input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
 	.output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c
index 687972356d6b..9696baa49e2d 100644
--- a/drivers/s390/net/qeth_l2_sys.c
+++ b/drivers/s390/net/qeth_l2_sys.c
@@ -269,3 +269,11 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
 	} else
 		qeth_bridgeport_an_set(card, 0);
 }
+
+const struct attribute_group *qeth_l2_attr_groups[] = {
+	&qeth_device_attr_group,
+	&qeth_device_blkt_group,
+	/* l2 specific, see l2_{create,remove}_device_attributes(): */
+	&qeth_l2_bridgeport_attr_group,
+	NULL,
+};
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ae70daf33bd5..6c2146fc831a 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3309,6 +3309,7 @@ static int qeth_l3_control_event(struct qeth_card *card,
 }
 
 struct qeth_discipline qeth_l3_discipline = {
+	.devtype = &qeth_generic_devtype,
 	.start_poll = qeth_qdio_start_poll,
 	.input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
 	.output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
-- 
cgit v1.2.3-59-g8ed1b


From 25e2c341e7818a394da9abc403716278ee646014 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Wed, 10 May 2017 19:07:53 +0200
Subject: s390/qeth: avoid null pointer dereference on OSN

Access card->dev only after checking whether's its valid.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 04666fe231aa..bd2df62a5cdf 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -965,7 +965,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	case QETH_CARD_TYPE_OSN:
 		card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN,
 					 ether_setup);
-		card->dev->flags |= IFF_NOARP;
 		break;
 	default:
 		card->dev = alloc_etherdev(0);
@@ -980,9 +979,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	card->dev->min_mtu = 64;
 	card->dev->max_mtu = ETH_MAX_MTU;
 	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	card->dev->ethtool_ops =
-		(card->info.type != QETH_CARD_TYPE_OSN) ?
-		&qeth_l2_ethtool_ops : &qeth_l2_osn_ops;
+	if (card->info.type == QETH_CARD_TYPE_OSN) {
+		card->dev->ethtool_ops = &qeth_l2_osn_ops;
+		card->dev->flags |= IFF_NOARP;
+	} else {
+		card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
+	}
 	card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
 		card->dev->hw_features = NETIF_F_SG;
-- 
cgit v1.2.3-59-g8ed1b


From ebccc7397e4a49ff64c8f44a54895de9d32fe742 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Wed, 10 May 2017 19:07:54 +0200
Subject: s390/qeth: add missing hash table initializations

commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
added new hash tables, but missed to initialize them.

Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Reviewed-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 6c2146fc831a..d8df1e635163 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3044,6 +3044,8 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
 	rc = qeth_l3_create_device_attributes(&gdev->dev);
 	if (rc)
 		return rc;
+	hash_init(card->ip_htable);
+	hash_init(card->ip_mc_htable);
 	card->options.layer2 = 0;
 	card->info.hwtrap = 0;
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 5b6cb43b4d625b04a4049d727a116edbfe5cf0f4 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Wed, 10 May 2017 10:28:05 -0700
Subject: net: ethernet: ti: netcp_core: return error while dma channel open
 issue

Fix error path while dma open channel issue. Also, no need to check output
on NULL if it's never returned.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/netcp_core.c | 6 ++++--
 drivers/soc/ti/knav_dma.c            | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 729a7da90b5b..e6222e535019 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1353,9 +1353,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe)
 
 	tx_pipe->dma_channel = knav_dma_open_channel(dev,
 				tx_pipe->dma_chan_name, &config);
-	if (IS_ERR_OR_NULL(tx_pipe->dma_channel)) {
+	if (IS_ERR(tx_pipe->dma_channel)) {
 		dev_err(dev, "failed opening tx chan(%s)\n",
 			tx_pipe->dma_chan_name);
+		ret = PTR_ERR(tx_pipe->dma_channel);
 		goto err;
 	}
 
@@ -1673,9 +1674,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
 
 	netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device,
 					netcp->dma_chan_name, &config);
-	if (IS_ERR_OR_NULL(netcp->rx_channel)) {
+	if (IS_ERR(netcp->rx_channel)) {
 		dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n",
 			netcp->dma_chan_name);
+		ret = PTR_ERR(netcp->rx_channel);
 		goto fail;
 	}
 
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c
index ecebe2eecc3a..026182d3b27c 100644
--- a/drivers/soc/ti/knav_dma.c
+++ b/drivers/soc/ti/knav_dma.c
@@ -413,7 +413,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name,
  * @name:	slave channel name
  * @config:	dma configuration parameters
  *
- * Returns pointer to appropriate DMA channel on success or NULL.
+ * Returns pointer to appropriate DMA channel on success or error.
  */
 void *knav_dma_open_channel(struct device *dev, const char *name,
 					struct knav_dma_cfg *config)
-- 
cgit v1.2.3-59-g8ed1b


From d8b54110ee944de522ccd3531191f39986ec20f9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 11 May 2017 01:53:15 +0200
Subject: bpf, arm64: fix faulty emission of map access in tail calls

Shubham was recently asking on netdev why in arm64 JIT we don't multiply
the index for accessing the tail call map by 8. That led me into testing
out arm64 JIT wrt tail calls and it turned out I got a NULL pointer
dereference on the tail call.

The buggy access is at:

  prog = array->ptrs[index];
  if (prog == NULL)
      goto out;

  [...]
  00000060:  d2800e0a  mov x10, #0x70 // #112
  00000064:  f86a682a  ldr x10, [x1,x10]
  00000068:  f862694b  ldr x11, [x10,x2]
  0000006c:  b40000ab  cbz x11, 0x00000080
  [...]

The code triggering the crash is f862694b. x1 at the time contains the
address of the bpf array, x10 offsetof(struct bpf_array, ptrs). Meaning,
above we load the pointer to the program at map slot 0 into x10. x10
can then be NULL if the slot is not occupied, which we later on try to
access with a user given offset in x2 that is the map index.

Fix this by emitting the following instead:

  [...]
  00000060:  d2800e0a  mov x10, #0x70 // #112
  00000064:  8b0a002a  add x10, x1, x10
  00000068:  d37df04b  lsl x11, x2, #3
  0000006c:  f86b694b  ldr x11, [x10,x11]
  00000070:  b40000ab  cbz x11, 0x00000084
  [...]

This basically adds the offset to ptrs to the base address of the bpf
array we got and we later on access the map with an index * 8 offset
relative to that. The tail call map itself is basically one large area
with meta data at the head followed by the array of prog pointers.
This makes tail calls working again, tested on Cavium ThunderX ARMv8.

Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper")
Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/net/bpf_jit_comp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c6e53580aefe..71f930501ade 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -253,8 +253,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	 */
 	off = offsetof(struct bpf_array, ptrs);
 	emit_a64_mov_i64(tmp, off, ctx);
-	emit(A64_LDR64(tmp, r2, tmp), ctx);
-	emit(A64_LDR64(prg, tmp, r3), ctx);
+	emit(A64_ADD(1, tmp, r2, tmp), ctx);
+	emit(A64_LSL(1, prg, r3, 3), ctx);
+	emit(A64_LDR64(prg, tmp, prg), ctx);
 	emit(A64_CBZ(1, prg, jmp_offset), ctx);
 
 	/* goto *(prog->bpf_func + prologue_size); */
-- 
cgit v1.2.3-59-g8ed1b


From d1174416747d790d750742d0514915deeed93acf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 May 2017 11:22:52 -0700
Subject: bpf: Track alignment of register values in the verifier.

Currently if we add only constant values to pointers we can fully
validate the alignment, and properly check if we need to reject the
program on !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS architectures.

However, once an unknown value is introduced we only allow byte sized
memory accesses which is too restrictive.

Add logic to track the known minimum alignment of register values,
and propagate this state into registers containing pointers.

The most common paradigm that makes use of this new logic is computing
the transport header using the IP header length field.  For example:

	struct ethhdr *ep = skb->data;
	struct iphdr *iph = (struct iphdr *) (ep + 1);
	struct tcphdr *th;
 ...
	n = iph->ihl;
	th = ((void *)iph + (n * 4));
	port = th->dest;

The existing code will reject the load of th->dest because it cannot
validate that the alignment is at least 2 once "n * 4" is added the
the packet pointer.

In the new code, the register holding "n * 4" will have a reg->min_align
value of 4, because any value multiplied by 4 will be at least 4 byte
aligned.  (actually, the eBPF code emitted by the compiler in this case
is most likely to use a shift left by 2, but the end result is identical)

At the critical addition:

	th = ((void *)iph + (n * 4));

The register holding 'th' will start with reg->off value of 14.  The
pointer addition will transform that reg into something that looks like:

	reg->aux_off = 14
	reg->aux_off_align = 4

Next, the verifier will look at the th->dest load, and it will see
a load offset of 2, and first check:

	if (reg->aux_off_align % size)

which will pass because aux_off_align is 4.  reg_off will be computed:

	reg_off = reg->off;
 ...
		reg_off += reg->aux_off;

plus we have off==2, and it will thus check:

	if ((NET_IP_ALIGN + reg_off + off) % size != 0)

which evaluates to:

	if ((NET_IP_ALIGN + 14 + 2) % size != 0)

On strict alignment architectures, NET_IP_ALIGN is 2, thus:

	if ((2 + 14 + 2) % size != 0)

which passes.

These pointer transformations and checks work regardless of whether
the constant offset or the variable with known alignment is added
first to the pointer register.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h |   3 ++
 kernel/bpf/verifier.c        | 108 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5efb4db44e1e..7c6a51924afc 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -40,6 +40,9 @@ struct bpf_reg_state {
 	 */
 	s64 min_value;
 	u64 max_value;
+	u32 min_align;
+	u32 aux_off;
+	u32 aux_off_align;
 };
 
 enum bpf_stack_slot_type {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c5b56c92f8e2..cc7b626fa447 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -241,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state)
 		if (reg->max_value != BPF_REGISTER_MAX_RANGE)
 			verbose(",max_value=%llu",
 				(unsigned long long)reg->max_value);
+		if (reg->min_align)
+			verbose(",min_align=%u", reg->min_align);
+		if (reg->aux_off)
+			verbose(",aux_off=%u", reg->aux_off);
+		if (reg->aux_off_align)
+			verbose(",aux_off_align=%u", reg->aux_off_align);
 	}
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] == STACK_SPILL)
@@ -466,6 +472,9 @@ static void init_reg_state(struct bpf_reg_state *regs)
 		regs[i].imm = 0;
 		regs[i].min_value = BPF_REGISTER_MIN_RANGE;
 		regs[i].max_value = BPF_REGISTER_MAX_RANGE;
+		regs[i].min_align = 0;
+		regs[i].aux_off = 0;
+		regs[i].aux_off_align = 0;
 	}
 
 	/* frame pointer */
@@ -492,6 +501,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 {
 	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+	regs[regno].min_align = 0;
 }
 
 static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
@@ -779,17 +789,28 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 }
 
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
-				   int off, int size)
+				   int off, int size, bool strict)
 {
-	if (reg->id && size != 1) {
-		verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
-		return -EACCES;
+	int reg_off;
+
+	/* Byte size accesses are always allowed. */
+	if (!strict || size == 1)
+		return 0;
+
+	reg_off = reg->off;
+	if (reg->id) {
+		if (reg->aux_off_align % size) {
+			verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
+				reg->aux_off_align, size);
+			return -EACCES;
+		}
+		reg_off += reg->aux_off;
 	}
 
 	/* skb->data is NET_IP_ALIGN-ed */
-	if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+	if ((NET_IP_ALIGN + reg_off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
-			NET_IP_ALIGN, reg->off, off, size);
+			NET_IP_ALIGN, reg_off, off, size);
 		return -EACCES;
 	}
 
@@ -797,9 +818,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 }
 
 static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
-				   int size)
+				   int size, bool strict)
 {
-	if (size != 1) {
+	if (strict && size != 1) {
 		verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
 		return -EACCES;
 	}
@@ -810,13 +831,16 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
 static int check_ptr_alignment(const struct bpf_reg_state *reg,
 			       int off, int size)
 {
+	bool strict = false;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		strict = true;
+
 	switch (reg->type) {
 	case PTR_TO_PACKET:
-		return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
-		       check_pkt_ptr_alignment(reg, off, size);
+		return check_pkt_ptr_alignment(reg, off, size, strict);
 	case PTR_TO_MAP_VALUE_ADJ:
-		return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
-		       check_val_ptr_alignment(reg, size);
+		return check_val_ptr_alignment(reg, size, strict);
 	default:
 		if (off % size != 0) {
 			verbose("misaligned access off %d size %d\n",
@@ -883,6 +907,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 							 value_regno);
 			/* note that reg.[id|off|range] == 0 */
 			state->regs[value_regno].type = reg_type;
+			state->regs[value_regno].aux_off = 0;
+			state->regs[value_regno].aux_off_align = 0;
 		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -1455,6 +1481,8 @@ add_imm:
 		 */
 		dst_reg->off += imm;
 	} else {
+		bool had_id;
+
 		if (src_reg->type == PTR_TO_PACKET) {
 			/* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
 			tmp_reg = *dst_reg;  /* save r7 state */
@@ -1488,14 +1516,23 @@ add_imm:
 				src_reg->imm);
 			return -EACCES;
 		}
+
+		had_id = (dst_reg->id != 0);
+
 		/* dst_reg stays as pkt_ptr type and since some positive
 		 * integer value was added to the pointer, increment its 'id'
 		 */
 		dst_reg->id = ++env->id_gen;
 
-		/* something was added to pkt_ptr, set range and off to zero */
+		/* something was added to pkt_ptr, set range to zero */
+		dst_reg->aux_off = dst_reg->off;
 		dst_reg->off = 0;
 		dst_reg->range = 0;
+		if (had_id)
+			dst_reg->aux_off_align = min(dst_reg->aux_off_align,
+						     src_reg->min_align);
+		else
+			dst_reg->aux_off_align = src_reg->min_align;
 	}
 	return 0;
 }
@@ -1669,6 +1706,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg)
 		reg->min_value = BPF_REGISTER_MIN_RANGE;
 }
 
+static u32 calc_align(u32 imm)
+{
+	if (!imm)
+		return 1U << 31;
+	return imm - ((imm - 1) & imm);
+}
+
 static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 				    struct bpf_insn *insn)
 {
@@ -1676,8 +1720,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	s64 min_val = BPF_REGISTER_MIN_RANGE;
 	u64 max_val = BPF_REGISTER_MAX_RANGE;
 	u8 opcode = BPF_OP(insn->code);
+	u32 dst_align, src_align;
 
 	dst_reg = &regs[insn->dst_reg];
+	src_align = 0;
 	if (BPF_SRC(insn->code) == BPF_X) {
 		check_reg_overflow(&regs[insn->src_reg]);
 		min_val = regs[insn->src_reg].min_value;
@@ -1693,12 +1739,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		    regs[insn->src_reg].type != UNKNOWN_VALUE) {
 			min_val = BPF_REGISTER_MIN_RANGE;
 			max_val = BPF_REGISTER_MAX_RANGE;
+			src_align = 0;
+		} else {
+			src_align = regs[insn->src_reg].min_align;
 		}
 	} else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
 		   (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
 		min_val = max_val = insn->imm;
+		src_align = calc_align(insn->imm);
 	}
 
+	dst_align = dst_reg->min_align;
+
 	/* We don't know anything about what was done to this register, mark it
 	 * as unknown.
 	 */
@@ -1723,18 +1775,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->min_value += min_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value += max_val;
+		dst_reg->min_align = min(src_align, dst_align);
 		break;
 	case BPF_SUB:
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 			dst_reg->min_value -= min_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value -= max_val;
+		dst_reg->min_align = min(src_align, dst_align);
 		break;
 	case BPF_MUL:
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 			dst_reg->min_value *= min_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value *= max_val;
+		dst_reg->min_align = max(src_align, dst_align);
 		break;
 	case BPF_AND:
 		/* Disallow AND'ing of negative numbers, ain't nobody got time
@@ -1746,17 +1801,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		else
 			dst_reg->min_value = 0;
 		dst_reg->max_value = max_val;
+		dst_reg->min_align = max(src_align, dst_align);
 		break;
 	case BPF_LSH:
 		/* Gotta have special overflow logic here, if we're shifting
 		 * more than MAX_RANGE then just assume we have an invalid
 		 * range.
 		 */
-		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-		else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value <<= min_val;
-
+			dst_reg->min_align = 1;
+		} else {
+			if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+				dst_reg->min_value <<= min_val;
+			if (!dst_reg->min_align)
+				dst_reg->min_align = 1;
+			dst_reg->min_align <<= min_val;
+		}
 		if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
 			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
 		else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
@@ -1766,11 +1827,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		/* RSH by a negative number is undefined, and the BPF_RSH is an
 		 * unsigned shift, so make the appropriate casts.
 		 */
-		if (min_val < 0 || dst_reg->min_value < 0)
+		if (min_val < 0 || dst_reg->min_value < 0) {
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-		else
+		} else {
 			dst_reg->min_value =
 				(u64)(dst_reg->min_value) >> min_val;
+		}
+		if (min_val < 0) {
+			dst_reg->min_align = 1;
+		} else {
+			dst_reg->min_align >>= (u64) min_val;
+			if (!dst_reg->min_align)
+				dst_reg->min_align = 1;
+		}
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value >>= max_val;
 		break;
@@ -1872,6 +1941,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			regs[insn->dst_reg].imm = insn->imm;
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
+			regs[insn->dst_reg].min_align = calc_align(insn->imm);
 		}
 
 	} else if (opcode > BPF_END) {
-- 
cgit v1.2.3-59-g8ed1b


From c5fc9692d101d1318b0f53f9f691cd88ac029317 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 May 2017 11:25:17 -0700
Subject: bpf: Do per-instruction state dumping in verifier when log_level > 1.

If log_level > 1, do a state dump every instruction and emit it in
a more compact way (without a leading newline).

This will facilitate more sophisticated test cases which inspect the
verifier log for register state.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cc7b626fa447..ff2bfe1d656a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2926,8 +2926,12 @@ static int do_check(struct bpf_verifier_env *env)
 			goto process_bpf_exit;
 		}
 
-		if (log_level && do_print_state) {
-			verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+		if (log_level > 1 || (log_level && do_print_state)) {
+			if (log_level > 1)
+				verbose("%d:", insn_idx);
+			else
+				verbose("\nfrom %d to %d:",
+					prev_insn_idx, insn_idx);
 			print_verifier_state(&env->cur_state);
 			do_print_state = false;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From e07b98d9bffe410019dfcf62c3428d4a96c56a2c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 May 2017 11:38:07 -0700
Subject: bpf: Add strict alignment flag for BPF_PROG_LOAD.

Add a new field, "prog_flags", and an initial flag value
BPF_F_STRICT_ALIGNMENT.

When set, the verifier will enforce strict pointer alignment
regardless of the setting of CONFIG_EFFICIENT_UNALIGNED_ACCESS.

The verifier, in this mode, will also use a fixed value of "2" in
place of NET_IP_ALIGN.

This facilitates test cases that will exercise and validate this part
of the verifier even when run on architectures where alignment doesn't
matter.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h   |  1 +
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/syscall.c           |  5 ++++-
 kernel/bpf/verifier.c          | 23 +++++++++++++++++------
 tools/build/feature/test-bpf.c |  1 +
 tools/include/uapi/linux/bpf.h | 11 +++++++++--
 6 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7c6a51924afc..d5093b52b485 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -90,6 +90,7 @@ struct bpf_verifier_env {
 	struct bpf_prog *prog;		/* eBPF program being verified */
 	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
 	int stack_size;			/* number of states to be processed */
+	bool strict_alignment;		/* perform strict pointer alignment checks */
 	struct bpf_verifier_state cur_state; /* current verifier state */
 	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
 	const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 945a1f5f63c5..94dfa9def355 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -132,6 +132,13 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -177,6 +184,7 @@ union bpf_attr {
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
 		__u32		kern_version;	/* checked when prog_type=kprobe */
+		__u32		prog_flags;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd2411fd6914..265a0d854e33 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -783,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD kern_version
+#define	BPF_PROG_LOAD_LAST_FIELD prog_flags
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -796,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_LOAD))
 		return -EINVAL;
 
+	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
+		return -EINVAL;
+
 	/* copy eBPF program license from user space */
 	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
 			      sizeof(license) - 1) < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ff2bfe1d656a..e74fb1b87855 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -791,6 +791,7 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
 {
+	int ip_align;
 	int reg_off;
 
 	/* Byte size accesses are always allowed. */
@@ -807,10 +808,14 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 		reg_off += reg->aux_off;
 	}
 
-	/* skb->data is NET_IP_ALIGN-ed */
-	if ((NET_IP_ALIGN + reg_off + off) % size != 0) {
+	/* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking
+	 * we force this to 2 which is universally what architectures use
+	 * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+	 */
+	ip_align = strict ? 2 : NET_IP_ALIGN;
+	if ((ip_align + reg_off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
-			NET_IP_ALIGN, reg_off, off, size);
+			ip_align, reg_off, off, size);
 		return -EACCES;
 	}
 
@@ -828,10 +833,11 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
 	return 0;
 }
 
-static int check_ptr_alignment(const struct bpf_reg_state *reg,
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+			       const struct bpf_reg_state *reg,
 			       int off, int size)
 {
-	bool strict = false;
+	bool strict = env->strict_alignment;
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		strict = true;
@@ -873,7 +879,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 	if (size < 0)
 		return size;
 
-	err = check_ptr_alignment(reg, off, size);
+	err = check_ptr_alignment(env, reg, off, size);
 	if (err)
 		return err;
 
@@ -3568,6 +3574,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	} else {
 		log_level = 0;
 	}
+	if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
+		env->strict_alignment = true;
+	else
+		env->strict_alignment = false;
 
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
@@ -3673,6 +3683,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
 	mutex_lock(&bpf_verifier_lock);
 
 	log_level = 0;
+	env->strict_alignment = false;
 
 	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct bpf_verifier_state_list *),
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index ebc6dceddb58..7598361ef1f1 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -29,6 +29,7 @@ int main(void)
 	attr.log_size = 0;
 	attr.log_level = 0;
 	attr.kern_version = 0;
+	attr.prog_flags = 0;
 
 	/*
 	 * Test existence of __NR_bpf and BPF_PROG_LOAD.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e553529929f6..94dfa9def355 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -132,6 +132,13 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -177,6 +184,7 @@ union bpf_attr {
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
 		__u32		kern_version;	/* checked when prog_type=kprobe */
+		__u32		prog_flags;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -481,8 +489,7 @@ union bpf_attr {
  * u32 bpf_get_socket_uid(skb)
  *     Get the owner uid of the socket stored inside sk_buff.
  *     @skb: pointer to skb
- *     Return: uid of the socket owner on success or 0 if the socket pointer
- *     inside sk_buff is NULL
+ *     Return: uid of the socket owner on success or overflowuid if failed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
-- 
cgit v1.2.3-59-g8ed1b


From 91045f5e5238a6d2ad21d41d7e86e2fa65f90d76 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 May 2017 11:42:48 -0700
Subject: bpf: Add bpf_verify_program() to the library.

This allows a test case to load a BPF program and unconditionally
acquire the verifier log.

It also allows specification of the strict alignment flag.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.c | 22 ++++++++++++++++++++++
 tools/lib/bpf/bpf.h |  4 ++++
 2 files changed, 26 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 4fe444b8092e..6e178987af8e 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -117,6 +117,28 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 }
 
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, int strict_alignment,
+		       const char *license, __u32 kern_version,
+		       char *log_buf, size_t log_buf_sz)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = 2;
+	log_buf[0] = 0;
+	attr.kern_version = kern_version;
+	attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0;
+
+	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags)
 {
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index edb4daeff7a5..972bd8333eb7 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -35,6 +35,10 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf,
 		     size_t log_buf_sz);
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, int strict_alignment,
+		       const char *license, __u32 kern_version,
+		       char *log_buf, size_t log_buf_sz);
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags);
-- 
cgit v1.2.3-59-g8ed1b


From 18b3ad90b64e9893297357608abddd26170730eb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 May 2017 11:43:51 -0700
Subject: bpf: Add verifier test case for alignment.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile     |   3 +-
 tools/testing/selftests/bpf/test_align.c | 417 +++++++++++++++++++++++++++++++
 2 files changed, 419 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_align.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 91edd0566237..f92f27d8156f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,7 +11,8 @@ endif
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf
 
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+	test_align
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o
 
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
new file mode 100644
index 000000000000..9dd97948a47f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -0,0 +1,417 @@
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define MAX_INSNS	512
+#define MAX_MATCHES	16
+
+struct bpf_align_test {
+	const char *descr;
+	struct bpf_insn	insns[MAX_INSNS];
+	enum {
+		UNDEF,
+		ACCEPT,
+		REJECT
+	} result;
+	enum bpf_prog_type prog_type;
+	const char *matches[MAX_MATCHES];
+};
+
+static struct bpf_align_test tests[] = {
+	{
+		.descr = "mov",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 16),
+			BPF_MOV64_IMM(BPF_REG_3, 32),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+			"2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
+			"5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+		},
+	},
+	{
+		.descr = "shift",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 32),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
+			"2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+			"3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
+			"6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
+			"7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+			"8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
+			"9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+		},
+	},
+	{
+		.descr = "addsub",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
+			"2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp",
+			"3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp",
+			"4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
+			"5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp",
+			"6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
+		},
+	},
+	{
+		.descr = "mul",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
+			"2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
+			"3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
+			"4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp",
+		},
+	},
+
+#define PREP_PKT_POINTERS \
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data)), \
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data_end))
+
+#define LOAD_UNKNOWN(DST_REG) \
+	PREP_PKT_POINTERS, \
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
+	BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
+	BPF_EXIT_INSN(), \
+	BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
+
+	{
+		.descr = "unknown shift",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			LOAD_UNKNOWN(BPF_REG_4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
+			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp",
+			"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp",
+			"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp",
+			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp",
+			"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp",
+			"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp",
+			"20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp",
+			"21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp",
+			"22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp",
+			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp",
+		},
+	},
+	{
+		.descr = "unknown mul",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
+			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp",
+			"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp",
+			"12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp",
+			"14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
+			"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp",
+			"16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp"
+		},
+	},
+	{
+		.descr = "packet const offset",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+
+			/* Skip over ethernet header.  */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			"4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp",
+			"5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp",
+			"6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp",
+			"10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp",
+			"14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
+			"15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
+		},
+	},
+	{
+		.descr = "packet variable offset",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+
+			/* First, add a constant to the R5 packet pointer,
+			 * then a variable with a known alignment.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Now, test in the other direction.  Adding first
+			 * the variable offset to R5, then the constant.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp",
+
+			/* Offset is added to packet pointer R5, resulting in known
+			 * auxiliary alignment and offset.
+			 */
+			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
+			 * reg->aux_off (14) which is 16.  Then the variable
+			 * offset is considered using reg->aux_off_align which
+			 * is 4 and meets the load's requirements.
+			 */
+			"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+
+			/* Variable offset is added to R5 packet pointer,
+			 * resulting in auxiliary alignment of 4.
+			 */
+			"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* Constant offset is added to R5, resulting in
+			 * reg->off of 14.
+			 */
+			"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (14) which
+			 * is 16.  Then the variable offset is considered using
+			 * reg->aux_off_align which is 4 and meets the load's
+			 * requirements.
+			 */
+			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+		},
+	},
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+	int len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static char bpf_vlog[32768];
+
+static int do_test_single(struct bpf_align_test *test)
+{
+	struct bpf_insn *prog = test->insns;
+	int prog_type = test->prog_type;
+	int prog_len, i;
+	int fd_prog;
+	int ret;
+
+	prog_len = probe_filter_length(prog);
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, 1, "GPL", 0,
+				     bpf_vlog, sizeof(bpf_vlog));
+	if (fd_prog < 0) {
+		printf("Failed to load program.\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+	} else {
+		ret = 0;
+		for (i = 0; i < MAX_MATCHES; i++) {
+			const char *t, *m = test->matches[i];
+
+			if (!m)
+				break;
+			t = strstr(bpf_vlog, m);
+			if (!t) {
+				printf("Failed to find match: %s\n", m);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+		}
+		/* printf("%s", bpf_vlog); */
+		close(fd_prog);
+	}
+	return ret;
+}
+
+static int do_test(unsigned int from, unsigned int to)
+{
+	int all_pass = 0;
+	int all_fail = 0;
+	unsigned int i;
+
+	for (i = from; i < to; i++) {
+		struct bpf_align_test *test = &tests[i];
+		int fail;
+
+		printf("Test %3d: %s ... ",
+		       i, test->descr);
+		fail = do_test_single(test);
+		if (fail) {
+			all_fail++;
+			printf("FAIL\n");
+		} else {
+			all_pass++;
+			printf("PASS\n");
+		}
+	}
+	printf("Results: %d pass %d fail\n",
+	       all_pass, all_fail);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int from = 0, to = ARRAY_SIZE(tests);
+
+	if (argc == 3) {
+		unsigned int l = atoi(argv[argc - 2]);
+		unsigned int u = atoi(argv[argc - 1]);
+
+		if (l < to && u < to) {
+			from = l;
+			to   = u + 1;
+		}
+	} else if (argc == 2) {
+		unsigned int t = atoi(argv[argc - 1]);
+
+		if (t < to) {
+			from = t;
+			to   = t + 1;
+		}
+	}
+	return do_test(from, to);
+}
-- 
cgit v1.2.3-59-g8ed1b


From 0a5539f66133a02b24f9cc43da5b84b7e6f3f436 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 11 May 2017 12:00:50 -0700
Subject: bpf: Provide a linux/types.h override for bpf selftests.

We do not want to use the architecture's type.h header when
building BPF programs which are always 64-bit.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile                   | 3 ++-
 tools/testing/selftests/bpf/include/uapi/linux/types.h | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/include/uapi/linux/types.h

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f92f27d8156f..f389b02d43a0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,6 +35,7 @@ $(BPFOBJ): force
 CLANG ?= clang
 
 %.o: %.c
-	$(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \
+	$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
+		-I../../../../samples/bpf/ \
 		-Wno-compare-distinct-pointer-types \
 		-O2 -target bpf -c $< -o $@
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h
new file mode 100644
index 000000000000..fbd16a7554af
--- /dev/null
+++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h
@@ -0,0 +1,6 @@
+#ifndef _UAPI_LINUX_TYPES_H
+#define _UAPI_LINUX_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+#endif /* _UAPI_LINUX_TYPES_H */
-- 
cgit v1.2.3-59-g8ed1b


From 23b245c04d0ef408087430dd4d1b214a5da1eb78 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 10 May 2017 08:47:11 -0700
Subject: md/raid1/10: avoid unnecessary locking

If we add bios to block plugging list, locking is unnecessry, since the block
unplug is guaranteed not to run at that time.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid1.c  | 7 +++----
 drivers/md/raid10.c | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7c1f73398800..a17ed6218d51 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1529,17 +1529,16 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			plug = container_of(cb, struct raid1_plug_cb, cb);
 		else
 			plug = NULL;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		if (plug) {
 			bio_list_add(&plug->pending, mbio);
 			plug->pending_cnt++;
 		} else {
+			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
-		}
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!plug)
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			md_wakeup_thread(mddev->thread);
+		}
 	}
 
 	r1_bio_write_done(r1_bio);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6b86a0032cf8..4343d7ff9916 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1282,17 +1282,16 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 		plug = container_of(cb, struct raid10_plug_cb, cb);
 	else
 		plug = NULL;
-	spin_lock_irqsave(&conf->device_lock, flags);
 	if (plug) {
 		bio_list_add(&plug->pending, mbio);
 		plug->pending_cnt++;
 	} else {
+		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
-	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
-	if (!plug)
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 		md_wakeup_thread(mddev->thread);
+	}
 }
 
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
-- 
cgit v1.2.3-59-g8ed1b


From 0489df9a430e9607de8130a6bc4bf4c02f96eaf1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 12 May 2017 01:04:45 +0200
Subject: xdp: add flag to enforce driver mode

After commit b5cdae3291f7 ("net: Generic XDP") we automatically fall
back to a generic XDP variant if the driver does not support native
XDP. Allow for an option where the user can specify that always the
native XDP variant should be selected and in case it's not supported
by a driver, just bail out.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h       | 6 ++++--
 net/core/dev.c                     | 2 ++
 net/core/rtnetlink.c               | 5 +++++
 samples/bpf/xdp1_user.c            | 8 ++++++--
 samples/bpf/xdp_tx_iptunnel_user.c | 7 ++++++-
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8e56ac70e0d1..549ac8a98b44 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -888,9 +888,11 @@ enum {
 /* XDP section */
 
 #define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
-#define XDP_FLAGS_SKB_MODE		(2U << 0)
+#define XDP_FLAGS_SKB_MODE		(1U << 1)
+#define XDP_FLAGS_DRV_MODE		(1U << 2)
 #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
-					 XDP_FLAGS_SKB_MODE)
+					 XDP_FLAGS_SKB_MODE | \
+					 XDP_FLAGS_DRV_MODE)
 
 enum {
 	IFLA_XDP_UNSPEC,
diff --git a/net/core/dev.c b/net/core/dev.c
index 96cf83da0d66..e56cb71351d4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6873,6 +6873,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	ASSERT_RTNL();
 
 	xdp_op = ops->ndo_xdp;
+	if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
+		return -EOPNOTSUPP;
 	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
 		xdp_op = generic_xdp_install;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bcb0f610ee42..dda9f1636356 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2199,6 +2199,11 @@ static int do_setlink(const struct sk_buff *skb,
 				err = -EINVAL;
 				goto errout;
 			}
+			if ((xdp_flags & XDP_FLAGS_SKB_MODE) &&
+			    (xdp_flags & XDP_FLAGS_DRV_MODE)) {
+				err = -EINVAL;
+				goto errout;
+			}
 		}
 
 		if (xdp[IFLA_XDP_FD]) {
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 378850c70eb8..17be9ea3ecb2 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -62,13 +62,14 @@ static void usage(const char *prog)
 	fprintf(stderr,
 		"usage: %s [OPTS] IFINDEX\n\n"
 		"OPTS:\n"
-		"    -S    use skb-mode\n",
+		"    -S    use skb-mode\n"
+		"    -N    enforce native mode\n",
 		prog);
 }
 
 int main(int argc, char **argv)
 {
-	const char *optstr = "S";
+	const char *optstr = "SN";
 	char filename[256];
 	int opt;
 
@@ -77,6 +78,9 @@ int main(int argc, char **argv)
 		case 'S':
 			xdp_flags |= XDP_FLAGS_SKB_MODE;
 			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
 		default:
 			usage(basename(argv[0]));
 			return 1;
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 92b8bde9337c..631cdcc41c97 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -79,6 +79,8 @@ static void usage(const char *cmd)
 	printf("    -m <dest-MAC> Used in sending the IP Tunneled pkt\n");
 	printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
 	printf("    -P <IP-Protocol> Default is TCP\n");
+	printf("    -S use skb-mode\n");
+	printf("    -N enforce native mode\n");
 	printf("    -h Display this help\n");
 }
 
@@ -138,7 +140,7 @@ int main(int argc, char **argv)
 {
 	unsigned char opt_flags[256] = {};
 	unsigned int kill_after_s = 0;
-	const char *optstr = "i:a:p:s:d:m:T:P:Sh";
+	const char *optstr = "i:a:p:s:d:m:T:P:SNh";
 	int min_port = 0, max_port = 0;
 	struct iptnl_info tnl = {};
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -206,6 +208,9 @@ int main(int argc, char **argv)
 		case 'S':
 			xdp_flags |= XDP_FLAGS_SKB_MODE;
 			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
 		default:
 			usage(argv[0]);
 			return 1;
-- 
cgit v1.2.3-59-g8ed1b


From d67b9cd28c1d7f82c2e5e727731ea7c89b23a0a8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 12 May 2017 01:04:46 +0200
Subject: xdp: refine xdp api with regards to generic xdp

While working on the iproute2 generic XDP frontend, I noticed that
as of right now it's possible to have native *and* generic XDP
programs loaded both at the same time for the case when a driver
supports native XDP.

The intended model for generic XDP from b5cdae3291f7 ("net: Generic
XDP") is, however, that only one out of the two can be present at
once which is also indicated as such in the XDP netlink dump part.
The main rationale for generic XDP is to ease accessibility (in
case a driver does not yet have XDP support) and to generically
provide a semantical model as an example for driver developers
wanting to add XDP support. The generic XDP option for an XDP
aware driver can still be useful for comparing and testing both
implementations.

However, it is not intended to have a second XDP processing stage
or layer with exactly the same functionality of the first native
stage. Only reason could be to have a partial fallback for future
XDP features that are not supported yet in the native implementation
and we probably also shouldn't strive for such fallback and instead
encourage native feature support in the first place. Given there's
currently no such fallback issue or use case, lets not go there yet
if we don't need to.

Therefore, change semantics for loading XDP and bail out if the
user tries to load a generic XDP program when a native one is
present and vice versa. Another alternative to bailing out would
be to handle the transition from one flavor to another gracefully,
but that would require to bring the device down, exchange both
types of programs, and bring it up again in order to avoid a tiny
window where a packet could hit both hooks. Given this complicates
the logic for just a debugging feature in the native case, I went
with the simpler variant.

For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7
and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all
or just a subset of flags that were used for loading the XDP prog
is suboptimal in the long run since not all flags are useful for
dumping and if we start to reuse the same flag definitions for
load and dump, then we'll waste bit space. What we really just
want is to dump the mode for now.

Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0),
a program is running at the native driver layer (1). Thus, add a
mode that says that a program is running at generic XDP layer (2).
Applications will handle this fine in that older binaries will
just indicate that something is attached at XDP layer, effectively
this is similar to IFLA_XDP_FLAGS attr that we would have had
modulo the redundancy.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    |  8 +++++--
 include/uapi/linux/if_link.h |  7 ++++++
 net/core/dev.c               | 55 +++++++++++++++++++++++++++++---------------
 net/core/rtnetlink.c         | 40 +++++++++++++++-----------------
 4 files changed, 67 insertions(+), 43 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c23bd2efb56..3f39d27decf4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3296,11 +3296,15 @@ int dev_get_phys_port_id(struct net_device *dev,
 int dev_get_phys_port_name(struct net_device *dev,
 			   char *name, size_t len);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
-		      int fd, u32 flags);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
+
+typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp);
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+		      int fd, u32 flags);
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op);
+
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(const struct net_device *dev,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 549ac8a98b44..15ac20382aba 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -894,6 +894,13 @@ enum {
 					 XDP_FLAGS_SKB_MODE | \
 					 XDP_FLAGS_DRV_MODE)
 
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+	XDP_ATTACHED_NONE = 0,
+	XDP_ATTACHED_DRV,
+	XDP_ATTACHED_SKB,
+};
+
 enum {
 	IFLA_XDP_UNSPEC,
 	IFLA_XDP_FD,
diff --git a/net/core/dev.c b/net/core/dev.c
index e56cb71351d4..fca407b4a6ea 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6852,6 +6852,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
+{
+	struct netdev_xdp xdp;
+
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG;
+
+	/* Query must always succeed. */
+	WARN_ON(xdp_op(dev, &xdp) < 0);
+	return xdp.prog_attached;
+}
+
+static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+			   struct netlink_ext_ack *extack,
+			   struct bpf_prog *prog)
+{
+	struct netdev_xdp xdp;
+
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_SETUP_PROG;
+	xdp.extack = extack;
+	xdp.prog = prog;
+
+	return xdp_op(dev, &xdp);
+}
+
 /**
  *	dev_change_xdp_fd - set or clear a bpf program for a device rx path
  *	@dev: device
@@ -6864,43 +6890,34 @@ EXPORT_SYMBOL(dev_change_proto_down);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags)
 {
-	int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct bpf_prog *prog = NULL;
-	struct netdev_xdp xdp;
+	xdp_op_t xdp_op, xdp_chk;
 	int err;
 
 	ASSERT_RTNL();
 
-	xdp_op = ops->ndo_xdp;
+	xdp_op = xdp_chk = ops->ndo_xdp;
 	if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
 		return -EOPNOTSUPP;
 	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
 		xdp_op = generic_xdp_install;
+	if (xdp_op == xdp_chk)
+		xdp_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
-			memset(&xdp, 0, sizeof(xdp));
-			xdp.command = XDP_QUERY_PROG;
-
-			err = xdp_op(dev, &xdp);
-			if (err < 0)
-				return err;
-			if (xdp.prog_attached)
-				return -EBUSY;
-		}
+		if (xdp_chk && __dev_xdp_attached(dev, xdp_chk))
+			return -EEXIST;
+		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+		    __dev_xdp_attached(dev, xdp_op))
+			return -EBUSY;
 
 		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 	}
 
-	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = XDP_SETUP_PROG;
-	xdp.extack = extack;
-	xdp.prog = prog;
-
-	err = xdp_op(dev, &xdp);
+	err = dev_xdp_install(dev, xdp_op, extack, prog);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dda9f1636356..d7f82c3450b1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev,
 static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
-			  nla_total_size(1) +	/* XDP_ATTACHED */
-			  nla_total_size(4);	/* XDP_FLAGS */
+			  nla_total_size(1);	/* XDP_ATTACHED */
 
 	return xdp_size;
 }
@@ -1247,37 +1246,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static u8 rtnl_xdp_attached_mode(struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	ASSERT_RTNL();
+
+	if (rcu_access_pointer(dev->xdp_prog))
+		return XDP_ATTACHED_SKB;
+	if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp))
+		return XDP_ATTACHED_DRV;
+
+	return XDP_ATTACHED_NONE;
+}
+
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	struct nlattr *xdp;
-	u32 xdp_flags = 0;
-	u8 val = 0;
 	int err;
 
 	xdp = nla_nest_start(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
-	if (rcu_access_pointer(dev->xdp_prog)) {
-		xdp_flags = XDP_FLAGS_SKB_MODE;
-		val = 1;
-	} else if (dev->netdev_ops->ndo_xdp) {
-		struct netdev_xdp xdp_op = {};
-
-		xdp_op.command = XDP_QUERY_PROG;
-		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
-		if (err)
-			goto err_cancel;
-		val = xdp_op.prog_attached;
-	}
-	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
+
+	err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
+			 rtnl_xdp_attached_mode(dev));
 	if (err)
 		goto err_cancel;
 
-	if (xdp_flags) {
-		err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
-		if (err)
-			goto err_cancel;
-	}
 	nla_nest_end(skb, xdp);
 	return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 11 May 2017 15:24:41 -0700
Subject: netem: fix skb_orphan_partial()

I should have known that lowering skb->truesize was dangerous :/

In case packets are not leaving the host via a standard Ethernet device,
but looped back to local sockets, bad things can happen, as reported
by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 )

So instead of tweaking skb->truesize, lets change skb->destructor
and keep a reference on the owner socket via its sk_refcnt.

Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Michael Madsen <mkm@nabto.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 79c6aee6af9b..e43e71d7856b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1803,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
  * But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
  */
 void skb_orphan_partial(struct sk_buff *skb)
 {
-	/* If this skb is a TCP pure ACK or already went here,
-	 * we have nothing to do. 2 is already a very small truesize.
-	 */
-	if (skb->truesize <= 2)
+	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
-	 * so we do not completely orphan skb, but transfert all
-	 * accounted bytes but one, to avoid unexpected reorders.
-	 */
 	if (skb->destructor == sock_wfree
 #ifdef CONFIG_INET
 	    || skb->destructor == tcp_wfree
 #endif
 		) {
-		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
-		skb->truesize = 1;
+		struct sock *sk = skb->sk;
+
+		if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+			atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+			skb->destructor = sock_efree;
+		}
 	} else {
 		skb_orphan(skb);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From b451e5d24ba6687c6f0e7319c727a709a1846c06 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 10 May 2017 17:01:27 -0700
Subject: tcp: avoid fragmenting peculiar skbs in SACK

This patch fixes a bug in splitting an SKB during SACK
processing. Specifically if an skb contains multiple
packets and is only partially sacked in the higher sequences,
tcp_match_sack_to_skb() splits the skb and marks the second fragment
as SACKed.

The current code further attempts rounding up the first fragment
to MSS boundaries. But it misses a boundary condition when the
rounded-up fragment size (pkt_len) is exactly skb size.  Spliting
such an skb is pointless and causses a kernel warning and aborts
the SACK processing. This patch universally checks such over-split
before calling tcp_fragment to prevent these unnecessary warnings.

Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5a3ad09e2786..06e2dbc2b4a2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1179,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		 */
 		if (pkt_len > mss) {
 			unsigned int new_len = (pkt_len / mss) * mss;
-			if (!in_sack && new_len < pkt_len) {
+			if (!in_sack && new_len < pkt_len)
 				new_len += mss;
-				if (new_len >= skb->len)
-					return 0;
-			}
 			pkt_len = new_len;
 		}
+
+		if (pkt_len >= skb->len && !in_sack)
+			return 0;
+
 		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
-- 
cgit v1.2.3-59-g8ed1b


From cb395b2010879a8461aa1b1c37025769708c32cf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 10 May 2017 21:59:28 -0700
Subject: net: sched: optimize class dumps

In commit 59cc1f61f09c ("net: sched: convert qdisc linked list to
hashtable") we missed the opportunity to considerably speed up
tc_dump_tclass_root() if a qdisc handle is provided by user.

Instead of iterating all the qdiscs, use qdisc_match_from_root()
to directly get the one we look for.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bbe57d57b67f..e88342fde1bc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1831,6 +1831,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 	if (!qdisc_dev(root))
 		return 0;
 
+	if (tcm->tcm_parent) {
+		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
+		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+			return -1;
+		return 0;
+	}
 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
 			return -1;
-- 
cgit v1.2.3-59-g8ed1b


From d86b5672b1adb98b4cdd6fbf0224bbfb03db6e2e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 11 May 2017 13:58:06 +0200
Subject: xen-netfront: avoid crashing on resume after a failure in
 talk_to_netback()

Unavoidable crashes in netfront_resume() and netback_changed() after a
previous fail in talk_to_netback() (e.g. when we fail to read MAC from
xenstore) were discovered. The failure path in talk_to_netback() does
unregister/free for netdev but we don't reset drvdata and we try accessing
it after resume.

Fix the bug by removing the whole xen device completely with
device_unregister(), this guarantees we won't have any calls into netfront
after a failure.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 6ffc482550c1..7b61adb6270c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1934,8 +1934,7 @@ abort_transaction_no_dev_fatal:
 	xennet_disconnect_backend(info);
 	xennet_destroy_queues(info);
  out:
-	unregister_netdev(info->netdev);
-	xennet_free_netdev(info->netdev);
+	device_unregister(&dev->dev);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f9c3fe2f43343be7972226c2e736e7a68e383dc1 Mon Sep 17 00:00:00 2001
From: "Chopra, Manish" <Manish.Chopra@cavium.com>
Date: Thu, 11 May 2017 07:12:47 -0700
Subject: qlcnic: Fix link configuration with autoneg disabled

Currently driver returns error on speed configurations
for 83xx adapter's non XGBE ports, due to this link doesn't
come up on the ports using 1000Base-T as a connector with
autoneg disabled. This patch fixes this with initializing
appropriate port type based on queried module/connector
types from hardware before any speed/autoneg configuration.

Signed-off-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c    | 34 ++++++++++++++++++++++
 .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h    |  1 +
 .../net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c    |  3 ++
 3 files changed, 38 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 718bf58a7da6..4fb68797630e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -3168,6 +3168,40 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr,
 	return 0;
 }
 
+void qlcnic_83xx_get_port_type(struct qlcnic_adapter *adapter)
+{
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
+	struct qlcnic_cmd_args cmd;
+	u32 config;
+	int err;
+
+	err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_LINK_STATUS);
+	if (err)
+		return;
+
+	err = qlcnic_issue_cmd(adapter, &cmd);
+	if (err) {
+		dev_info(&adapter->pdev->dev,
+			 "Get Link Status Command failed: 0x%x\n", err);
+		goto out;
+	} else {
+		config = cmd.rsp.arg[3];
+
+		switch (QLC_83XX_SFP_MODULE_TYPE(config)) {
+		case QLC_83XX_MODULE_FIBRE_1000BASE_SX:
+		case QLC_83XX_MODULE_FIBRE_1000BASE_LX:
+		case QLC_83XX_MODULE_FIBRE_1000BASE_CX:
+		case QLC_83XX_MODULE_TP_1000BASE_T:
+			ahw->port_type = QLCNIC_GBE;
+			break;
+		default:
+			ahw->port_type = QLCNIC_XGBE;
+		}
+	}
+out:
+	qlcnic_free_mbx_args(&cmd);
+}
+
 int qlcnic_83xx_test_link(struct qlcnic_adapter *adapter)
 {
 	u8 pci_func;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
index 3dfe8e27b51c..b75a81246856 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
@@ -637,6 +637,7 @@ void qlcnic_83xx_get_pauseparam(struct qlcnic_adapter *,
 int qlcnic_83xx_set_pauseparam(struct qlcnic_adapter *,
 			       struct ethtool_pauseparam *);
 int qlcnic_83xx_test_link(struct qlcnic_adapter *);
+void qlcnic_83xx_get_port_type(struct qlcnic_adapter *adapter);
 int qlcnic_83xx_reg_test(struct qlcnic_adapter *);
 int qlcnic_83xx_get_regs_len(struct qlcnic_adapter *);
 int qlcnic_83xx_get_registers(struct qlcnic_adapter *, u32 *);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 9a869c15d8bf..7f7deeaf1cf0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -486,6 +486,9 @@ static int qlcnic_set_link_ksettings(struct net_device *dev,
 	u32 ret = 0;
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 
+	if (qlcnic_83xx_check(adapter))
+		qlcnic_83xx_get_port_type(adapter);
+
 	if (adapter->ahw->port_type != QLCNIC_GBE)
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3-59-g8ed1b


From 33c16bfd5bc00bbb9823d25af46c496966e0ac0d Mon Sep 17 00:00:00 2001
From: "Chopra, Manish" <Manish.Chopra@cavium.com>
Date: Thu, 11 May 2017 07:12:48 -0700
Subject: qlcnic: Update version to 5.3.66

Bumping up the version as couple of fixes added after 5.3.65

Signed-off-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 49bad00a0f8f..7245b1072518 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -37,8 +37,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 65
-#define QLCNIC_LINUX_VERSIONID  "5.3.65"
+#define _QLCNIC_LINUX_SUBVERSION 66
+#define QLCNIC_LINUX_VERSIONID  "5.3.66"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
-- 
cgit v1.2.3-59-g8ed1b


From 69a73e744db1a57175039e3d1e6ef3913e816eb8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 11 May 2017 21:41:09 -0400
Subject: bpf: Remove commented out debugging hack in test_align.

Reported-by: Alexander Alemayhu <alexander@alemayhu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_align.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 9dd97948a47f..ed242552e492 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -361,7 +361,6 @@ static int do_test_single(struct bpf_align_test *test)
 				break;
 			}
 		}
-		/* printf("%s", bpf_vlog); */
 		close(fd_prog);
 	}
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From d2be3667f3769b3c60aa294ef7f2b03d1b16559c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 11 May 2017 19:29:40 +0100
Subject: ethernet: aquantia: remove redundant checks on error status

The error status err is initialized as zero and then being checked
several times to see if it is less than zero even when it has not
been updated.  It may seem that the err should be assigned to the
return code of the call to the various *offload_en_set calls and
then we check for failure, however, these functions are void and
never actually return any status.

Since these error checks are redundant we can remove these
as well as err and the error exit label err_exit.

Detected by CoverityScan, CID#1398313 and CID#1398306 ("Logically
dead code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Acked-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 13 +------------
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 +-----------
 2 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 4ee15ff06a44..faeb4935ef3e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -200,29 +200,18 @@ err_exit:
 static int hw_atl_a0_hw_offload_set(struct aq_hw_s *self,
 				    struct aq_nic_cfg_s *aq_nic_cfg)
 {
-	int err = 0;
-
 	/* TX checksums offloads*/
 	tpo_ipv4header_crc_offload_en_set(self, 1);
 	tpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* RX checksums offloads*/
 	rpo_ipv4header_crc_offload_en_set(self, 1);
 	rpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* LSO offloads*/
 	tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
-	if (err < 0)
-		goto err_exit;
-
-	err = aq_hw_err_from_flags(self);
 
-err_exit:
-	return err;
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl_a0_hw_init_tx_path(struct aq_hw_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 42150708191d..1bceb7358e5c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -200,25 +200,18 @@ err_exit:
 static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 				    struct aq_nic_cfg_s *aq_nic_cfg)
 {
-	int err = 0;
 	unsigned int i;
 
 	/* TX checksums offloads*/
 	tpo_ipv4header_crc_offload_en_set(self, 1);
 	tpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* RX checksums offloads*/
 	rpo_ipv4header_crc_offload_en_set(self, 1);
 	rpo_tcp_udp_crc_offload_en_set(self, 1);
-	if (err < 0)
-		goto err_exit;
 
 	/* LSO offloads*/
 	tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
-	if (err < 0)
-		goto err_exit;
 
 /* LRO offloads */
 	{
@@ -245,10 +238,7 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 
 		rpo_lro_en_set(self, aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U);
 	}
-	err = aq_hw_err_from_flags(self);
-
-err_exit:
-	return err;
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
-- 
cgit v1.2.3-59-g8ed1b


From ad990dbe6d3ac3af1f5f4484b1126b9fc601e98a Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <andy@greyhouse.net>
Date: Thu, 11 May 2017 15:52:30 -0400
Subject: samples/bpf: run cleanup routines when receiving SIGTERM

Shahid Habib noticed that when xdp1 was killed from a different console the xdp
program was not cleaned-up properly in the kernel and it continued to forward
traffic.

Most of the applications in samples/bpf cleanup properly, but only when getting
SIGINT.  Since kill defaults to using SIGTERM, add support to cleanup when the
application receives either SIGINT or SIGTERM.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
Reported-by: Shahid Habib <shahid.habib@broadcom.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/cookie_uid_helper_example.c | 4 +++-
 samples/bpf/offwaketime_user.c          | 1 +
 samples/bpf/sampleip_user.c             | 1 +
 samples/bpf/trace_event_user.c          | 1 +
 samples/bpf/tracex2_user.c              | 1 +
 samples/bpf/xdp1_user.c                 | 1 +
 samples/bpf/xdp_tx_iptunnel_user.c      | 1 +
 7 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index b08ab4e88929..9d751e209f31 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -306,7 +306,9 @@ int main(int argc, char *argv[])
 	prog_attach_iptables(argv[2]);
 	if (cfg_test_traffic) {
 		if (signal(SIGINT, finish) == SIG_ERR)
-			error(1, errno, "register handler failed");
+			error(1, errno, "register SIGINT handler failed");
+		if (signal(SIGTERM, finish) == SIG_ERR)
+			error(1, errno, "register SIGTERM handler failed");
 		while (!test_finish) {
 			print_table();
 			printf("\n");
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 9cce2a66bd66..512f87a5fd20 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -100,6 +100,7 @@ int main(int argc, char **argv)
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	if (load_kallsyms()) {
 		printf("failed to process /proc/kallsyms\n");
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index be59d7dcbdde..4ed690b907ff 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -180,6 +180,7 @@ int main(int argc, char **argv)
 		return 1;
 	}
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	/* do sampling */
 	printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 0c5561d193a4..fa4336423da5 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -192,6 +192,7 @@ int main(int argc, char **argv)
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	if (load_kallsyms()) {
 		printf("failed to process /proc/kallsyms\n");
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 7fee0f1ba9a3..7321a3f253c9 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -127,6 +127,7 @@ int main(int ac, char **argv)
 	}
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	/* start 'ping' in the background to have some kfree_skb events */
 	f = popen("ping -c5 localhost", "r");
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 17be9ea3ecb2..2431c0321b71 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -106,6 +106,7 @@ int main(int argc, char **argv)
 	}
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
 		printf("link set xdp fd failed\n");
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 631cdcc41c97..715cd12eaca5 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -244,6 +244,7 @@ int main(int argc, char **argv)
 	}
 
 	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
 
 	while (min_port <= max_port) {
 		vip.dport = htons(min_port++);
-- 
cgit v1.2.3-59-g8ed1b


From 844cf763fba654436d3a4279b6a672c196cf1901 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Thu, 11 May 2017 20:28:15 +0200
Subject: tipc: make macro tipc_wait_for_cond() smp safe

The macro tipc_wait_for_cond() is embedding the macro sk_wait_event()
to fulfil its task. The latter, in turn, is evaluating the stated
condition outside the socket lock context. This is problematic if
the condition is accessing non-trivial data structures which may be
altered by incoming interrupts, as is the case with the cong_links()
linked list, used by socket to keep track of the current set of
congested links. We sometimes see crashes when this list is accessed
by a condition function at the same time as a SOCK_WAKEUP interrupt
is removing an element from the list.

We fix this by expanding selected parts of sk_wait_event() into the
outer macro, while ensuring that all evaluations of a given condition
are performed under socket lock protection.

Fixes: commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion")
Reviewed-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 0d4f2f455a7c..1b92b72e812f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -362,25 +362,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
 	return 0;
 }
 
-#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
-({								        \
-	int rc_ = 0;							\
-	int done_ = 0;							\
-									\
-	while (!(condition_) && !done_) {				\
-		struct sock *sk_ = sock->sk;				\
-		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
-									\
-		rc_ = tipc_sk_sock_err(sock_, timeout_);		\
-		if (rc_)						\
-			break;						\
-		prepare_to_wait(sk_sleep(sk_), &wait_,			\
-				TASK_INTERRUPTIBLE);			\
-		done_ = sk_wait_event(sk_, timeout_,			\
-				      (condition_), &wait_);		\
-		remove_wait_queue(sk_sleep(sk_), &wait_);		\
-	}								\
-	rc_;								\
+#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
+({                                                                             \
+	struct sock *sk_;						       \
+	int rc_;							       \
+									       \
+	while ((rc_ = !(condition_))) {					       \
+		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
+		sk_ = (sock_)->sk;					       \
+		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
+		if (rc_)						       \
+			break;						       \
+		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
+		release_sock(sk_);					       \
+		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
+		sched_annotate_sleep();				               \
+		lock_sock(sk_);						       \
+		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
+	}								       \
+	rc_;								       \
 })
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 6832a333ed4a7cc4fcb170c045d1d96d0976fdd4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 11 May 2017 19:30:02 -0700
Subject: bpf: Handle multiple variable additions into packet pointers in
 verifier.

We must accumulate into reg->aux_off rather than use a plain assignment.

Add a test for this situation to test_align.

Reported-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                    |  2 +-
 tools/testing/selftests/bpf/test_align.c | 37 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e74fb1b87855..39f2dcbc4cbc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1531,7 +1531,7 @@ add_imm:
 		dst_reg->id = ++env->id_gen;
 
 		/* something was added to pkt_ptr, set range to zero */
-		dst_reg->aux_off = dst_reg->off;
+		dst_reg->aux_off += dst_reg->off;
 		dst_reg->off = 0;
 		dst_reg->range = 0;
 		if (had_id)
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index ed242552e492..9644d4e069de 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -273,6 +273,20 @@ static struct bpf_align_test tests[] = {
 			BPF_EXIT_INSN(),
 			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
 
+			/* Test multiple accumulations of unknown values
+			 * into a packet pointer.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
@@ -314,6 +328,29 @@ static struct bpf_align_test tests[] = {
 			 * requirements.
 			 */
 			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+
+			/* Constant offset is added to R5 packet pointer,
+			 * resulting in reg->off value of 14.
+			 */
+			"26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp",
+			/* Variable offset is added to R5, resulting in an
+			 * auxiliary offset of 14, and an auxiliary alignment of 4.
+			 */
+			"27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			/* Constant is added to R5 again, setting reg->off to 4. */
+			"28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			/* And once more we add a variable, which causes an accumulation
+			 * of reg->off into reg->aux_off_align, with resulting value of
+			 * 18.  The auxiliary alignment stays at 4.
+			 */
+			"29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
+			 * reg->aux_off (18) which is 20.  Then the variable offset
+			 * is considered using reg->aux_off_align which is 4 and meets
+			 * the load's requirements.
+			 */
+			"33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
 		},
 	},
 };
-- 
cgit v1.2.3-59-g8ed1b


From df0c8d911abf6ba97b2c2fc3c5a12769e0b081a3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 11 May 2017 11:24:16 -0700
Subject: net: phy: Call bus->reset() after releasing PHYs from reset

The API convention makes it that a given MDIO bus reset should be able
to access PHY devices in its reset() callback and perform additional
MDIO accesses in order to bring the bus and PHYs in a working state.

Commit 69226896ad63 ("mdio_bus: Issue GPIO RESET to PHYs.") broke that
contract by first calling bus->reset() and then release all PHYs from
reset using their shared GPIO line, so restore the expected
functionality here.

Fixes: 69226896ad63 ("mdio_bus: Issue GPIO RESET to PHYs.")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index a898e5c4ef1b..8e73f5f36e71 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -364,9 +364,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 
 	mutex_init(&bus->mdio_lock);
 
-	if (bus->reset)
-		bus->reset(bus);
-
 	/* de-assert bus level PHY GPIO resets */
 	if (bus->num_reset_gpios > 0) {
 		bus->reset_gpiod = devm_kcalloc(&bus->dev,
@@ -396,6 +393,9 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 		}
 	}
 
+	if (bus->reset)
+		bus->reset(bus);
+
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
 		if ((bus->phy_mask & (1 << i)) == 0) {
 			struct phy_device *phydev;
-- 
cgit v1.2.3-59-g8ed1b


From 29f6ca6916e29fc46f1418885374d9ed50430687 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 8 May 2017 14:59:02 +0900
Subject: scsi: sd: Unlock zone in case of error in sd_setup_write_same_cmnd()

scsi_io_init() may fail, leaving a zone of a zoned block device locked.
Fix this by properly unlocking the write same request target zone if
scsi_io_init() fails.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f9d1432d7cc5..e60a309b26bf 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -948,6 +948,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 	rq->__data_len = sdp->sector_size;
 	ret = scsi_init_io(cmd);
 	rq->__data_len = nr_bytes;
+
+	if (sd_is_zoned(sdkp) && ret != BLKPREP_OK)
+		sd_zbc_write_unlock_zone(cmd);
+
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ed44fd7fd8a6785b73cfc6d44594c434e578d724 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 8 May 2017 15:48:19 +0900
Subject: scsi: sd: Write lock zone for REQ_OP_WRITE_ZEROES

For a zoned block device, sd_zbc_complete() handles zone write unlock on
completion of a REQ_OP_WRITE_ZEROES command but the zone write locking
is missing from sd_setup_write_zeroes_cmnd(). This patch fixes this
problem by locking the target zone of a REQ_OP_WRITE_ZEROES request.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e60a309b26bf..de9e2f2ef662 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -827,21 +827,32 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
 	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	int ret;
 
 	if (!(rq->cmd_flags & REQ_NOUNMAP)) {
 		switch (sdkp->zeroing_mode) {
 		case SD_ZERO_WS16_UNMAP:
-			return sd_setup_write_same16_cmnd(cmd, true);
+			ret = sd_setup_write_same16_cmnd(cmd, true);
+			goto out;
 		case SD_ZERO_WS10_UNMAP:
-			return sd_setup_write_same10_cmnd(cmd, true);
+			ret = sd_setup_write_same10_cmnd(cmd, true);
+			goto out;
 		}
 	}
 
 	if (sdp->no_write_same)
 		return BLKPREP_INVALID;
+
 	if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
-		return sd_setup_write_same16_cmnd(cmd, false);
-	return sd_setup_write_same10_cmnd(cmd, false);
+		ret = sd_setup_write_same16_cmnd(cmd, false);
+	else
+		ret = sd_setup_write_same10_cmnd(cmd, false);
+
+out:
+	if (sd_is_zoned(sdkp) && ret == BLKPREP_OK)
+		return sd_zbc_write_lock_zone(cmd);
+
+	return ret;
 }
 
 static void sd_config_write_same(struct scsi_disk *sdkp)
-- 
cgit v1.2.3-59-g8ed1b


From 48ae8484e9fc324b4968d33c585e54bc98e44d61 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 10 May 2017 09:53:40 +0200
Subject: scsi: sg: don't return bogus Sg_requests

If the list search in sg_get_rq_mark() fails to find a valid request, we
return a bogus element. This then can later lead to a GPF in
sg_remove_scat().

So don't return bogus Sg_requests in sg_get_rq_mark() but NULL in case
the list search doesn't find a valid request.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Doug Gilbert <dgilbert@interlog.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Doug Gilbert <dgilbert@interlog.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0a38ba01b7b4..82c33a6edbea 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2074,11 +2074,12 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id)
 		if ((1 == resp->done) && (!resp->sg_io_owned) &&
 		    ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
 			resp->done = 2;	/* guard against other readers */
-			break;
+			write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+			return resp;
 		}
 	}
 	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
-	return resp;
+	return NULL;
 }
 
 /* always adds to end of list */
-- 
cgit v1.2.3-59-g8ed1b


From acde25726bc6034b628febb8a4c6c0838736ccbf Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Wed, 10 May 2017 16:39:41 +1000
Subject: KVM: PPC: Book3S HV: Add radix checks in real-mode hypercall handlers

POWER9 running a radix guest will take some hypervisor interrupts
without going to real mode (turning off the MMU).  This means that
early hypercall handlers may now be called in virtual mode.  Most of
the handlers work just fine in both modes, but there are some that
can crash the host if called in virtual mode, notably the TCE (IOMMU)
hypercalls H_PUT_TCE, H_STUFF_TCE and H_PUT_TCE_INDIRECT.  These
already have both a real-mode and a virtual-mode version, so we
arrange for the real-mode version to return H_TOO_HARD for radix
guests, which will result in the virtual-mode version being called.

The other hypercall which is sensitive to the MMU mode is H_RANDOM.
It doesn't have a virtual-mode version, so this adds code to enable
it to be called in either mode.

An alternative solution was considered which would refuse to call any
of the early hypercall handlers when doing a virtual-mode exit from a
radix guest.  However, the XICS-on-XIVE code depends on the XICS
hypercalls being handled early even for virtual-mode exits, because
the handlers need to be called before the XIVE vCPU state has been
pulled off the hardware.  Therefore that solution would have become
quite invasive and complicated, and was rejected in favour of the
simpler, though less elegant, solution presented here.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Tested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_vio_hv.c  | 13 +++++++++++++
 arch/powerpc/kvm/book3s_hv_builtin.c |  9 ++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index eda0a8f6fae8..3adfd2f5301c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -301,6 +301,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
 	/* 	    liobn, ioba, tce); */
 
+	/* For radix, we might be in virtual mode, so punt */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -381,6 +385,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 	bool prereg = false;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
+	/* For radix, we might be in virtual mode, so punt */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -491,6 +499,10 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	long i, ret;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
+	/* For radix, we might be in virtual mode, so punt */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -527,6 +539,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	return H_SUCCESS;
 }
 
+/* This can be called in either virtual mode or real mode */
 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		      unsigned long ioba)
 {
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a65923c649..ee4c2558c305 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -207,7 +207,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
 
 long kvmppc_h_random(struct kvm_vcpu *vcpu)
 {
-	if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
+	int r;
+
+	/* Only need to do the expensive mfmsr() on radix */
+	if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
+		r = powernv_get_random_long(&vcpu->arch.gpr[4]);
+	else
+		r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]);
+	if (r)
 		return H_SUCCESS;
 
 	return H_HARDWARE;
-- 
cgit v1.2.3-59-g8ed1b


From 67325e988faea735d663799b6d152b5f4254093c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 11 May 2017 11:33:30 +1000
Subject: KVM: PPC: Book3S PR: Check copy_to/from_user return values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PR KVM implementation of the PAPR HPT hypercalls (H_ENTER etc.)
access an image of the HPT in userspace memory using copy_from_user
and copy_to_user.  Recently, the declarations of those functions were
annotated to indicate that the return value must be checked.  Since
this code doesn't currently check the return value, this causes
compile warnings like the ones shown below, and since on PPC the
default is to compile arch/powerpc with -Werror, this causes the
build to fail.

To fix this, we check the return values, and if non-zero, fail the
hypercall being processed with a H_FUNCTION error return value.
There is really no good error return value to use since PAPR didn't
envisage the possibility that the hypervisor may not be able to access
the guest's HPT, and H_FUNCTION (function not supported) seems as
good as any.

The typical compile warnings look like this:

  CC      arch/powerpc/kvm/book3s_pr_papr.o
/home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_pr_papr.c: In function ‘kvmppc_h_pr_enter’:
/home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_pr_papr.c:53:2: error: ignoring return value of ‘copy_from_user’, declared with attribute warn_unused_result [-Werror=unused-result]
  copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
  ^
/home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_pr_papr.c:74:2: error: ignoring return value of ‘copy_to_user’, declared with attribute warn_unused_result [-Werror=unused-result]
  copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
  ^

... etc.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr_papr.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index bcbeeb62dd13..a04384adece7 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -50,7 +50,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	pteg_addr = get_pteg_addr(vcpu, pte_index);
 
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
-	copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
+	ret = H_FUNCTION;
+	if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)))
+		goto done;
 	hpte = pteg;
 
 	ret = H_PTEG_FULL;
@@ -71,7 +73,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
 	hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
 	pteg_addr += i * HPTE_SIZE;
-	copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE))
+		goto done;
 	kvmppc_set_gpr(vcpu, 4, pte_index | i);
 	ret = H_SUCCESS;
 
@@ -93,7 +97,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
-	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
 	pte[0] = be64_to_cpu((__force __be64)pte[0]);
 	pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
@@ -103,7 +109,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
 		goto done;
 
-	copy_to_user((void __user *)pteg, &v, sizeof(v));
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, &v, sizeof(v)))
+		goto done;
 
 	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
@@ -171,7 +179,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 		}
 
 		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
-		copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+		if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) {
+			ret = H_FUNCTION;
+			break;
+		}
 		pte[0] = be64_to_cpu((__force __be64)pte[0]);
 		pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
@@ -184,7 +195,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 			tsh |= H_BULK_REMOVE_NOT_FOUND;
 		} else {
 			/* Splat the pteg in (userland) hpt */
-			copy_to_user((void __user *)pteg, &v, sizeof(v));
+			if (copy_to_user((void __user *)pteg, &v, sizeof(v))) {
+				ret = H_FUNCTION;
+				break;
+			}
 
 			rb = compute_tlbie_rb(pte[0], pte[1],
 					      tsh & H_BULK_REMOVE_PTEX);
@@ -211,7 +225,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
-	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
 	pte[0] = be64_to_cpu((__force __be64)pte[0]);
 	pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
@@ -234,7 +250,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 	pte[0] = (__force u64)cpu_to_be64(pte[0]);
 	pte[1] = (__force u64)cpu_to_be64(pte[1]);
-	copy_to_user((void __user *)pteg, pte, sizeof(pte));
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, pte, sizeof(pte)))
+		goto done;
 	ret = H_SUCCESS;
 
  done:
-- 
cgit v1.2.3-59-g8ed1b


From 70d466f760b351fe30b5f8c956354ddf29aa676b Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 11 May 2017 15:28:28 -0700
Subject: md/r5cache: gracefully handle journal device errors for writeback
 mode

For the raid456 with writeback cache, when journal device failed during
normal operation, it is still possible to persist all data, as all
pending data is still in stripe cache. However, it is necessary to handle
journal failure gracefully.

During journal failures, the following logic handles the graceful shutdown
of journal:
1. raid5_error() marks the device as Faulty and schedules async work
   log->disable_writeback_work;
2. In disable_writeback_work (r5c_disable_writeback_async), the mddev is
   suspended, set to write through, and then resumed. mddev_suspend()
   flushes all cached stripes;
3. All cached stripes need to be flushed carefully to the RAID array.

This patch fixes issues within the process above:
1. In r5c_update_on_rdev_error() schedule disable_writeback_work for
   journal failures;
2. In r5c_disable_writeback_async(), wait for MD_SB_CHANGE_PENDING,
   since raid5_error() updates superblock.
3. In handle_stripe(), allow stripes with data in journal (s.injournal > 0)
   to make progress during log_failed;
4. In delay_towrite(), if log failed only process data in the cache (skip
   new writes in dev->towrite);
5. In __get_priority_stripe(), process loprio_list during journal device
   failures.
6. In raid5_remove_disk(), wait for all cached stripes are flushed before
   calling log_exit().

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 11 +++++++++--
 drivers/md/raid5-log.h   |  3 ++-
 drivers/md/raid5.c       | 29 +++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index a6a62e212cd3..cc3f8442f11f 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -24,6 +24,7 @@
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
+#include "raid5-log.h"
 
 /*
  * metadata/data stored in disk with 4k size unit (a block) regardless
@@ -680,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 		return;
 	pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
 		mdname(mddev));
+
+	/* wait superblock change before suspend */
+	wait_event(mddev->sb_wait,
+		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
 	mddev_suspend(mddev);
 	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
 	mddev_resume(mddev);
@@ -2983,7 +2989,7 @@ ioerr:
 	return ret;
 }
 
-void r5c_update_on_rdev_error(struct mddev *mddev)
+void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	struct r5conf *conf = mddev->private;
 	struct r5l_log *log = conf->log;
@@ -2991,7 +2997,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev)
 	if (!log)
 		return;
 
-	if (raid5_calc_degraded(conf) > 0 &&
+	if ((raid5_calc_degraded(conf) > 0 ||
+	     test_bit(Journal, &rdev->flags)) &&
 	    conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
 		schedule_work(&log->disable_writeback_work);
 }
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 27097101ccca..328d67aedda4 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -28,7 +28,8 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
 extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
 extern void r5c_check_cached_full_stripe(struct r5conf *conf);
 extern struct md_sysfs_entry r5c_journal_mode;
-extern void r5c_update_on_rdev_error(struct mddev *mddev);
+extern void r5c_update_on_rdev_error(struct mddev *mddev,
+				     struct md_rdev *rdev);
 extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
 
 extern struct dma_async_tx_descriptor *
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f8055a7abb4b..0ac57a925606 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2689,7 +2689,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 		bdevname(rdev->bdev, b),
 		mdname(mddev),
 		conf->raid_disks - mddev->degraded);
-	r5c_update_on_rdev_error(mddev);
+	r5c_update_on_rdev_error(mddev, rdev);
 }
 
 /*
@@ -3050,6 +3050,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
  *      When LOG_CRITICAL, stripes with injournal == 0 will be sent to
  *      no_space_stripes list.
  *
+ *   3. during journal failure
+ *      In journal failure, we try to flush all cached data to raid disks
+ *      based on data in stripe cache. The array is read-only to upper
+ *      layers, so we would skip all pending writes.
+ *
  */
 static inline bool delay_towrite(struct r5conf *conf,
 				 struct r5dev *dev,
@@ -3063,6 +3068,9 @@ static inline bool delay_towrite(struct r5conf *conf,
 	if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
 	    s->injournal > 0)
 		return true;
+	/* case 3 above */
+	if (s->log_failed && s->injournal)
+		return true;
 	return false;
 }
 
@@ -4696,10 +4704,15 @@ static void handle_stripe(struct stripe_head *sh)
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
 	       s.failed_num[0], s.failed_num[1]);
-	/* check if the array has lost more than max_degraded devices and,
+	/*
+	 * check if the array has lost more than max_degraded devices and,
 	 * if so, some requests might need to be failed.
+	 *
+	 * When journal device failed (log_failed), we will only process
+	 * the stripe if there is data need write to raid disks
 	 */
-	if (s.failed > conf->max_degraded || s.log_failed) {
+	if (s.failed > conf->max_degraded ||
+	    (s.log_failed && s.injournal == 0)) {
 		sh->check_state = 0;
 		sh->reconstruct_state = 0;
 		break_stripe_batch_list(sh, 0);
@@ -5272,8 +5285,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 	struct stripe_head *sh, *tmp;
 	struct list_head *handle_list = NULL;
 	struct r5worker_group *wg;
-	bool second_try = !r5c_is_writeback(conf->log);
-	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+	bool second_try = !r5c_is_writeback(conf->log) &&
+		!r5l_log_disk_error(conf);
+	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+		r5l_log_disk_error(conf);
 
 again:
 	wg = NULL;
@@ -7521,7 +7536,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * neilb: there is no locking about new writes here,
 		 * so this cannot be safe.
 		 */
-		if (atomic_read(&conf->active_stripes)) {
+		if (atomic_read(&conf->active_stripes) ||
+		    atomic_read(&conf->r5c_cached_full_stripes) ||
+		    atomic_read(&conf->r5c_cached_partial_stripes)) {
 			return -EBUSY;
 		}
 		log_exit(conf);
-- 
cgit v1.2.3-59-g8ed1b


From 5ddf0440a1a28f00f69ed2e093476bab3b60c2c3 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 11 May 2017 17:03:44 -0700
Subject: md/r5cache: handle sync with data in write back cache

Currently, sync of raid456 array cannot make progress when hitting
data in writeback r5cache.

This patch fixes this issue by flushing cached data of the stripe
before processing the sync request. This is achived by:

1. In handle_stripe(), do not set STRIPE_SYNCING if the stripe is
   in write back cache;
2. In r5c_try_caching_write(), handle the stripe in sync with write
   through;
3. In do_release_stripe(), make stripe in sync write out and send
   it to the state machine.

Shaohua: explictly set STRIPE_HANDLE after write out completed

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c |  8 +++++++-
 drivers/md/raid5.c       | 21 +++++++++++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index cc3f8442f11f..4c00bc248287 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2637,8 +2637,11 @@ int r5c_try_caching_write(struct r5conf *conf,
 	 * When run in degraded mode, array is set to write-through mode.
 	 * This check helps drain pending write safely in the transition to
 	 * write-through mode.
+	 *
+	 * When a stripe is syncing, the write is also handled in write
+	 * through mode.
 	 */
-	if (s->failed) {
+	if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) {
 		r5c_make_stripe_write_out(sh);
 		return -EAGAIN;
 	}
@@ -2841,6 +2844,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	}
 
 	r5l_append_flush_payload(log, sh->sector);
+	/* stripe is flused to raid disks, we can do resync now */
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+		set_bit(STRIPE_HANDLE, &sh->state);
 }
 
 int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0ac57a925606..9c4f7659f8b1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -233,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 			if (test_bit(R5_InJournal, &sh->dev[i].flags))
 				injournal++;
 	/*
-	 * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
-	 * data in journal, so they are not released to cached lists
+	 * In the following cases, the stripe cannot be released to cached
+	 * lists. Therefore, we make the stripe write out and set
+	 * STRIPE_HANDLE:
+	 *   1. when quiesce in r5c write back;
+	 *   2. when resync is requested fot the stripe.
 	 */
-	if (conf->quiesce && r5c_is_writeback(conf->log) &&
-	    !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
+	    (conf->quiesce && r5c_is_writeback(conf->log) &&
+	     !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
 		if (test_bit(STRIPE_R5C_CACHING, &sh->state))
 			r5c_make_stripe_write_out(sh);
 		set_bit(STRIPE_HANDLE, &sh->state);
@@ -4656,8 +4660,13 @@ static void handle_stripe(struct stripe_head *sh)
 
 	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
 		spin_lock(&sh->stripe_lock);
-		/* Cannot process 'sync' concurrently with 'discard' */
-		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		/*
+		 * Cannot process 'sync' concurrently with 'discard'.
+		 * Flush data in r5cache before 'sync'.
+		 */
+		if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_DISCARD, &sh->state) &&
 		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
 			set_bit(STRIPE_SYNCING, &sh->state);
 			clear_bit(STRIPE_INSYNC, &sh->state);
-- 
cgit v1.2.3-59-g8ed1b


From 76d837a4c0f905f98088877d780169d7a14a6b29 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 11 May 2017 14:31:59 +1000
Subject: KVM: PPC: Book3S PR: Don't include SPAPR TCE code on non-pseries
 platforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit e91aa8e6ecd5 ("KVM: PPC: Enable IOMMU_API for KVM_BOOK3S_64
permanently", 2017-03-22) enabled the SPAPR TCE code for all 64-bit
Book 3S kernel configurations in order to simplify the code and
reduce #ifdefs.  However, 64-bit Book 3S PPC platforms other than
pseries and powernv don't implement the necessary IOMMU callbacks,
leading to build failures like the following (for a pasemi config):

scripts/kconfig/conf  --silentoldconfig Kconfig
warning: (KVM_BOOK3S_64) selects SPAPR_TCE_IOMMU which has unmet direct dependencies (IOMMU_SUPPORT && (PPC_POWERNV || PPC_PSERIES))

...

  CC [M]  arch/powerpc/kvm/book3s_64_vio.o
/home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_64_vio.c: In function ‘kvmppc_clear_tce’:
/home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_64_vio.c:363:2: error: implicit declaration of function ‘iommu_tce_xchg’ [-Werror=implicit-function-declaration]
  iommu_tce_xchg(tbl, entry, &hpa, &dir);
  ^

To fix this, we make the inclusion of the SPAPR TCE support, and the
code that uses it in book3s_vio.c and book3s_vio_hv.c, depend on
the inclusion of support for the pseries and/or powernv platforms.
This means that when running a 'pseries' guest on those platforms,
the guest won't have in-kernel acceleration of the PAPR TCE hypercalls,
but at least now they compile.

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/Kconfig          |  2 +-
 arch/powerpc/kvm/Makefile         |  4 ++--
 arch/powerpc/kvm/book3s_pr_papr.c | 36 +++++++++++++++++++++++++++---------
 arch/powerpc/kvm/powerpc.c        |  4 +++-
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 24de532c1736..0c52cb5d43f5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -67,7 +67,7 @@ config KVM_BOOK3S_64
 	select KVM_BOOK3S_64_HANDLER
 	select KVM
 	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
-	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
+	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
 	---help---
 	  Support running unmodified book3s_64 and book3s_32 guest kernels
 	  in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index d91a2604c496..381a6ec0ff3b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -46,7 +46,7 @@ kvm-e500mc-objs := \
 	e500_emulate.o
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
+kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \
 	book3s_64_vio_hv.o
 
 kvm-pr-y := \
@@ -90,11 +90,11 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
 
 kvm-book3s_64-module-objs := \
 	$(common-objs-y) \
 	book3s.o \
-	book3s_64_vio.o \
 	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)
 
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index a04384adece7..8a4205fa774f 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -262,36 +262,37 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
-static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
 {
-	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
-	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
-	unsigned long tce = kvmppc_get_gpr(vcpu, 6);
 	long rc;
 
-	rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
+	rc = kvmppc_h_logical_ci_load(vcpu);
 	if (rc == H_TOO_HARD)
 		return EMULATE_FAIL;
 	kvmppc_set_gpr(vcpu, 3, rc);
 	return EMULATE_DONE;
 }
 
-static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
 {
 	long rc;
 
-	rc = kvmppc_h_logical_ci_load(vcpu);
+	rc = kvmppc_h_logical_ci_store(vcpu);
 	if (rc == H_TOO_HARD)
 		return EMULATE_FAIL;
 	kvmppc_set_gpr(vcpu, 3, rc);
 	return EMULATE_DONE;
 }
 
-static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 {
+	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+	unsigned long tce = kvmppc_get_gpr(vcpu, 6);
 	long rc;
 
-	rc = kvmppc_h_logical_ci_store(vcpu);
+	rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
 	if (rc == H_TOO_HARD)
 		return EMULATE_FAIL;
 	kvmppc_set_gpr(vcpu, 3, rc);
@@ -329,6 +330,23 @@ static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+#else /* CONFIG_SPAPR_TCE_IOMMU */
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
+
 static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 {
 	long rc = kvmppc_xics_hcall(vcpu, cmd);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f7cf2cd564ef..7f71ab5fcad1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1749,7 +1749,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_SPAPR_TCE_IOMMU
 	case KVM_CREATE_SPAPR_TCE_64: {
 		struct kvm_create_spapr_tce_64 create_tce_64;
 
@@ -1780,6 +1780,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
 		goto out;
 	}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
 		struct kvm_ppc_smmu_info info;
 		struct kvm *kvm = filp->private_data;
-- 
cgit v1.2.3-59-g8ed1b


From 5ba9b0a14132d0b8d97affe909f324045a968d03 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Fri, 12 May 2017 11:55:26 +0800
Subject: irqchip/mbigen: Fix memory mapping code

Some mbigens share memory regions, and devm_ioremap_resource
does not allow to share resources which will break the probe
of mbigen, in opposition to devm_ioremap.

This patch restores back usage of devm_ioremap function, but
with proper error handling and logging.

Fixes: 216646e4d82e ("irqchip/mbigen: Fix return value check in mbigen_device_probe()")
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: linuxarm@huawei.com
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: MaJun <majun258@huawei.com>
Link: http://lkml.kernel.org/r/1494561328-39514-2-git-send-email-guohanjun@huawei.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-mbigen.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index d2306c821ebb..0f5e66e96bd9 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -337,9 +337,12 @@ static int mbigen_device_probe(struct platform_device *pdev)
 	mgn_chip->pdev = pdev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mgn_chip->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mgn_chip->base))
-		return PTR_ERR(mgn_chip->base);
+	mgn_chip->base = devm_ioremap(&pdev->dev, res->start,
+				      resource_size(res));
+	if (!mgn_chip->base) {
+		dev_err(&pdev->dev, "failed to ioremap %pR\n", res);
+		return -ENOMEM;
+	}
 
 	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
 		err = mbigen_of_create_domain(pdev, mgn_chip);
-- 
cgit v1.2.3-59-g8ed1b


From ad7cc3c0c57d77b442db323056354d0e49833569 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Fri, 12 May 2017 11:55:27 +0800
Subject: irqchip/mbigen: Fix potential NULL dereferencing

platform_get_resource() may return NULL, add proper
check to avoid potential NULL dereferencing.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: linuxarm@huawei.com
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: MaJun <majun258@huawei.com>
Link: http://lkml.kernel.org/r/1494561328-39514-3-git-send-email-guohanjun@huawei.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-mbigen.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 0f5e66e96bd9..2fa1e457190d 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -337,6 +337,9 @@ static int mbigen_device_probe(struct platform_device *pdev)
 	mgn_chip->pdev = pdev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
 	mgn_chip->base = devm_ioremap(&pdev->dev, res->start,
 				      resource_size(res));
 	if (!mgn_chip->base) {
-- 
cgit v1.2.3-59-g8ed1b


From 9459a04b6a5a09967eec94a1b66f0a74312819d9 Mon Sep 17 00:00:00 2001
From: MaJun <majun258@huawei.com>
Date: Fri, 12 May 2017 11:55:28 +0800
Subject: irqchip/mbigen: Fix the clear register offset calculation

The register array offset for clearing an interrupt is calculated by:

    offset = (hwirq - RESERVED_IRQ_PER_MBIGEN_CHIP) / 32;

This is wrong because the clear register array includes the reserved
interrupts. So the clear operation ends up in the wrong register.

This went unnoticed so far, because the hardware clears the real bit
through a timeout mechanism when the hardware is configured in debug
mode. That debug mode was enabled on early generations of the hardware, so
the problem was papered over.

On newer hardware with updated firmware the debug mode was disabled, so the
bits did not get cleared which causes the system to malfunction.

Remove the subtraction of RESERVED_IRQ_PER_MBIGEN_CHIP, so the correct
register is accessed.

[ tglx: Rewrote changelog ]

Fixes: a6c2f87b8820 ("irqchip/mbigen: Implement the mbigen irq chip operation functions")
Signed-off-by: MaJun <majun258@huawei.com>
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: linuxarm@huawei.com
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Link: http://lkml.kernel.org/r/1494561328-39514-4-git-send-email-guohanjun@huawei.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-mbigen.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 2fa1e457190d..31d6b5a582d2 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -106,10 +106,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq,
 static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq,
 					u32 *mask, u32 *addr)
 {
-	unsigned int ofst;
-
-	hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
-	ofst = hwirq / 32 * 4;
+	unsigned int ofst = (hwirq / 32) * 4;
 
 	*mask = 1 << (hwirq % 32);
 	*addr = ofst + REG_MBIGEN_CLEAR_OFFSET;
-- 
cgit v1.2.3-59-g8ed1b


From dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 12 May 2017 14:39:52 +0800
Subject: sctp: fix src address selection if using secondary addresses for ipv6

Commit 0ca50d12fe46 ("sctp: fix src address selection if using secondary
addresses") has fixed a src address selection issue when using secondary
addresses for ipv4.

Now sctp ipv6 also has the similar issue. When using a secondary address,
sctp_v6_get_dst tries to choose the saddr which has the most same bits
with the daddr by sctp_v6_addr_match_len. It may make some cases not work
as expected.

hostA:
  [1] fd21:356b:459a:cf10::11 (eth1)
  [2] fd21:356b:459a:cf20::11 (eth2)

hostB:
  [a] fd21:356b:459a:cf30::2  (eth1)
  [b] fd21:356b:459a:cf40::2  (eth2)

route from hostA to hostB:
  fd21:356b:459a:cf30::/64 dev eth1  metric 1024  mtu 1500

The expected path should be:
  fd21:356b:459a:cf10::11 <-> fd21:356b:459a:cf30::2
But addr[2] matches addr[a] more bits than addr[1] does, according to
sctp_v6_addr_match_len. It causes the path to be:
  fd21:356b:459a:cf20::11 <-> fd21:356b:459a:cf30::2

This patch is to fix it with the same way as Marcelo's fix for sctp ipv4.
As no ip_dev_find for ipv6, this patch is to use ipv6_chk_addr to check
if the saddr is in a dev instead.

Note that for backwards compatibility, it will still do the addr_match_len
check here when no optimal is found.

Reported-by: Patrick Talbert <ptalbert@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 961ee59f696a..142b70e959af 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	struct sctp_bind_addr *bp;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sctp_sockaddr_entry *laddr;
-	union sctp_addr *baddr = NULL;
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
 	struct in6_addr *final_p, final;
 	__u8 matchlen = 0;
-	__u8 bmatchlen;
 	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
@@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	 */
 	rcu_read_lock();
 	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-		if (!laddr->valid)
+		struct dst_entry *bdst;
+		__u8 bmatchlen;
+
+		if (!laddr->valid ||
+		    laddr->state != SCTP_ADDR_SRC ||
+		    laddr->a.sa.sa_family != AF_INET6 ||
+		    scope > sctp_scope(&laddr->a))
 			continue;
-		if ((laddr->state == SCTP_ADDR_SRC) &&
-		    (laddr->a.sa.sa_family == AF_INET6) &&
-		    (scope <= sctp_scope(&laddr->a))) {
-			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
-			if (!baddr || (matchlen < bmatchlen)) {
-				baddr = &laddr->a;
-				matchlen = bmatchlen;
-			}
-		}
-	}
-	if (baddr) {
-		fl6->saddr = baddr->v6.sin6_addr;
-		fl6->fl6_sport = baddr->v6.sin6_port;
+
+		fl6->saddr = laddr->a.v6.sin6_addr;
+		fl6->fl6_sport = laddr->a.v6.sin6_port;
 		final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
-		dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+		bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
+
+		if (!IS_ERR(bdst) &&
+		    ipv6_chk_addr(dev_net(bdst->dev),
+				  &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
+			if (!IS_ERR_OR_NULL(dst))
+				dst_release(dst);
+			dst = bdst;
+			break;
+		}
+
+		bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+		if (matchlen > bmatchlen)
+			continue;
+
+		if (!IS_ERR_OR_NULL(dst))
+			dst_release(dst);
+		dst = bdst;
+		matchlen = bmatchlen;
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3-59-g8ed1b


From 9fc3e4dc67fde953fd26adb3cea8f92597810b64 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 11 May 2017 22:11:29 -0500
Subject: net: dsa: mv88e6xxx: add default case to switch

Add default case to switch in order to avoid any chance of using an
uninitialized variable _low_, in case s->type does not match any of
the listed case values.

Addresses-Coverity-ID: 1398130
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 19581d783d8e..d034d8cd7d22 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -849,6 +849,9 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 		mv88e6xxx_g1_stats_read(chip, reg, &low);
 		if (s->sizeof_stat == 8)
 			mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
+		break;
+	default:
+		return UINT64_MAX;
 	}
 	value = (((u64)high) << 16) | low;
 	return value;
-- 
cgit v1.2.3-59-g8ed1b


From 75cf067953d5ee543b3bda90bbfcbee5e1f94ae8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 12 May 2017 12:11:13 +0200
Subject: net: irda: irda-usb: fix firmware name on big-endian hosts

Add missing endianness conversion when using the USB device-descriptor
bcdDevice field to construct a firmware file name.

Fixes: 8ef80aef118e ("[IRDA]: irda-usb.c: STIR421x cleanups")
Cc: stable <stable@vger.kernel.org>     # 2.6.18
Cc: Nick Fedchik <nfedchik@atlantic-link.com.ua>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/irda-usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 8716b8c07feb..6f3c805f7211 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -1077,7 +1077,7 @@ static int stir421x_patch_device(struct irda_usb_cb *self)
          * are "42101001.sb" or "42101002.sb"
          */
         sprintf(stir421x_fw_name, "4210%4X.sb",
-                self->usbdev->descriptor.bcdDevice);
+		le16_to_cpu(self->usbdev->descriptor.bcdDevice));
         ret = request_firmware(&fw, stir421x_fw_name, &self->usbdev->dev);
         if (ret < 0)
                 return ret;
-- 
cgit v1.2.3-59-g8ed1b


From b12ca80ca145cecadf841ba27cc061c510cd97ca Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 12 May 2017 12:13:26 +0200
Subject: net: ch9200: add missing USB-descriptor endianness conversions

Add the missing endianness conversions to a debug statement printing
the USB device-descriptor idVendor and idProduct fields during probe.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ch9200.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index c4f1c363e24b..9df3c1ffff35 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -310,8 +310,8 @@ static int get_mac_address(struct usbnet *dev, unsigned char *data)
 	int rd_mac_len = 0;
 
 	netdev_dbg(dev->net, "get_mac_address:\n\tusbnet VID:%0x PID:%0x\n",
-		   dev->udev->descriptor.idVendor,
-		   dev->udev->descriptor.idProduct);
+		   le16_to_cpu(dev->udev->descriptor.idVendor),
+		   le16_to_cpu(dev->udev->descriptor.idProduct));
 
 	memset(mac_addr, 0, sizeof(mac_addr));
 	rd_mac_len = control_read(dev, REQUEST_READ, 0,
-- 
cgit v1.2.3-59-g8ed1b


From 42e6cae1b35b186650f5abc7b24c20ab6986c5a0 Mon Sep 17 00:00:00 2001
From: Bert Kenward <bkenward@solarflare.com>
Date: Fri, 12 May 2017 17:18:50 +0100
Subject: sfc: revert changes to NIC revision numbers

The revision enum values (eg EFX_REV_HUNT_A0) form part of our API,
 and are included in ethtool. If these are inconsistent then ethtool
 will print garbage for a register dump (ethtool -d).

Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/nic.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 7b916aa21bde..4d7fb8af880d 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -18,8 +18,12 @@
 #include "mcdi.h"
 
 enum {
-	EFX_REV_SIENA_A0 = 0,
-	EFX_REV_HUNT_A0 = 1,
+	/* Revisions 0-2 were Falcon A0, A1 and B0 respectively.
+	 * They are not supported by this driver but these revision numbers
+	 * form part of the ethtool API for register dumping.
+	 */
+	EFX_REV_SIENA_A0 = 3,
+	EFX_REV_HUNT_A0 = 4,
 };
 
 static inline int efx_nic_rev(struct efx_nic *efx)
-- 
cgit v1.2.3-59-g8ed1b


From 1c4d5f51a812a82de97beee24f48ed05c65ebda5 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 12 May 2017 12:00:01 -0400
Subject: vmxnet3: ensure that adapter is in proper state during force_close

There are several paths in vmxnet3, where settings changes cause the
adapter to be brought down and back up (vmxnet3_set_ringparam among
them).  Should part of the reset operation fail, these paths call
vmxnet3_force_close, which enables all napi instances prior to calling
dev_close (with the expectation that vmxnet3_close will then properly
disable them again).  However, vmxnet3_force_close neglects to clear
VMXNET3_STATE_BIT_QUIESCED prior to calling dev_close.  As a result
vmxnet3_quiesce_dev (called from vmxnet3_close), returns early, and
leaves all the napi instances in a enabled state while the device itself
is closed.  If a device in this state is activated again, napi_enable
will be called on already enabled napi_instances, leading to a BUG halt.

The fix is to simply enausre that the QUIESCED bit is cleared in
vmxnet3_force_close to allow quesence to be completed properly on close.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Shrikrishna Khare <skhare@vmware.com>
CC: "VMware, Inc." <pv-drivers@vmware.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 25bc764ae7dc..d1c7029ded7c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2962,6 +2962,11 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter)
 	/* we need to enable NAPI, otherwise dev_close will deadlock */
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		napi_enable(&adapter->rx_queue[i].napi);
+	/*
+	 * Need to clear the quiesce bit to ensure that vmxnet3_close
+	 * can quiesce the device properly
+	 */
+	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
 	dev_close(adapter->netdev);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From bdce57e7ae9cac56cda958029104c752afaf0894 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Fri, 3 Mar 2017 09:44:11 -0500
Subject: tools/power/acpi: Add .gitignore file

Add a .gitignore file so that git commands do not pick up the resulting
binaries and directories.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Acked-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/acpi/.gitignore | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 tools/power/acpi/.gitignore

diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
new file mode 100644
index 000000000000..cba3d994995c
--- /dev/null
+++ b/tools/power/acpi/.gitignore
@@ -0,0 +1,4 @@
+acpidbg
+acpidump
+ec
+include
-- 
cgit v1.2.3-59-g8ed1b


From f369fdf4f661322b73f3307e9f3cd55fb3a20123 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Tue, 9 May 2017 15:02:22 +0800
Subject: Revert "ACPI / button: Remove lid_init_state=method mode"

This reverts commit ecb10b694b72ca5ea51b3c90a71ff2a11963425a.

The only expected ACPI control method lid device's usage model is

 1. Listen to the lid notification,
 2. Evaluate _LID after being notified by BIOS,
 3. Suspend the system (if users configure to do so) after seeing "close".

It's not ensured that BIOS will notify OS after boot/resume, and
it's not ensured that BIOS will always generate "open" event upon
opening the lid.

But there are 2 wrong usage models:

 1. When the lid device is responsible for suspend/resume the system,
    userspace requires to see "open" event to be paired with "close" after
    the system is resumed, or it will suspend the system again.

 2. When an external monitor connects to the laptop attached docks,
    userspace requires to see "close" event after the system is resumed so
    that it can determine whether the internal display should remain dark
    and the external display should be lit on.

After we made default kernel behavior to be suitable for usage model 1,
users of usage model 2 start to report regressions for such behavior
change.

Reversion of button.lid_init_state=method doesn't actually reverts to old
default behavior as doing so can enter a regression loop, but facilitates
users to work the reported regressions around with
button.lid_init_state=method.

Fixes: ecb10b694b72 (ACPI / button: Remove lid_init_state=method mode)
Cc: 4.11+ <stable@vger.kernel.org> # 4.11+
Link: https://bugzilla.kernel.org/show_bug.cgi?id=195455
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1430259
Tested-by: Steffen Weber <steffen.weber@gmail.com>
Tested-by: Julian Wiedmann <julian.wiedmann@jwi.name>
Reported-by: Joachim Frieben <jfrieben@hotmail.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/acpi/acpi-lid.txt | 16 ++++++++++++----
 drivers/acpi/button.c           |  9 +++++++++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt
index 22cb3091f297..effe7af3a5af 100644
--- a/Documentation/acpi/acpi-lid.txt
+++ b/Documentation/acpi/acpi-lid.txt
@@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues.
 If the userspace hasn't been prepared to ignore the unreliable "opened"
 events and the unreliable initial state notification, Linux users can use
 the following kernel parameters to handle the possible issues:
-A. button.lid_init_state=open:
+A. button.lid_init_state=method:
+   When this option is specified, the ACPI button driver reports the
+   initial lid state using the returning value of the _LID control method
+   and whether the "opened"/"closed" events are paired fully relies on the
+   firmware implementation.
+   This option can be used to fix some platforms where the returning value
+   of the _LID control method is reliable but the initial lid state
+   notification is missing.
+   This option is the default behavior during the period the userspace
+   isn't ready to handle the buggy AML tables.
+B. button.lid_init_state=open:
    When this option is specified, the ACPI button driver always reports the
    initial lid state as "opened" and whether the "opened"/"closed" events
    are paired fully relies on the firmware implementation.
    This may fix some platforms where the returning value of the _LID
    control method is not reliable and the initial lid state notification is
    missing.
-   This option is the default behavior during the period the userspace
-   isn't ready to handle the buggy AML tables.
 
 If the userspace has been prepared to ignore the unreliable "opened" events
 and the unreliable initial state notification, Linux users should always
 use the following kernel parameter:
-B. button.lid_init_state=ignore:
+C. button.lid_init_state=ignore:
    When this option is specified, the ACPI button driver never reports the
    initial lid state and there is a compensation mechanism implemented to
    ensure that the reliable "closed" notifications can always be delievered
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 668137e4a069..6d5a8c1d3132 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -57,6 +57,7 @@
 
 #define ACPI_BUTTON_LID_INIT_IGNORE	0x00
 #define ACPI_BUTTON_LID_INIT_OPEN	0x01
+#define ACPI_BUTTON_LID_INIT_METHOD	0x02
 
 #define _COMPONENT		ACPI_BUTTON_COMPONENT
 ACPI_MODULE_NAME("button");
@@ -376,6 +377,9 @@ static void acpi_lid_initialize_state(struct acpi_device *device)
 	case ACPI_BUTTON_LID_INIT_OPEN:
 		(void)acpi_lid_notify_state(device, 1);
 		break;
+	case ACPI_BUTTON_LID_INIT_METHOD:
+		(void)acpi_lid_update_state(device);
+		break;
 	case ACPI_BUTTON_LID_INIT_IGNORE:
 	default:
 		break;
@@ -559,6 +563,9 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp)
 	if (!strncmp(val, "open", sizeof("open") - 1)) {
 		lid_init_state = ACPI_BUTTON_LID_INIT_OPEN;
 		pr_info("Notify initial lid state as open\n");
+	} else if (!strncmp(val, "method", sizeof("method") - 1)) {
+		lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
+		pr_info("Notify initial lid state with _LID return value\n");
 	} else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) {
 		lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE;
 		pr_info("Do not notify initial lid state\n");
@@ -572,6 +579,8 @@ static int param_get_lid_init_state(char *buffer, struct kernel_param *kp)
 	switch (lid_init_state) {
 	case ACPI_BUTTON_LID_INIT_OPEN:
 		return sprintf(buffer, "open");
+	case ACPI_BUTTON_LID_INIT_METHOD:
+		return sprintf(buffer, "method");
 	case ACPI_BUTTON_LID_INIT_IGNORE:
 		return sprintf(buffer, "ignore");
 	default:
-- 
cgit v1.2.3-59-g8ed1b


From d82dd0e34d0347be201fd274dc84cd645dccc064 Mon Sep 17 00:00:00 2001
From: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
Date: Fri, 12 May 2017 14:26:10 +0200
Subject: raid1: prefer disk without bad blocks

If an array consists of two drives and the first drive has the bad
block, the read request to the region overlapping the bad block chooses
the same disk (with bad block) as device to read from over and over and
the request gets stuck. If the first disk only partially overlaps with
bad block, it becomes a candidate ("best disk") for shorter range of
sectors. The second disk is capable of reading the entire requested
range and it is updated accordingly, however it is not recorded as a
best device for the request. In the end the request is sent to the first
disk to read entire range of sectors. It fails and is re-tried in a
moment but with the same outcome.

Actually it is quite likely scenario but it had little exposure in my
test until commit 715d40b93b10 ("md/raid1: add failfast handling for
reads.") removed preference for idle disk. Such scenario had been
passing as second disk was always chosen when idle.

Reset a candidate ("best disk") to read from if disk can read entire
range. Do it only if other disk has already been chosen as a candidate
for a smaller range. The head position / disk type logic will select
the best disk to read from - it is fine as disk with bad block won't be
considered for it.

Signed-off-by: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid1.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a17ed6218d51..af5056d56878 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -666,8 +666,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 					break;
 			}
 			continue;
-		} else
+		} else {
+			if ((sectors > best_good_sectors) && (best_disk >= 0))
+				best_disk = -1;
 			best_good_sectors = sectors;
+		}
 
 		if (best_disk >= 0)
 			/* At least two disks to choose from so failfast is OK */
-- 
cgit v1.2.3-59-g8ed1b


From ecdcf622eb74b52cebde1387a7a1852a787d8050 Mon Sep 17 00:00:00 2001
From: Joe Perches via samba-technical <samba-technical@lists.samba.org>
Date: Sun, 7 May 2017 01:31:47 -0700
Subject: cifs: cifsacl: Use a temporary ops variable to reduce code length

Create an ops variable to store tcon->ses->server->ops and cache
indirections and reduce code size a trivial bit.

$ size fs/cifs/cifsacl.o*
   text	   data	    bss	    dec	    hex	filename
   5338	    136	      8	   5482	   156a	fs/cifs/cifsacl.o.new
   5371	    136	      8	   5515	   158b	fs/cifs/cifsacl.o.old

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsacl.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 15bac390dff9..b98436f5c7c7 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1135,20 +1135,19 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 	u32 acllen = 0;
 	int rc = 0;
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
-	struct cifs_tcon *tcon;
+	struct smb_version_operations *ops;
 
 	cifs_dbg(NOISY, "converting ACL to mode for %s\n", path);
 
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	tcon = tlink_tcon(tlink);
 
-	if (pfid && (tcon->ses->server->ops->get_acl_by_fid))
-		pntsd = tcon->ses->server->ops->get_acl_by_fid(cifs_sb, pfid,
-							  &acllen);
-	else if (tcon->ses->server->ops->get_acl)
-		pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
-							&acllen);
+	ops = tlink_tcon(tlink)->ses->server->ops;
+
+	if (pfid && (ops->get_acl_by_fid))
+		pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen);
+	else if (ops->get_acl)
+		pntsd = ops->get_acl(cifs_sb, inode, path, &acllen);
 	else {
 		cifs_put_tlink(tlink);
 		return -EOPNOTSUPP;
@@ -1181,23 +1180,23 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 	struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
-	struct cifs_tcon *tcon;
+	struct smb_version_operations *ops;
 
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	tcon = tlink_tcon(tlink);
+
+	ops = tlink_tcon(tlink)->ses->server->ops;
 
 	cifs_dbg(NOISY, "set ACL from mode for %s\n", path);
 
 	/* Get the security descriptor */
 
-	if (tcon->ses->server->ops->get_acl == NULL) {
+	if (ops->get_acl == NULL) {
 		cifs_put_tlink(tlink);
 		return -EOPNOTSUPP;
 	}
 
-	pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
-						&secdesclen);
+	pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen);
 	if (IS_ERR(pntsd)) {
 		rc = PTR_ERR(pntsd);
 		cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
@@ -1224,13 +1223,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 
 	cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
 
-	if (tcon->ses->server->ops->set_acl == NULL)
+	if (ops->set_acl == NULL)
 		rc = -EOPNOTSUPP;
 
 	if (!rc) {
 		/* Set the security descriptor */
-		rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode,
-						     path, aclflag);
+		rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag);
 		cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
 	}
 	cifs_put_tlink(tlink);
-- 
cgit v1.2.3-59-g8ed1b


From cd1230070ae1c12fd34cf6a557bfa81bf9311009 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 12 May 2017 17:59:32 +0200
Subject: SMB2: Fix share type handling

In fs/cifs/smb2pdu.h, we have:
#define SMB2_SHARE_TYPE_DISK    0x01
#define SMB2_SHARE_TYPE_PIPE    0x02
#define SMB2_SHARE_TYPE_PRINT   0x03

Knowing that, with the current code, the SMB2_SHARE_TYPE_PRINT case can
never trigger and printer share would be interpreted as disk share.

So, test the ShareType value for equality instead.

Fixes: faaf946a7d5b ("CIFS: Add tree connect/disconnect capability for SMB2")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e0517f499c38..e4afdaae743f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1240,15 +1240,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 		goto tcon_exit;
 	}
 
-	if (rsp->ShareType & SMB2_SHARE_TYPE_DISK)
+	switch (rsp->ShareType) {
+	case SMB2_SHARE_TYPE_DISK:
 		cifs_dbg(FYI, "connection to disk share\n");
-	else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) {
+		break;
+	case SMB2_SHARE_TYPE_PIPE:
 		tcon->ipc = true;
 		cifs_dbg(FYI, "connection to pipe share\n");
-	} else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) {
-		tcon->print = true;
+		break;
+	case SMB2_SHARE_TYPE_PRINT:
+		tcon->ipc = true;
 		cifs_dbg(FYI, "connection to printer\n");
-	} else {
+		break;
+	default:
 		cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType);
 		rc = -EOPNOTSUPP;
 		goto tcon_error_exit;
-- 
cgit v1.2.3-59-g8ed1b


From 4328fea77ca30ef6af938ae3f263a3d055a9c0e6 Mon Sep 17 00:00:00 2001
From: Karim Eshapa <karim.eshapa@gmail.com>
Date: Fri, 12 May 2017 01:53:38 +0200
Subject: fs: cifs: transport: Use time_after for time comparison

Use time_after kernel macro for time comparison
that has safety check.

Signed-off-by: Karim Eshapa <karim.eshapa@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index de589d0d3739..47a125ece11e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -94,7 +94,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	now = jiffies;
 	/* commands taking longer than one second are indications that
 	   something is wrong, unless it is quite a slow link or server */
-	if ((now - midEntry->when_alloc) > HZ) {
+	if (time_after(now, midEntry->when_alloc + HZ)) {
 		if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
 			pr_debug(" CIFS slow rsp: cmd %d mid %llu",
 			       midEntry->command, midEntry->mid);
-- 
cgit v1.2.3-59-g8ed1b


From 67b4c889cc835a2a6e2ff4e20544a33e37e2875d Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Fri, 12 May 2017 20:59:10 -0500
Subject: [CIFS] Minor cleanup of xattr query function

Some minor cleanup of cifs query xattr functions (will also make
SMB3 xattr implementation cleaner as well).

Signed-off-by: Steve French <steve.french@primarydata.com>
---
 fs/cifs/cifsglob.h  | 2 +-
 fs/cifs/cifsproto.h | 3 +--
 fs/cifs/cifssmb.c   | 4 +++-
 fs/cifs/inode.c     | 3 +--
 fs/cifs/xattr.c     | 6 ++----
 5 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8be55be70faf..bcc7d9acad64 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -418,7 +418,7 @@ struct smb_version_operations {
 	int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
 	ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
 			const unsigned char *, const unsigned char *, char *,
-			size_t, const struct nls_table *, int);
+			size_t, struct cifs_sb_info *);
 	int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *,
 			const char *, const void *, const __u16,
 			const struct nls_table *, int);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e49958c3f8bb..6eb3147132e3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -480,8 +480,7 @@ extern int CIFSSMBCopy(unsigned int xid,
 extern ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
 			const unsigned char *searchName,
 			const unsigned char *ea_name, char *EAData,
-			size_t bufsize, const struct nls_table *nls_codepage,
-			int remap_special_chars);
+			size_t bufsize, struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
 		const char *fileName, const char *ea_name,
 		const void *ea_value, const __u16 ea_value_len,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4de3186d8a71..fbb0d4cbda41 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -6069,11 +6069,13 @@ ssize_t
 CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
 		const unsigned char *searchName, const unsigned char *ea_name,
 		char *EAData, size_t buf_size,
-		const struct nls_table *nls_codepage, int remap)
+		struct cifs_sb_info *cifs_sb)
 {
 		/* BB assumes one setup word */
 	TRANSACTION2_QPI_REQ *pSMB = NULL;
 	TRANSACTION2_QPI_RSP *pSMBr = NULL;
+	int remap = cifs_remap(cifs_sb);
+	struct nls_table *nls_codepage = cifs_sb->local_nls;
 	int rc = 0;
 	int bytes_returned;
 	int list_len;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index c3b2fa0b2ec8..4d1fcd76d022 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -563,8 +563,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
 
 	rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path,
 			"SETFILEBITS", ea_value, 4 /* size of buf */,
-			cifs_sb->local_nls,
-			cifs_remap(cifs_sb));
+			cifs_sb);
 	cifs_put_tlink(tlink);
 	if (rc < 0)
 		return (int)rc;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 20af5187ba63..3cb5c9e2d4e7 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -235,8 +235,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
 
 		if (pTcon->ses->server->ops->query_all_EAs)
 			rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
-				full_path, name, value, size,
-				cifs_sb->local_nls, cifs_remap(cifs_sb));
+				full_path, name, value, size, cifs_sb);
 		break;
 
 	case XATTR_CIFS_ACL: {
@@ -336,8 +335,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
 
 	if (pTcon->ses->server->ops->query_all_EAs)
 		rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
-				full_path, NULL, data, buf_size,
-				cifs_sb->local_nls, cifs_remap(cifs_sb));
+				full_path, NULL, data, buf_size, cifs_sb);
 list_ea_exit:
 	kfree(full_path);
 	free_xid(xid);
-- 
cgit v1.2.3-59-g8ed1b


From b9a985db98961ae1ba0be169f19df1c567e4ffe0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 11 May 2017 18:21:01 -0500
Subject: pid_ns: Sleep in TASK_INTERRUPTIBLE in zap_pid_ns_processes

The code can potentially sleep for an indefinite amount of time in
zap_pid_ns_processes triggering the hung task timeout, and increasing
the system average.  This is undesirable.  Sleep with a task state of
TASK_INTERRUPTIBLE instead of TASK_UNINTERRUPTIBLE to remove these
undesirable side effects.

Apparently under heavy load this has been allowing Chrome to trigger
the hung time task timeout error and cause ChromeOS to reboot.

Reported-by: Vovo Yang <vovoy@google.com>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Fixes: 6347e9009104 ("pidns: guarantee that the pidns init will be the last pidns process reaped")
Cc: stable@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/pid_namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index d1f3e9f558b8..74a5a7255b4d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	 * if reparented.
 	 */
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (pid_ns->nr_hashed == init_pids)
 			break;
 		schedule();
-- 
cgit v1.2.3-59-g8ed1b


From 3fd37226216620c1a468afa999739d5016fbc349 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 12 May 2017 19:11:31 +0300
Subject: pid_ns: Fix race between setns'ed fork() and zap_pid_ns_processes()

Imagine we have a pid namespace and a task from its parent's pid_ns,
which made setns() to the pid namespace. The task is doing fork(),
while the pid namespace's child reaper is dying. We have the race
between them:

Task from parent pid_ns             Child reaper
copy_process()                      ..
  alloc_pid()                       ..
  ..                                zap_pid_ns_processes()
  ..                                  disable_pid_allocation()
  ..                                  read_lock(&tasklist_lock)
  ..                                  iterate over pids in pid_ns
  ..                                    kill tasks linked to pids
  ..                                  read_unlock(&tasklist_lock)
  write_lock_irq(&tasklist_lock);   ..
  attach_pid(p, PIDTYPE_PID);       ..
  ..                                ..

So, just created task p won't receive SIGKILL signal,
and the pid namespace will be in contradictory state.
Only manual kill will help there, but does the userspace
care about this? I suppose, the most users just inject
a task into a pid namespace and wait a SIGCHLD from it.

The patch fixes the problem. It simply checks for
(pid_ns->nr_hashed & PIDNS_HASH_ADDING) in copy_process().
We do it under the tasklist_lock, and can't skip
PIDNS_HASH_ADDING as noted by Oleg:

"zap_pid_ns_processes() does disable_pid_allocation()
and then takes tasklist_lock to kill the whole namespace.
Given that copy_process() checks PIDNS_HASH_ADDING
under write_lock(tasklist) they can't race;
if copy_process() takes this lock first, the new child will
be killed, otherwise copy_process() can't miss
the change in ->nr_hashed."

If allocation is disabled, we just return -ENOMEM
like it's made for such cases in alloc_pid().

v2: Do not move disable_pid_allocation(), do not
introduce a new variable in copy_process() and simplify
the patch as suggested by Oleg Nesterov.
Account the problem with double irq enabling
found by Eric W. Biederman.

Fixes: c876ad768215 ("pidns: Stop pid allocation when init dies")
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Ingo Molnar <mingo@kernel.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Mike Rapoport <rppt@linux.vnet.ibm.com>
CC: Michal Hocko <mhocko@suse.com>
CC: Andy Lutomirski <luto@kernel.org>
CC: "Eric W. Biederman" <ebiederm@xmission.com>
CC: Andrei Vagin <avagin@openvz.org>
CC: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Serge Hallyn <serge@hallyn.com>
Cc: stable@vger.kernel.org
Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/fork.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 06d759ab4c62..aa1076c5e4a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1845,11 +1845,13 @@ static __latent_entropy struct task_struct *copy_process(
 	*/
 	recalc_sigpending();
 	if (signal_pending(current)) {
-		spin_unlock(&current->sighand->siglock);
-		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
 		goto bad_fork_cancel_cgroup;
 	}
+	if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+		retval = -ENOMEM;
+		goto bad_fork_cancel_cgroup;
+	}
 
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1907,6 +1909,8 @@ static __latent_entropy struct task_struct *copy_process(
 	return p;
 
 bad_fork_cancel_cgroup:
+	spin_unlock(&current->sighand->siglock);
+	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p);
 bad_fork_free_pid:
 	cgroup_threadgroup_change_end(current);
-- 
cgit v1.2.3-59-g8ed1b


From 9d109081c261d87fc84e0cce245796796ae4c460 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 13 May 2017 16:18:21 -0700
Subject: dax: fix false CONFIG_BLOCK dependency

In the BLOCK=n case the dax core does not need to / must not emit the
block-device-dax helpers. Otherwise it leads to compile errors.

Cc: Arnd Bergmann <arnd@arndb.de>
Reported-by: Fabian Frederick <fabf@skynet.be>
Fixes: ef51042472f5 ("block, dax: move 'select DAX' from BLOCK to FS_DAX")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ebf43f531ada..6ed32aac8bbe 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -44,6 +44,7 @@ void dax_read_unlock(int id)
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+#ifdef CONFIG_BLOCK
 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
 		pgoff_t *pgoff)
 {
@@ -112,6 +113,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+#endif
 
 /**
  * struct dax_device - anchor object for dax services
-- 
cgit v1.2.3-59-g8ed1b


From 33fc30b470983e5b641a16cccc882f6777dd50ef Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 14 May 2017 02:06:03 +0200
Subject: cpufreq: intel_pstate: Document the current behavior and user
 interface

Add a document describing the current behavior and user space
interface of the intel_pstate driver in the RST format and
drop the existing outdated intel_pstate.txt document.

Also update admin-guide/pm/cpufreq.rst with proper RST references
to the new intel_pstate.rst document.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/cpufreq.rst      |  19 +-
 Documentation/admin-guide/pm/index.rst        |   1 +
 Documentation/admin-guide/pm/intel_pstate.rst | 755 ++++++++++++++++++++++++++
 Documentation/cpu-freq/intel-pstate.txt       | 281 ----------
 4 files changed, 766 insertions(+), 290 deletions(-)
 create mode 100644 Documentation/admin-guide/pm/intel_pstate.rst
 delete mode 100644 Documentation/cpu-freq/intel-pstate.txt

diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 289c80f7760e..09aa2e949787 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -1,4 +1,5 @@
 .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
+.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
 
 =======================
 CPU Performance Scaling
@@ -75,7 +76,7 @@ feedback registers, as that information is typically specific to the hardware
 interface it comes from and may not be easily represented in an abstract,
 platform-independent way.  For this reason, ``CPUFreq`` allows scaling drivers
 to bypass the governor layer and implement their own performance scaling
-algorithms.  That is done by the ``intel_pstate`` scaling driver.
+algorithms.  That is done by the |intel_pstate| scaling driver.
 
 
 ``CPUFreq`` Policy Objects
@@ -174,13 +175,13 @@ necessary to restart the scaling governor so that it can take the new online CPU
 into account.  That is achieved by invoking the governor's ``->stop`` and
 ``->start()`` callbacks, in this order, for the entire policy.
 
-As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling
+As mentioned before, the |intel_pstate| scaling driver bypasses the scaling
 governor layer of ``CPUFreq`` and provides its own P-state selection algorithms.
-Consequently, if ``intel_pstate`` is used, scaling governors are not attached to
+Consequently, if |intel_pstate| is used, scaling governors are not attached to
 new policy objects.  Instead, the driver's ``->setpolicy()`` callback is invoked
 to register per-CPU utilization update callbacks for each policy.  These
 callbacks are invoked by the CPU scheduler in the same way as for scaling
-governors, but in the ``intel_pstate`` case they both determine the P-state to
+governors, but in the |intel_pstate| case they both determine the P-state to
 use and change the hardware configuration accordingly in one go from scheduler
 context.
 
@@ -257,7 +258,7 @@ are the following:
 
 ``scaling_available_governors``
 	List of ``CPUFreq`` scaling governors present in the kernel that can
-	be attached to this policy or (if the ``intel_pstate`` scaling driver is
+	be attached to this policy or (if the |intel_pstate| scaling driver is
 	in use) list of scaling algorithms provided by the driver that can be
 	applied to this policy.
 
@@ -274,7 +275,7 @@ are the following:
 	the CPU is actually running at (due to hardware design and other
 	limitations).
 
-	Some scaling drivers (e.g. ``intel_pstate``) attempt to provide
+	Some scaling drivers (e.g. |intel_pstate|) attempt to provide
 	information more precisely reflecting the current CPU frequency through
 	this attribute, but that still may not be the exact current CPU
 	frequency as seen by the hardware at the moment.
@@ -284,13 +285,13 @@ are the following:
 
 ``scaling_governor``
 	The scaling governor currently attached to this policy or (if the
-	``intel_pstate`` scaling driver is in use) the scaling algorithm
+	|intel_pstate| scaling driver is in use) the scaling algorithm
 	provided by the driver that is currently applied to this policy.
 
 	This attribute is read-write and writing to it will cause a new scaling
 	governor to be attached to this policy or a new scaling algorithm
 	provided by the scaling driver to be applied to it (in the
-	``intel_pstate`` case), as indicated by the string written to this
+	|intel_pstate| case), as indicated by the string written to this
 	attribute (which must be one of the names listed by the
 	``scaling_available_governors`` attribute described above).
 
@@ -619,7 +620,7 @@ This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls
 the "boost" setting for the whole system.  It is not present if the underlying
 scaling driver does not support the frequency boost mechanism (or supports it,
 but provides a driver-specific interface for controlling it, like
-``intel_pstate``).
+|intel_pstate|).
 
 If the value in this file is 1, the frequency boost mechanism is enabled.  This
 means that either the hardware can be put into states in which it is able to
diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst
index c80f087321fc..7f148f76f432 100644
--- a/Documentation/admin-guide/pm/index.rst
+++ b/Documentation/admin-guide/pm/index.rst
@@ -6,6 +6,7 @@ Power Management
    :maxdepth: 2
 
    cpufreq
+   intel_pstate
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
new file mode 100644
index 000000000000..33d703989ea8
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -0,0 +1,755 @@
+===============================================
+``intel_pstate`` CPU Performance Scaling Driver
+===============================================
+
+::
+
+ Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+General Information
+===================
+
+``intel_pstate`` is a part of the
+:doc:`CPU performance scaling subsystem <cpufreq>` in the Linux kernel
+(``CPUFreq``).  It is a scaling driver for the Sandy Bridge and later
+generations of Intel processors.  Note, however, that some of those processors
+may not be supported.  [To understand ``intel_pstate`` it is necessary to know
+how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
+you have not done that yet.]
+
+For the processors supported by ``intel_pstate``, the P-state concept is broader
+than just an operating frequency or an operating performance point (see the
+`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
+information about that).  For this reason, the representation of P-states used
+by ``intel_pstate`` internally follows the hardware specification (for details
+refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
+Volume 3: System Programming Guide <SDM_>`_).  However, the ``CPUFreq`` core
+uses frequencies for identifying operating performance points of CPUs and
+frequencies are involved in the user space interface exposed by it, so
+``intel_pstate`` maps its internal representation of P-states to frequencies too
+(fortunately, that mapping is unambiguous).  At the same time, it would not be
+practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of
+available frequencies due to the possible size of it, so the driver does not do
+that.  Some functionality of the core is limited by that.
+
+Since the hardware P-state selection interface used by ``intel_pstate`` is
+available at the logical CPU level, the driver always works with individual
+CPUs.  Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy
+object corresponds to one logical CPU and ``CPUFreq`` policies are effectively
+equivalent to CPUs.  In particular, this means that they become "inactive" every
+time the corresponding CPU is taken offline and need to be re-initialized when
+it goes back online.
+
+``intel_pstate`` is not modular, so it cannot be unloaded, which means that the
+only way to pass early-configuration-time parameters to it is via the kernel
+command line.  However, its configuration can be adjusted via ``sysfs`` to a
+great extent.  In some configurations it even is possible to unregister it via
+``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
+registered (see `below <status_attr_>`_).
+
+
+Operation Modes
+===============
+
+``intel_pstate`` can operate in three different modes: in the active mode with
+or without hardware-managed P-states support and in the passive mode.  Which of
+them will be in effect depends on what kernel command line options are used and
+on the capabilities of the processor.
+
+Active Mode
+-----------
+
+This is the default operation mode of ``intel_pstate``.  If it works in this
+mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
+policies contains the string "intel_pstate".
+
+In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
+provides its own scaling algorithms for P-state selection.  Those algorithms
+can be applied to ``CPUFreq`` policies in the same way as generic scaling
+governors (that is, through the ``scaling_governor`` policy attribute in
+``sysfs``).  [Note that different P-state selection algorithms may be chosen for
+different policies, but that is not recommended.]
+
+They are not generic scaling governors, but their names are the same as the
+names of some of those governors.  Moreover, confusingly enough, they generally
+do not work in the same way as the generic governors they share the names with.
+For example, the ``powersave`` P-state selection algorithm provided by
+``intel_pstate`` is not a counterpart of the generic ``powersave`` governor
+(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors).
+
+There are two P-state selection algorithms provided by ``intel_pstate`` in the
+active mode: ``powersave`` and ``performance``.  The way they both operate
+depends on whether or not the hardware-managed P-states (HWP) feature has been
+enabled in the processor and possibly on the processor model.
+
+Which of the P-state selection algorithms is used by default depends on the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option.
+Namely, if that option is set, the ``performance`` algorithm will be used by
+default, and the other one will be used by default if it is not set.
+
+Active Mode With HWP
+~~~~~~~~~~~~~~~~~~~~
+
+If the processor supports the HWP feature, it will be enabled during the
+processor initialization and cannot be disabled after that.  It is possible
+to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the
+kernel in the command line.
+
+If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to
+select P-states by itself, but still it can give hints to the processor's
+internal P-state selection logic.  What those hints are depends on which P-state
+selection algorithm has been applied to the given policy (or to the CPU it
+corresponds to).
+
+Even though the P-state selection is carried out by the processor automatically,
+``intel_pstate`` registers utilization update callbacks with the CPU scheduler
+in this mode.  However, they are not used for running a P-state selection
+algorithm, but for periodic updates of the current CPU frequency information to
+be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``.
+
+HWP + ``performance``
+.....................
+
+In this configuration ``intel_pstate`` will write 0 to the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
+internal P-state selection logic is expected to focus entirely on performance.
+
+This will override the EPP/EPB setting coming from the ``sysfs`` interface
+(see `Energy vs Performance Hints`_ below).
+
+Also, in this configuration the range of P-states available to the processor's
+internal P-state selection logic is always restricted to the upper boundary
+(that is, the maximum P-state that the driver is allowed to use).
+
+HWP + ``powersave``
+...................
+
+In this configuration ``intel_pstate`` will set the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was
+previously set to via ``sysfs`` (or whatever default value it was
+set to by the platform firmware).  This usually causes the processor's
+internal P-state selection logic to be less performance-focused.
+
+Active Mode Without HWP
+~~~~~~~~~~~~~~~~~~~~~~~
+
+This is the default operation mode for processors that do not support the HWP
+feature.  It also is used by default with the ``intel_pstate=no_hwp`` argument
+in the kernel command line.  However, in this mode ``intel_pstate`` may refuse
+to work with the given processor if it does not recognize it.  [Note that
+``intel_pstate`` will never refuse to work with any processor with the HWP
+feature enabled.]
+
+In this mode ``intel_pstate`` registers utilization update callbacks with the
+CPU scheduler in order to run a P-state selection algorithm, either
+``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy
+setting in ``sysfs``.  The current CPU frequency information to be made
+available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
+periodically updated by those utilization update callbacks too.
+
+``performance``
+...............
+
+Without HWP, this P-state selection algorithm is always the same regardless of
+the processor model and platform configuration.
+
+It selects the maximum P-state it is allowed to use, subject to limits set via
+``sysfs``, every time the P-state selection computations are carried out by the
+driver's utilization update callback for the given CPU (that does not happen
+more often than every 10 ms), but the hardware configuration will not be changed
+if the new P-state is the same as the current one.
+
+This is the default P-state selection algorithm if the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
+is set.
+
+``powersave``
+.............
+
+Without HWP, this P-state selection algorithm generally depends on the
+processor model and/or the system profile setting in the ACPI tables and there
+are two variants of it.
+
+One of them is used with processors from the Atom line and (regardless of the
+processor model) on platforms with the system profile in the ACPI tables set to
+"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or
+"workstation".  It is also used with processors supporting the HWP feature if
+that feature has not been enabled (that is, with the ``intel_pstate=no_hwp``
+argument in the kernel command line).  It is similar to the algorithm
+implemented by the generic ``schedutil`` scaling governor except that the
+utilization metric used by it is based on numbers coming from feedback
+registers of the CPU.  It generally selects P-states proportional to the
+current CPU utilization, so it is referred to as the "proportional" algorithm.
+
+The second variant of the ``powersave`` P-state selection algorithm, used in all
+of the other cases (generally, on processors from the Core line, so it is
+referred to as the "Core" algorithm), is based on the values read from the APERF
+and MPERF feedback registers and the previously requested target P-state.
+It does not really take CPU utilization into account explicitly, but as a rule
+it causes the CPU P-state to ramp up very quickly in response to increased
+utilization which is generally desirable in server environments.
+
+Regardless of the variant, this algorithm is run by the driver's utilization
+update callback for the given CPU when it is invoked by the CPU scheduler, but
+not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this
+particular case <Tuning Interface in debugfs_>`_).  Like in the ``performance``
+case, the hardware configuration is not touched if the new P-state turns out to
+be the same as the current one.
+
+This is the default P-state selection algorithm if the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
+is not set.
+
+Passive Mode
+------------
+
+This mode is used if the ``intel_pstate=passive`` argument is passed to the
+kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
+Like in the active mode without HWP support, in this mode ``intel_pstate`` may
+refuse to work with the given processor if it does not recognize it.
+
+If the driver works in this mode, the ``scaling_driver`` policy attribute in
+``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
+Then, the driver behaves like a regular ``CPUFreq`` scaling driver.  That is,
+it is invoked by generic scaling governors when necessary to talk to the
+hardware in order to change the P-state of a CPU (in particular, the
+``schedutil`` governor can invoke it directly from scheduler context).
+
+While in this mode, ``intel_pstate`` can be used with all of the (generic)
+scaling governors listed by the ``scaling_available_governors`` policy attribute
+in ``sysfs`` (and the P-state selection algorithms described above are not
+used).  Then, it is responsible for the configuration of policy objects
+corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling
+governors attached to the policy objects) with accurate information on the
+maximum and minimum operating frequencies supported by the hardware (including
+the so-called "turbo" frequency ranges).  In other words, in the passive mode
+the entire range of available P-states is exposed by ``intel_pstate`` to the
+``CPUFreq`` core.  However, in this mode the driver does not register
+utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq``
+information comes from the ``CPUFreq`` core (and is the last frequency selected
+by the current scaling governor for the given policy).
+
+
+.. _turbo:
+
+Turbo P-states Support
+======================
+
+In the majority of cases, the entire range of P-states available to
+``intel_pstate`` can be divided into two sub-ranges that correspond to
+different types of processor behavior, above and below a boundary that
+will be referred to as the "turbo threshold" in what follows.
+
+The P-states above the turbo threshold are referred to as "turbo P-states" and
+the whole sub-range of P-states they belong to is referred to as the "turbo
+range".  These names are related to the Turbo Boost technology allowing a
+multicore processor to opportunistically increase the P-state of one or more
+cores if there is enough power to do that and if that is not going to cause the
+thermal envelope of the processor package to be exceeded.
+
+Specifically, if software sets the P-state of a CPU core within the turbo range
+(that is, above the turbo threshold), the processor is permitted to take over
+performance scaling control for that core and put it into turbo P-states of its
+choice going forward.  However, that permission is interpreted differently by
+different processor generations.  Namely, the Sandy Bridge generation of
+processors will never use any P-states above the last one set by software for
+the given core, even if it is within the turbo range, whereas all of the later
+processor generations will take it as a license to use any P-states from the
+turbo range, even above the one set by software.  In other words, on those
+processors setting any P-state from the turbo range will enable the processor
+to put the given core into all turbo P-states up to and including the maximum
+supported one as it sees fit.
+
+One important property of turbo P-states is that they are not sustainable.  More
+precisely, there is no guarantee that any CPUs will be able to stay in any of
+those states indefinitely, because the power distribution within the processor
+package may change over time  or the thermal envelope it was designed for might
+be exceeded if a turbo P-state was used for too long.
+
+In turn, the P-states below the turbo threshold generally are sustainable.  In
+fact, if one of them is set by software, the processor is not expected to change
+it to a lower one unless in a thermal stress or a power limit violation
+situation (a higher P-state may still be used if it is set for another CPU in
+the same package at the same time, for example).
+
+Some processors allow multiple cores to be in turbo P-states at the same time,
+but the maximum P-state that can be set for them generally depends on the number
+of cores running concurrently.  The maximum turbo P-state that can be set for 3
+cores at the same time usually is lower than the analogous maximum P-state for
+2 cores, which in turn usually is lower than the maximum turbo P-state that can
+be set for 1 core.  The one-core maximum turbo P-state is thus the maximum
+supported one overall.
+
+The maximum supported turbo P-state, the turbo threshold (the maximum supported
+non-turbo P-state) and the minimum supported P-state are specific to the
+processor model and can be determined by reading the processor's model-specific
+registers (MSRs).  Moreover, some processors support the Configurable TDP
+(Thermal Design Power) feature and, when that feature is enabled, the turbo
+threshold effectively becomes a configurable value that can be set by the
+platform firmware.
+
+Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
+the entire range of available P-states, including the whole turbo range, to the
+``CPUFreq`` core and (in the passive mode) to generic scaling governors.  This
+generally causes turbo P-states to be set more often when ``intel_pstate`` is
+used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
+for more information).
+
+Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
+(even if the Configurable TDP feature is enabled in the processor), its
+``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
+work as expected in all cases (that is, if set to disable turbo P-states, it
+always should prevent ``intel_pstate`` from using them).
+
+
+Processor Support
+=================
+
+To handle a given processor ``intel_pstate`` requires a number of different
+pieces of information on it to be known, including:
+
+ * The minimum supported P-state.
+
+ * The maximum supported `non-turbo P-state <turbo_>`_.
+
+ * Whether or not turbo P-states are supported at all.
+
+ * The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
+   are supported).
+
+ * The scaling formula to translate the driver's internal representation
+   of P-states into frequencies and the other way around.
+
+Generally, ways to obtain that information are specific to the processor model
+or family.  Although it often is possible to obtain all of it from the processor
+itself (using model-specific registers), there are cases in which hardware
+manuals need to be consulted to get to it too.
+
+For this reason, there is a list of supported processors in ``intel_pstate`` and
+the driver initialization will fail if the detected processor is not in that
+list, unless it supports the `HWP feature <Active Mode_>`_.  [The interface to
+obtain all of the information listed above is the same for all of the processors
+supporting the HWP feature, which is why they all are supported by
+``intel_pstate``.]
+
+
+User Space Interface in ``sysfs``
+=================================
+
+Global Attributes
+-----------------
+
+``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to
+control its functionality at the system level.  They are located in the
+``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all
+CPUs.
+
+Some of them are not present if the ``intel_pstate=per_cpu_perf_limits``
+argument is passed to the kernel in the command line.
+
+``max_perf_pct``
+	Maximum P-state the driver is allowed to set in percent of the
+	maximum supported performance level (the highest supported `turbo
+	P-state <turbo_>`_).
+
+	This attribute will not be exposed if the
+	``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
+	command line.
+
+``min_perf_pct``
+	Minimum P-state the driver is allowed to set in percent of the
+	maximum supported performance level (the highest supported `turbo
+	P-state <turbo_>`_).
+
+	This attribute will not be exposed if the
+	``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
+	command line.
+
+``num_pstates``
+	Number of P-states supported by the processor (between 0 and 255
+	inclusive) including both turbo and non-turbo P-states (see
+	`Turbo P-states Support`_).
+
+	The value of this attribute is not affected by the ``no_turbo``
+	setting described `below <no_turbo_attr_>`_.
+
+	This attribute is read-only.
+
+``turbo_pct``
+	Ratio of the `turbo range <turbo_>`_ size to the size of the entire
+	range of supported P-states, in percent.
+
+	This attribute is read-only.
+
+.. _no_turbo_attr:
+
+``no_turbo``
+	If set (equal to 1), the driver is not allowed to set any turbo P-states
+	(see `Turbo P-states Support`_).  If unset (equalt to 0, which is the
+	default), turbo P-states can be set by the driver.
+	[Note that ``intel_pstate`` does not support the general ``boost``
+	attribute (supported by some other scaling drivers) which is replaced
+	by this one.]
+
+	This attrubute does not affect the maximum supported frequency value
+	supplied to the ``CPUFreq`` core and exposed via the policy interface,
+	but it affects the maximum possible value of per-policy P-state	limits
+	(see `Interpretation of Policy Attributes`_ below for details).
+
+.. _status_attr:
+
+``status``
+	Operation mode of the driver: "active", "passive" or "off".
+
+	"active"
+		The driver is functional and in the `active mode
+		<Active Mode_>`_.
+
+	"passive"
+		The driver is functional and in the `passive mode
+		<Passive Mode_>`_.
+
+	"off"
+		The driver is not functional (it is not registered as a scaling
+		driver with the ``CPUFreq`` core).
+
+	This attribute can be written to in order to change the driver's
+	operation mode or to unregister it.  The string written to it must be
+	one of the possible values of it and, if successful, the write will
+	cause the driver to switch over to the operation mode represented by
+	that string - or to be unregistered in the "off" case.  [Actually,
+	switching over from the active mode to the passive mode or the other
+	way around causes the driver to be unregistered and registered again
+	with a different set of callbacks, so all of its settings (the global
+	as well as the per-policy ones) are then reset to their default
+	values, possibly depending on the target operation mode.]
+
+	That only is supported in some configurations, though (for example, if
+	the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
+	the operation mode of the driver cannot be changed), and if it is not
+	supported in the current configuration, writes to this attribute with
+	fail with an appropriate error.
+
+Interpretation of Policy Attributes
+-----------------------------------
+
+The interpretation of some ``CPUFreq`` policy attributes described in
+:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver
+and it generally depends on the driver's `operation mode <Operation Modes_>`_.
+
+First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
+``scaling_cur_freq`` attributes are produced by applying a processor-specific
+multiplier to the internal P-state representation used by ``intel_pstate``.
+Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
+attributes are capped by the frequency corresponding to the maximum P-state that
+the driver is allowed to set.
+
+If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
+not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
+and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
+Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
+``scaling_min_freq`` to go down to that value if they were above it before.
+However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
+restored after unsetting ``no_turbo``, unless these attributes have been written
+to after ``no_turbo`` was set.
+
+If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq``
+and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
+which also is the value of ``cpuinfo_max_freq`` in either case.
+
+Next, the following policy attributes have special meaning if
+``intel_pstate`` works in the `active mode <Active Mode_>`_:
+
+``scaling_available_governors``
+	List of P-state selection algorithms provided by ``intel_pstate``.
+
+``scaling_governor``
+	P-state selection algorithm provided by ``intel_pstate`` currently in
+	use with the given policy.
+
+``scaling_cur_freq``
+	Frequency of the average P-state of the CPU represented by the given
+	policy for the time interval between the last two invocations of the
+	driver's utilization update callback by the CPU scheduler for that CPU.
+
+The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
+same as for other scaling drivers.
+
+Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
+depends on the operation mode of the driver.  Namely, it is either
+"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
+`passive mode <Passive Mode_>`_).
+
+Coordination of P-State Limits
+------------------------------
+
+``intel_pstate`` allows P-state limits to be set in two ways: with the help of
+the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
+<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
+``CPUFreq`` policy attributes.  The coordination between those limits is based
+on the following rules, regardless of the current operation mode of the driver:
+
+ 1. All CPUs are affected by the global limits (that is, none of them can be
+    requested to run faster than the global maximum and none of them can be
+    requested to run slower than the global minimum).
+
+ 2. Each individual CPU is affected by its own per-policy limits (that is, it
+    cannot be requested to run faster than its own per-policy maximum and it
+    cannot be requested to run slower than its own per-policy minimum).
+
+ 3. The global and per-policy limits can be set independently.
+
+If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the
+resulting effective values are written into its registers whenever the limits
+change in order to request its internal P-state selection logic to always set
+P-states within these limits.  Otherwise, the limits are taken into account by
+scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
+every time before setting a new P-state for a CPU.
+
+Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
+is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
+at all and the only way to set the limits is by using the policy attributes.
+
+
+Energy vs Performance Hints
+---------------------------
+
+If ``intel_pstate`` works in the `active mode with the HWP feature enabled
+<Active Mode With HWP_>`_ in the processor, additional attributes are present
+in every ``CPUFreq`` policy directory in ``sysfs``.  They are intended to allow
+user space to help ``intel_pstate`` to adjust the processor's internal P-state
+selection logic by focusing it on performance or on energy-efficiency, or
+somewhere between the two extremes:
+
+``energy_performance_preference``
+	Current value of the energy vs performance hint for the given policy
+	(or the CPU represented by it).
+
+	The hint can be changed by writing to this attribute.
+
+``energy_performance_available_preferences``
+	List of strings that can be written to the
+	``energy_performance_preference`` attribute.
+
+	They represent different energy vs performance hints and should be
+	self-explanatory, except that ``default`` represents whatever hint
+	value was set by the platform firmware.
+
+Strings written to the ``energy_performance_preference`` attribute are
+internally translated to integer values written to the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob.
+
+[Note that tasks may by migrated from one CPU to another by the scheduler's
+load-balancing algorithm and if different energy vs performance hints are
+set for those CPUs, that may lead to undesirable outcomes.  To avoid such
+issues it is better to set the same energy vs performance hint for all CPUs
+or to pin every task potentially sensitive to them to a specific CPU.]
+
+.. _acpi-cpufreq:
+
+``intel_pstate`` vs ``acpi-cpufreq``
+====================================
+
+On the majority of systems supported by ``intel_pstate``, the ACPI tables
+provided by the platform firmware contain ``_PSS`` objects returning information
+that can be used for CPU performance scaling (refer to the `ACPI specification`_
+for details on the ``_PSS`` objects and the format of the information returned
+by them).
+
+The information returned by the ACPI ``_PSS`` objects is used by the
+``acpi-cpufreq`` scaling driver.  On systems supported by ``intel_pstate``
+the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling
+interface, but the set of P-states it can use is limited by the ``_PSS``
+output.
+
+On those systems each ``_PSS`` object returns a list of P-states supported by
+the corresponding CPU which basically is a subset of the P-states range that can
+be used by ``intel_pstate`` on the same system, with one exception: the whole
+`turbo range <turbo_>`_ is represented by one item in it (the topmost one).  By
+convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
+than the frequency of the highest non-turbo P-state listed by it, but the
+corresponding P-state representation (following the hardware specification)
+returned for it matches the maximum supported turbo P-state (or is the
+special value 255 meaning essentially "go as high as you can get").
+
+The list of P-states returned by ``_PSS`` is reflected by the table of
+available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and
+scaling governors and the minimum and maximum supported frequencies reported by
+it come from that list as well.  In particular, given the special representation
+of the turbo range described above, this means that the maximum supported
+frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency
+of the highest supported non-turbo P-state listed by ``_PSS`` which, of course,
+affects decisions made by the scaling governors, except for ``powersave`` and
+``performance``.
+
+For example, if a given governor attempts to select a frequency proportional to
+estimated CPU load and maps the load of 100% to the maximum supported frequency
+(possibly multiplied by a constant), then it will tend to choose P-states below
+the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because
+in that case the turbo range corresponds to a small fraction of the frequency
+band it can use (1 MHz vs 1 GHz or more).  In consequence, it will only go to
+the turbo range for the highest loads and the other loads above 50% that might
+benefit from running at turbo frequencies will be given non-turbo P-states
+instead.
+
+One more issue related to that may appear on systems supporting the
+`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
+turbo threshold.  Namely, if that is not coordinated with the lists of P-states
+returned by ``_PSS`` properly, there may be more than one item corresponding to
+a turbo P-state in those lists and there may be a problem with avoiding the
+turbo range (if desirable or necessary).  Usually, to avoid using turbo
+P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
+by ``_PSS``, but that is not sufficient when there are other turbo P-states in
+the list returned by it.
+
+Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
+`passive mode <Passive Mode_>`_, except that the number of P-states it can set
+is limited to the ones listed by the ACPI ``_PSS`` objects.
+
+
+Kernel Command Line Options for ``intel_pstate``
+================================================
+
+Several kernel command line options can be used to pass early-configuration-time
+parameters to ``intel_pstate`` in order to enforce specific behavior of it.  All
+of them have to be prepended with the ``intel_pstate=`` prefix.
+
+``disable``
+	Do not register ``intel_pstate`` as the scaling driver even if the
+	processor is supported by it.
+
+``passive``
+	Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
+	start with.
+
+	This option implies the ``no_hwp`` one described below.
+
+``force``
+	Register ``intel_pstate`` as the scaling driver instead of
+	``acpi-cpufreq`` even if the latter is preferred on the given system.
+
+	This may prevent some platform features (such as thermal controls and
+	power capping) that rely on the availability of ACPI P-states
+	information from functioning as expected, so it should be used with
+	caution.
+
+	This option does not work with processors that are not supported by
+	``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling
+	driver is used instead of ``acpi-cpufreq``.
+
+``no_hwp``
+	Do not enable the `hardware-managed P-states (HWP) feature
+	<Active Mode With HWP_>`_ even if it is supported by the processor.
+
+``hwp_only``
+	Register ``intel_pstate`` as the scaling driver only if the
+	`hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is
+	supported by the processor.
+
+``support_acpi_ppc``
+	Take ACPI ``_PPC`` performance limits into account.
+
+	If the preferred power management profile in the FADT (Fixed ACPI
+	Description Table) is set to "Enterprise Server" or "Performance
+	Server", the ACPI ``_PPC`` limits are taken into account by default
+	and this option has no effect.
+
+``per_cpu_perf_limits``
+	Use per-logical-CPU P-State limits (see `Coordination of P-state
+	Limits`_ for details).
+
+
+Diagnostics and Tuning
+======================
+
+Trace Events
+------------
+
+There are two static trace events that can be used for ``intel_pstate``
+diagnostics.  One of them is the ``cpu_frequency`` trace event generally used
+by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
+to ``intel_pstate``.  Both of them are triggered by ``intel_pstate`` only if
+it works in the `active mode <Active Mode_>`_.
+
+The following sequence of shell commands can be used to enable them and see
+their output (if the kernel is generally configured to support event tracing)::
+
+ # cd /sys/kernel/debug/tracing/
+ # echo 1 > events/power/pstate_sample/enable
+ # echo 1 > events/power/cpu_frequency/enable
+ # cat trace
+ gnome-terminal--4510  [001] ..s.  1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
+ cat-5235  [002] ..s.  1177.681723: cpu_frequency: state=2900000 cpu_id=2
+
+If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
+``cpu_frequency`` trace event will be triggered either by the ``schedutil``
+scaling governor (for the policies it is attached to), or by the ``CPUFreq``
+core (for the policies with other scaling governors).
+
+``ftrace``
+----------
+
+The ``ftrace`` interface can be used for low-level diagnostics of
+``intel_pstate``.  For example, to check how often the function to set a
+P-state is called, the ``ftrace`` filter can be set to to
+:c:func:`intel_pstate_set_pstate`::
+
+ # cd /sys/kernel/debug/tracing/
+ # cat available_filter_functions | grep -i pstate
+ intel_pstate_set_pstate
+ intel_pstate_cpu_init
+ ...
+ # echo intel_pstate_set_pstate > set_ftrace_filter
+ # echo function > current_tracer
+ # cat trace | head -15
+ # tracer: function
+ #
+ # entries-in-buffer/entries-written: 80/80   #P:4
+ #
+ #                              _-----=> irqs-off
+ #                             / _----=> need-resched
+ #                            | / _---=> hardirq/softirq
+ #                            || / _--=> preempt-depth
+ #                            ||| /     delay
+ #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+ #              | |       |   ||||       |         |
+             Xorg-3129  [000] ..s.  2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
+  gnome-terminal--4510  [002] ..s.  2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
+      gnome-shell-3409  [001] ..s.  2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
+           <idle>-0     [000] ..s.  2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
+
+Tuning Interface in ``debugfs``
+-------------------------------
+
+The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of
+processors in the active mode <powersave_>`_ is based on a `PID controller`_
+whose parameters were chosen to address a number of different use cases at the
+same time.  However, it still is possible to fine-tune it to a specific workload
+and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is
+provided for this purpose.  [Note that the ``pstate_snb`` directory will be
+present only if the specific P-state selection algorithm matching the interface
+in it actually is in use.]
+
+The following files present in that directory can be used to modify the PID
+controller parameters at run time:
+
+| ``deadband``
+| ``d_gain_pct``
+| ``i_gain_pct``
+| ``p_gain_pct``
+| ``sample_rate_ms``
+| ``setpoint``
+
+Note, however, that achieving desirable results this way generally requires
+expert-level understanding of the power vs performance tradeoff, so extra care
+is recommended when attempting to do that.
+
+
+.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
+.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
+.. _PID controller: https://en.wikipedia.org/wiki/PID_controller
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
deleted file mode 100644
index 3fdcdfd968ba..000000000000
--- a/Documentation/cpu-freq/intel-pstate.txt
+++ /dev/null
@@ -1,281 +0,0 @@
-Intel P-State driver
---------------------
-
-This driver provides an interface to control the P-State selection for the
-SandyBridge+ Intel processors.
-
-The following document explains P-States:
-http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
-As stated in the document, P-State doesn’t exactly mean a frequency. However, for
-the sake of the relationship with cpufreq, P-State and frequency are used
-interchangeably.
-
-Understanding the cpufreq core governors and policies are important before
-discussing more details about the Intel P-State driver. Based on what callbacks
-a cpufreq driver provides to the cpufreq core, it can support two types of
-drivers:
-- with target_index() callback: In this mode, the drivers using cpufreq core
-simply provide the minimum and maximum frequency limits and an additional
-interface target_index() to set the current frequency. The cpufreq subsystem
-has a number of scaling governors ("performance", "powersave", "ondemand",
-etc.). Depending on which governor is in use, cpufreq core will call for
-transitions to a specific frequency using target_index() callback.
-- setpolicy() callback: In this mode, drivers do not provide target_index()
-callback, so cpufreq core can't request a transition to a specific frequency.
-The driver provides minimum and maximum frequency limits and callbacks to set a
-policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
-The cpufreq core can request the driver to operate in any of the two policies:
-"performance" and "powersave". The driver decides which frequency to use based
-on the above policy selection considering minimum and maximum frequency limits.
-
-The Intel P-State driver falls under the latter category, which implements the
-setpolicy() callback. This driver decides what P-State to use based on the
-requested policy from the cpufreq core. If the processor is capable of
-selecting its next P-State internally, then the driver will offload this
-responsibility to the processor (aka HWP: Hardware P-States). If not, the
-driver implements algorithms to select the next P-State.
-
-Since these policies are implemented in the driver, they are not same as the
-cpufreq scaling governors implementation, even if they have the same name in
-the cpufreq sysfs (scaling_governors). For example the "performance" policy is
-similar to cpufreq’s "performance" governor, but "powersave" is completely
-different than the cpufreq "powersave" governor. The strategy here is similar
-to cpufreq "ondemand", where the requested P-State is related to the system load.
-
-Sysfs Interface
-
-In addition to the frequency-controlling interfaces provided by the cpufreq
-core, the driver provides its own sysfs files to control the P-State selection.
-These files have been added to /sys/devices/system/cpu/intel_pstate/.
-Any changes made to these files are applicable to all CPUs (even in a
-multi-package system, Refer to later section on placing "Per-CPU limits").
-
-      max_perf_pct: Limits the maximum P-State that will be requested by
-      the driver. It states it as a percentage of the available performance. The
-      available (P-State) performance may be reduced by the no_turbo
-      setting described below.
-
-      min_perf_pct: Limits the minimum P-State that will be requested by
-      the driver. It states it as a percentage of the max (non-turbo)
-      performance level.
-
-      no_turbo: Limits the driver to selecting P-State below the turbo
-      frequency range.
-
-      turbo_pct: Displays the percentage of the total performance that
-      is supported by hardware that is in the turbo range. This number
-      is independent of whether turbo has been disabled or not.
-
-      num_pstates: Displays the number of P-States that are supported
-      by hardware. This number is independent of whether turbo has
-      been disabled or not.
-
-For example, if a system has these parameters:
-	Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State)
-	Max non turbo ratio: 0x17
-	Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio)
-
-Sysfs will show :
-	max_perf_pct:100, which corresponds to 1 core ratio
-	min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio
-	no_turbo:0, turbo is not disabled
-	num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1)
-	turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates
-
-Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
-Volume 3: System Programming Guide" to understand ratios.
-
-There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
-that can be used for controlling the operation mode of the driver:
-
-      status: Three settings are possible:
-      "off"     - The driver is not in use at this time.
-      "active"  - The driver works as a P-state governor (default).
-      "passive" - The driver works as a regular cpufreq one and collaborates
-                  with the generic cpufreq governors (it sets P-states as
-                  requested by those governors).
-      The current setting is returned by reads from this attribute.  Writing one
-      of the above strings to it changes the operation mode as indicated by that
-      string, if possible.  If HW-managed P-states (HWP) are enabled, it is not
-      possible to change the driver's operation mode and attempts to write to
-      this attribute will fail.
-
-cpufreq sysfs for Intel P-State
-
-Since this driver registers with cpufreq, cpufreq sysfs is also presented.
-There are some important differences, which need to be considered.
-
-scaling_cur_freq: This displays the real frequency which was used during
-the last sample period instead of what is requested. Some other cpufreq driver,
-like acpi-cpufreq, displays what is requested (Some changes are on the
-way to fix this for acpi-cpufreq driver). The same is true for frequencies
-displayed at /proc/cpuinfo.
-
-scaling_governor: This displays current active policy. Since each CPU has a
-cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this
-is not possible with Intel P-States, as there is one common policy for all
-CPUs. Here, the last requested policy will be applicable to all CPUs. It is
-suggested that one use the cpupower utility to change policy to all CPUs at the
-same time.
-
-scaling_setspeed: This attribute can never be used with Intel P-State.
-
-scaling_max_freq/scaling_min_freq: This interface can be used similarly to
-the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies
-are converted to nearest possible P-State, this is prone to rounding errors.
-This method is not preferred to limit performance.
-
-affected_cpus: Not used
-related_cpus: Not used
-
-For contemporary Intel processors, the frequency is controlled by the
-processor itself and the P-State exposed to software is related to
-performance levels.  The idea that frequency can be set to a single
-frequency is fictional for Intel Core processors. Even if the scaling
-driver selects a single P-State, the actual frequency the processor
-will run at is selected by the processor itself.
-
-Per-CPU limits
-
-The kernel command line option "intel_pstate=per_cpu_perf_limits" forces
-the intel_pstate driver to use per-CPU performance limits.  When it is set,
-the sysfs control interface described above is subject to limitations.
-- The following controls are not available for both read and write
-	/sys/devices/system/cpu/intel_pstate/max_perf_pct
-	/sys/devices/system/cpu/intel_pstate/min_perf_pct
-- The following controls can be used to set performance limits, as far as the
-architecture of the processor permits:
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
-- User can still observe turbo percent and number of P-States from
-	/sys/devices/system/cpu/intel_pstate/turbo_pct
-	/sys/devices/system/cpu/intel_pstate/num_pstates
-- User can read write system wide turbo status
-	/sys/devices/system/cpu/no_turbo
-
-Support of energy performance hints
-It is possible to provide hints to the HWP algorithms in the processor
-to be more performance centric to more energy centric. When the driver
-is using HWP, two additional cpufreq sysfs attributes are presented for
-each logical CPU.
-These attributes are:
-	- energy_performance_available_preferences
-	- energy_performance_preference
-
-To get list of supported hints:
-$ cat energy_performance_available_preferences
-    default performance balance_performance balance_power power
-
-The current preference can be read or changed via cpufreq sysfs
-attribute "energy_performance_preference". Reading from this attribute
-will display current effective setting. User can write any of the valid
-preference string to this attribute. User can always restore to power-on
-default by writing "default".
-
-Since threads can migrate to different CPUs, this is possible that the
-new CPU may have different energy performance preference than the previous
-one. To avoid such issues, either threads can be pinned to specific CPUs
-or set the same energy performance preference value to all CPUs.
-
-Tuning Intel P-State driver
-
-When the performance can be tuned using PID (Proportional Integral
-Derivative) controller, debugfs files are provided for adjusting performance.
-They are presented under:
-/sys/kernel/debug/pstate_snb/
-
-The PID tunable parameters are:
-      deadband
-      d_gain_pct
-      i_gain_pct
-      p_gain_pct
-      sample_rate_ms
-      setpoint
-
-To adjust these parameters, some understanding of driver implementation is
-necessary. There are some tweeks described here, but be very careful. Adjusting
-them requires expert level understanding of power and performance relationship.
-These limits are only useful when the "powersave" policy is active.
-
--To make the system more responsive to load changes, sample_rate_ms can
-be adjusted  (current default is 10ms).
--To make the system use higher performance, even if the load is lower, setpoint
-can be adjusted to a lower number. This will also lead to faster ramp up time
-to reach the maximum P-State.
-If there are no derivative and integral coefficients, The next P-State will be
-equal to:
-	current P-State - ((setpoint - current cpu load) * p_gain_pct)
-
-For example, if the current PID parameters are (Which are defaults for the core
-processors like SandyBridge):
-      deadband = 0
-      d_gain_pct = 0
-      i_gain_pct = 0
-      p_gain_pct = 20
-      sample_rate_ms = 10
-      setpoint = 97
-
-If the current P-State = 0x08 and current load = 100, this will result in the
-next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State
-goes up by only 1. If during next sample interval the current load doesn't
-change and still 100, then P-State goes up by one again. This process will
-continue as long as the load is more than the setpoint until the maximum P-State
-is reached.
-
-For the same load at setpoint = 60, this will result in the next P-State
-= 0x08 - ((60 - 100) * 0.2) = 16
-So by changing the setpoint from 97 to 60, there is an increase of the
-next P-State from 9 to 16. So this will make processor execute at higher
-P-State for the same CPU load. If the load continues to be more than the
-setpoint during next sample intervals, then P-State will go up again till the
-maximum P-State is reached. But the ramp up time to reach the maximum P-State
-will be much faster when the setpoint is 60 compared to 97.
-
-Debugging Intel P-State driver
-
-Event tracing
-To debug P-State transition, the Linux event tracing interface can be used.
-There are two specific events, which can be enabled (Provided the kernel
-configs related to event tracing are enabled).
-
-# cd /sys/kernel/debug/tracing/
-# echo 1 > events/power/pstate_sample/enable
-# echo 1 > events/power/cpu_frequency/enable
-# cat trace
-gnome-terminal--4510  [001] ..s.  1177.680733: pstate_sample: core_busy=107
-	scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618
-		freq=2474476
-cat-5235  [002] ..s.  1177.681723: cpu_frequency: state=2900000 cpu_id=2
-
-
-Using ftrace
-
-If function level tracing is required, the Linux ftrace interface can be used.
-For example if we want to check how often a function to set a P-State is
-called, we can set ftrace filter to intel_pstate_set_pstate.
-
-# cd /sys/kernel/debug/tracing/
-# cat available_filter_functions | grep -i pstate
-intel_pstate_set_pstate
-intel_pstate_cpu_init
-...
-
-# echo intel_pstate_set_pstate > set_ftrace_filter
-# echo function > current_tracer
-# cat trace | head -15
-# tracer: function
-#
-# entries-in-buffer/entries-written: 80/80   #P:4
-#
-#                              _-----=> irqs-off
-#                             / _----=> need-resched
-#                            | / _---=> hardirq/softirq
-#                            || / _--=> preempt-depth
-#                            ||| /     delay
-#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-#              | |       |   ||||       |         |
-            Xorg-3129  [000] ..s.  2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
- gnome-terminal--4510  [002] ..s.  2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
-     gnome-shell-3409  [001] ..s.  2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
-          <idle>-0     [000] ..s.  2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
-- 
cgit v1.2.3-59-g8ed1b


From 60d4553bdc1a0099adb544e12cafd7bbc7f1d484 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 14 May 2017 02:23:04 +0200
Subject: PM / wakeup: Fix up wakeup_source_report_event()

Commit 8a537ece3d94 (PM / wakeup: Integrate mechanism to abort
transitions in progress) modified wakeup_source_report_event()
and wakeup_source_activate() to make it possible to call
pm_system_wakeup() from the latter if so indicated by the
caller of the former (via a new function argument added by that
commit), but it overlooked the fact that in some situations
wakeup_source_report_event() is called to signal a "hard" event
(ie. such that should abort a system suspend in progress) after
pm_stay_awake() has been called for the same wakeup source object,
in which case the pm_system_wakeup() will not trigger.

To work around this issue, modify wakeup_source_activate() and
wakeup_source_report_event() again so that pm_system_wakeup() is
called by the latter directly (if its last argument is true), in
which case the additional argument does not need to be passed
to wakeup_source_activate() any more, so drop it from there.

Fixes: 8a537ece3d94 (PM / wakeup: Integrate mechanism to abort transitions in progress)
Reported-by: David E. Box <david.e.box@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index f62082fdd670..9c36b27996fc 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -512,13 +512,12 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
 /**
  * wakup_source_activate - Mark given wakeup source as active.
  * @ws: Wakeup source to handle.
- * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
  * Update the @ws' statistics and, if @ws has just been activated, notify the PM
  * core of the event by incrementing the counter of of wakeup events being
  * processed.
  */
-static void wakeup_source_activate(struct wakeup_source *ws, bool hard)
+static void wakeup_source_activate(struct wakeup_source *ws)
 {
 	unsigned int cec;
 
@@ -526,9 +525,6 @@ static void wakeup_source_activate(struct wakeup_source *ws, bool hard)
 			"unregistered wakeup source\n"))
 		return;
 
-	if (hard)
-		pm_system_wakeup();
-
 	ws->active = true;
 	ws->active_count++;
 	ws->last_time = ktime_get();
@@ -554,7 +550,10 @@ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard)
 		ws->wakeup_count++;
 
 	if (!ws->active)
-		wakeup_source_activate(ws, hard);
+		wakeup_source_activate(ws);
+
+	if (hard)
+		pm_system_wakeup();
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 967b08c25a091867b04261fa34addedc950256f1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 14 May 2017 02:23:12 +0200
Subject: RTC: rtc-cmos: Fix wakeup from suspend-to-idle

Commit eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from
suspend-to-idle) modified the core suspend-to-idle code to filter
out spurious SCI interrupts received while suspended, which requires
ACPI event source handlers to report wakeup events in a way that
will trigger a wakeup from suspend to idle (or abort system suspends
in progress, which is equivalent).

That needs to be done in the rtc-cmos driver too, which was overlooked
by the above commit, so do that now.

Fixes: eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle)
Reported-by: David E. Box <david.e.box@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/rtc/rtc-cmos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f4a96dbdabf2..dabe47b9be72 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -1085,7 +1085,7 @@ static u32 rtc_handler(void *context)
 	}
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	pm_wakeup_event(dev, 0);
+	pm_wakeup_hard_event(dev);
 	acpi_clear_event(ACPI_EVENT_RTC);
 	acpi_disable_event(ACPI_EVENT_RTC, 0);
 	return ACPI_INTERRUPT_HANDLED;
-- 
cgit v1.2.3-59-g8ed1b


From f5705aa8cfed142d980ecac12bee0d81b756479e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 13 May 2017 16:31:05 -0700
Subject: dax, xfs, ext4: compile out iomap-dax paths in the FS_DAX=n case

Tetsuo reports:

  fs/built-in.o: In function `xfs_file_iomap_end':
  xfs_iomap.c:(.text+0xe0ef9): undefined reference to `put_dax'
  fs/built-in.o: In function `xfs_file_iomap_begin':
  xfs_iomap.c:(.text+0xe1a7f): undefined reference to `dax_get_by_host'
  make: *** [vmlinux] Error 1
  $ grep DAX .config
  CONFIG_DAX=m
  # CONFIG_DEV_DAX is not set
  # CONFIG_FS_DAX is not set

When FS_DAX=n we can/must throw away the dax code in filesystems.
Implement 'fs_' versions of dax_get_by_host() and put_dax() that are
nops in the FS_DAX=n case.

Cc: <linux-xfs@vger.kernel.org>
Cc: <linux-ext4@vger.kernel.org>
Cc: Jan Kara <jack@suse.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Fixes: ef51042472f5 ("block, dax: move 'select DAX' from BLOCK to FS_DAX")
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext2/inode.c     |  4 ++--
 fs/ext4/inode.c     |  4 ++--
 fs/xfs/xfs_iomap.c  |  4 ++--
 include/linux/dax.h | 34 +++++++++++++++++++++++++++-------
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 26d77f9f8c12..2dcbd5698884 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -817,7 +817,7 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	iomap->bdev = bdev;
 	iomap->offset = (u64)first_block << blkbits;
 	if (blk_queue_dax(bdev->bd_queue))
-		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+		iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
 	else
 		iomap->dax_dev = NULL;
 
@@ -841,7 +841,7 @@ static int
 ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 		ssize_t written, unsigned flags, struct iomap *iomap)
 {
-	put_dax(iomap->dax_dev);
+	fs_put_dax(iomap->dax_dev);
 	if (iomap->type == IOMAP_MAPPED &&
 	    written < length &&
 	    (flags & IOMAP_WRITE))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5834c4d76be8..1bd0bfa547f6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3412,7 +3412,7 @@ retry:
 	bdev = inode->i_sb->s_bdev;
 	iomap->bdev = bdev;
 	if (blk_queue_dax(bdev->bd_queue))
-		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+		iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
 	else
 		iomap->dax_dev = NULL;
 	iomap->offset = first_block << blkbits;
@@ -3447,7 +3447,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 	int blkbits = inode->i_blkbits;
 	bool truncate = false;
 
-	put_dax(iomap->dax_dev);
+	fs_put_dax(iomap->dax_dev);
 	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
 		return 0;
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a63f61c256bd..94e5bdf7304c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1068,7 +1068,7 @@ xfs_file_iomap_begin(
 	/* optionally associate a dax device with the iomap bdev */
 	bdev = iomap->bdev;
 	if (blk_queue_dax(bdev->bd_queue))
-		iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+		iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
 	else
 		iomap->dax_dev = NULL;
 
@@ -1149,7 +1149,7 @@ xfs_file_iomap_end(
 	unsigned		flags,
 	struct iomap		*iomap)
 {
-	put_dax(iomap->dax_dev);
+	fs_put_dax(iomap->dax_dev);
 	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
 		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
 				length, written, iomap);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 00ebac854bb7..5ec1f6c47716 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -18,6 +18,20 @@ struct dax_operations {
 			void **, pfn_t *);
 };
 
+#if IS_ENABLED(CONFIG_DAX)
+struct dax_device *dax_get_by_host(const char *host);
+void put_dax(struct dax_device *dax_dev);
+#else
+static inline struct dax_device *dax_get_by_host(const char *host)
+{
+	return NULL;
+}
+
+static inline void put_dax(struct dax_device *dax_dev)
+{
+}
+#endif
+
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -25,23 +39,29 @@ static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
 	return __bdev_dax_supported(sb, blocksize);
 }
+
+static inline struct dax_device *fs_dax_get_by_host(const char *host)
+{
+	return dax_get_by_host(host);
+}
+
+static inline void fs_put_dax(struct dax_device *dax_dev)
+{
+	put_dax(dax_dev);
+}
+
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
 	return -EOPNOTSUPP;
 }
-#endif
 
-#if IS_ENABLED(CONFIG_DAX)
-struct dax_device *dax_get_by_host(const char *host);
-void put_dax(struct dax_device *dax_dev);
-#else
-static inline struct dax_device *dax_get_by_host(const char *host)
+static inline struct dax_device *fs_dax_get_by_host(const char *host)
 {
 	return NULL;
 }
 
-static inline void put_dax(struct dax_device *dax_dev)
+static inline void fs_put_dax(struct dax_device *dax_dev)
 {
 }
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 31d848aa1d85530770f0bdf1b61a042335d340ad Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 19 Apr 2017 09:58:23 -0700
Subject: soc: bcm: brcmstb: Correctly match 7435 SoC

Remove the duplicate brcm,bcm7425-sun-top-ctrl compatible string and
replace it with brcm,bcm7435-sun-top-ctrl which was intended.

Fixes: bd0faf08dc7f ("soc: bcm: brcmstb: Match additional compatible strings")
Reported-by: Andreas Oberritter <obi@saftware.de>
Acked-by: Gregory Fong <gregory.0xf0@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/soc/bcm/brcmstb/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/bcm/brcmstb/common.c b/drivers/soc/bcm/brcmstb/common.c
index b6195fdf0d00..22e98a90468c 100644
--- a/drivers/soc/bcm/brcmstb/common.c
+++ b/drivers/soc/bcm/brcmstb/common.c
@@ -49,7 +49,7 @@ static const struct of_device_id sun_top_ctrl_match[] = {
 	{ .compatible = "brcm,bcm7420-sun-top-ctrl", },
 	{ .compatible = "brcm,bcm7425-sun-top-ctrl", },
 	{ .compatible = "brcm,bcm7429-sun-top-ctrl", },
-	{ .compatible = "brcm,bcm7425-sun-top-ctrl", },
+	{ .compatible = "brcm,bcm7435-sun-top-ctrl", },
 	{ .compatible = "brcm,brcmstb-sun-top-ctrl", },
 	{ }
 };
-- 
cgit v1.2.3-59-g8ed1b


From b383b544f2666d67446b951a9a97af239dafed5d Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 3 Apr 2017 15:11:22 +0300
Subject: net/mlx5e: Use the correct pause values for ethtool advertising

Query the operational pause from firmware (PFCC register) instead of
always passing zeros.

Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ce7b09d72ff6..d60e681b443e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -849,6 +849,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+	u32 rx_pause = 0;
+	u32 tx_pause = 0;
 	u32 eth_proto_cap;
 	u32 eth_proto_admin;
 	u32 eth_proto_lp;
@@ -871,11 +873,13 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
 	an_status        = MLX5_GET(ptys_reg, out, an_status);
 
+	mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
 
 	get_supported(eth_proto_cap, link_ksettings);
-	get_advertising(eth_proto_admin, 0, 0, link_ksettings);
+	get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
 	get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
 
 	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
-- 
cgit v1.2.3-59-g8ed1b


From e3c19503712d6360239b19c14cded56dd63c40d7 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 19 Apr 2017 14:35:15 +0300
Subject: net/mlx5e: Fix ethtool pause support and advertise reporting

Pause bit should set when RX pause is on, not TX pause.
Also, setting Asym_Pause is incorrect, and should be turned off.

Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d60e681b443e..8209affa75c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -794,7 +794,6 @@ static void get_supported(u32 eth_proto_cap,
 	ptys2ethtool_supported_port(link_ksettings, eth_proto_cap);
 	ptys2ethtool_supported_link(supported, eth_proto_cap);
 	ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
-	ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause);
 }
 
 static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
@@ -804,7 +803,7 @@ static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
 	unsigned long *advertising = link_ksettings->link_modes.advertising;
 
 	ptys2ethtool_adver_link(advertising, eth_proto_cap);
-	if (tx_pause)
+	if (rx_pause)
 		ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
 	if (tx_pause ^ rx_pause)
 		ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
-- 
cgit v1.2.3-59-g8ed1b


From 20b6a1c78280dbeb45214c463cf9cbccb3665146 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Tue, 9 May 2017 16:40:46 +0300
Subject: net/mlx5e: Fix setup TC ndo

Fail-safe support patches introduced a trivial bug,
setup tc callback is doing a wrong check of the netdevice state,
the fix is simply to invert the condition.

Fixes: 6f9485af4020 ("net/mlx5e: Fail safe tc setup")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a61b71b6fff3..41cd22a223dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2976,7 +2976,7 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
 	new_channels.params = priv->channels.params;
 	new_channels.params.num_tc = tc ? tc : 1;
 
-	if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
 		goto out;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 5360fd473c23f18b42722cdb13a1c6ec7acd96ff Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 4 May 2017 17:53:32 +0300
Subject: net/mlx5e: IPoIB, Only support regular RQ for now

IPoIB doesn't support striding RQ at the moment, for this
we need to explicitly choose non striding RQ in IPoIB init,
even if the HW supports it.

Fixes: 8f493ffd88ea ("net/mlx5e: IPoIB, RX steering RSS RQTs and TIRs")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index 019c230da498..56bff3540954 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -66,6 +66,10 @@ static void mlx5i_init(struct mlx5_core_dev *mdev,
 
 	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
 
+	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
+	mlx5e_set_rq_type_params(mdev, &priv->channels.params, MLX5_WQ_TYPE_LINKED_LIST);
+	priv->channels.params.lro_en = false;
+
 	mutex_init(&priv->state_lock);
 
 	netdev->hw_features    |= NETIF_F_SG;
-- 
cgit v1.2.3-59-g8ed1b


From 508541146af18e43072e41a31aa62fac2b01aac1 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Tue, 25 Apr 2017 10:39:57 +0300
Subject: net/mlx5: Use underlay QPN from the root name space

Root flow table is dynamically changed by the underlying flow steering
layer, and IPoIB/ULPs have no idea what will be the root flow table in
the future, hence we need a dynamic infrastructure to move Underlay QPs
with the root flow table.

Fixes: b3ba51498bdd ("net/mlx5: Refactor create flow table method to accept underlay QP")
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   |  5 ++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  9 +++-----
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 25 +++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/ipoib.c   |  7 +++++--
 include/linux/mlx5/fs.h                           |  4 +++-
 8 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0099a3e397bc..2fd044b23875 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1003,7 +1003,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
 void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
 
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
 void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
 
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 576d6787b484..53ed58320a24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -800,7 +800,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
 	mlx5e_destroy_flow_table(&ttc->ft);
 }
 
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn)
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
 {
 	struct mlx5e_ttc_table *ttc = &priv->fs.ttc;
 	struct mlx5_flow_table_attr ft_attr = {};
@@ -810,7 +810,6 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn)
 	ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE;
 	ft_attr.level = MLX5E_TTC_FT_LEVEL;
 	ft_attr.prio = MLX5E_NIC_PRIO;
-	ft_attr.underlay_qpn = underlay_qpn;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
 	if (IS_ERR(ft->t)) {
@@ -1147,7 +1146,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
 		priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
 	}
 
-	err = mlx5e_create_ttc_table(priv, 0);
+	err = mlx5e_create_ttc_table(priv);
 	if (err) {
 		netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
 			   err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 19e3d2fc2099..fcec7bedd3cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -40,28 +40,25 @@
 #include "eswitch.h"
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft)
+			    struct mlx5_flow_table *ft, u32 underlay_qpn)
 {
 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
 
 	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
-	    ft->underlay_qpn == 0)
+	    underlay_qpn == 0)
 		return 0;
 
 	MLX5_SET(set_flow_table_root_in, in, opcode,
 		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
 	MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
 	MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+	MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
 	if (ft->vport) {
 		MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
 		MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
 	}
 
-	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
-	    ft->underlay_qpn != 0)
-		MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn);
-
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 8fad80688536..0f98a7cf4877 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -71,7 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 			unsigned int index);
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft);
+			    struct mlx5_flow_table *ft,
+			    u32 underlay_qpn);
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b8a176503d38..0e487e8ca634 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -650,7 +650,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	if (ft->level >= min_level)
 		return 0;
 
-	err = mlx5_cmd_update_root_ft(root->dev, ft);
+	err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn);
 	if (err)
 		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
 			       ft->id);
@@ -818,8 +818,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 		goto unlock_root;
 	}
 
-	ft->underlay_qpn = ft_attr->underlay_qpn;
-
 	tree_init_node(&ft->node, 1, del_flow_table);
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
@@ -1489,7 +1487,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 
 	new_root_ft = find_next_ft(ft);
 	if (new_root_ft) {
-		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft);
+		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
+						  root->underlay_qpn);
 
 		if (err) {
 			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
@@ -2062,3 +2061,21 @@ err:
 	mlx5_cleanup_fs(dev);
 	return err;
 }
+
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+
+	root->underlay_qpn = underlay_qpn;
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
+
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+
+	root->underlay_qpn = 0;
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 81eafc7b9dd9..990acee6fb09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -118,7 +118,6 @@ struct mlx5_flow_table {
 	/* FWD rules that point on this flow table */
 	struct list_head		fwd_rules;
 	u32				flags;
-	u32				underlay_qpn;
 };
 
 struct mlx5_fc_cache {
@@ -195,6 +194,7 @@ struct mlx5_flow_root_namespace {
 	struct mlx5_flow_table		*root_ft;
 	/* Should be held when chaining flow tables */
 	struct mutex			chain_lock;
+	u32				underlay_qpn;
 };
 
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index 56bff3540954..cc1858752e70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -160,6 +160,8 @@ out:
 
 static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
+	mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn);
+
 	mlx5_core_destroy_qp(mdev, qp);
 }
 
@@ -174,6 +176,8 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 		return err;
 	}
 
+	mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+
 	err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
@@ -193,7 +197,6 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 
 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 {
-	struct mlx5i_priv *ipriv = priv->ppriv;
 	int err;
 
 	priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
@@ -209,7 +212,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 		priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
 	}
 
-	err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn);
+	err = mlx5e_create_ttc_table(priv);
 	if (err) {
 		netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
 			   err);
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 1b166d2e19c5..b25e7baa273e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -109,7 +109,6 @@ struct mlx5_flow_table_attr {
 	int max_fte;
 	u32 level;
 	u32 flags;
-	u32 underlay_qpn;
 };
 
 struct mlx5_flow_table *
@@ -167,4 +166,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 216c4e9db4c9d1d2a382b42880442dc632cd47d9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 10 May 2017 22:40:06 +0300
Subject: PowerCap: Fix an error code in powercap_register_zone()

In the current code we accidentally return the successful result from
idr_alloc() instead of a negative error pointer.  The caller is looking
for an error pointer and so it treats the returned value as a valid
pointer.

This one might be a bit serious because if it lets people get around the
kernel's protection for remapping NULL.  I'm not sure.

Fixes: 75d2364ea0ca (PowerCap: Add class driver)
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/powercap_sys.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 14bde0db8c24..5b10b50f8686 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone(
 
 	power_zone->id = result;
 	idr_init(&power_zone->idr);
+	result = -ENOMEM;
 	power_zone->name = kstrdup(name, GFP_KERNEL);
 	if (!power_zone->name)
 		goto err_name_alloc;
-- 
cgit v1.2.3-59-g8ed1b


From 0bae5fd3330be0517fba697e6b228601d421fade Mon Sep 17 00:00:00 2001
From: Pushkar Jambhlekar <pushkar.iit@gmail.com>
Date: Thu, 11 May 2017 10:31:24 +0530
Subject: PM / hibernate: Declare variables as static

Fixing sparse warnings: 'symbol not declared. Should it be static?'

Signed-off-by: Pushkar Jambhlekar <pushkar.iit@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index d79a38de425a..a628cccafa4a 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1422,7 +1422,7 @@ static unsigned int nr_meta_pages;
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
  */
-unsigned int alloc_normal, alloc_highmem;
+static unsigned int alloc_normal, alloc_highmem;
 /*
  * Memory bitmap used for marking saveable pages (during hibernation) or
  * hibernation image pages (during restore)
-- 
cgit v1.2.3-59-g8ed1b


From be0408d74d9c95de1baf6c2563b6e6d1dfb17b88 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 May 2017 14:12:29 +0200
Subject: cpufreq: dbx500: add a Kconfig symbol

Moving the cooling code into the cpufreq driver caused a possible build failure
when the cpu_thermal helper code is a loadable module or disabled:

drivers/cpufreq/dbx500-cpufreq.o: In function `dbx500_cpufreq_ready':
dbx500-cpufreq.c:(.text.dbx500_cpufreq_ready+0x4): undefined reference to `cpufreq_cooling_register'

This adds the same dependency that we have in other cpufreq drivers,
forcing the driver to be disabled when we can't possibly link it.

Fixes: 19678ffb9fd6 (cpufreq: dbx500: Manage cooling device from cpufreq driver)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig.arm | 9 +++++++++
 drivers/cpufreq/Makefile    | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 74ed7e9a7f27..2011fec2d6ad 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -71,6 +71,15 @@ config ARM_HIGHBANK_CPUFREQ
 
 	  If in doubt, say N.
 
+config ARM_DB8500_CPUFREQ
+	tristate "ST-Ericsson DB8500 cpufreq" if COMPILE_TEST && !ARCH_U8500
+	default ARCH_U8500
+	depends on HAS_IOMEM
+	depends on !CPU_THERMAL || THERMAL
+	help
+	  This adds the CPUFreq driver for ST-Ericsson Ux500 (DB8500) SoC
+	  series.
+
 config ARM_IMX6Q_CPUFREQ
 	tristate "Freescale i.MX6 cpufreq support"
 	depends on ARCH_MXC
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index b7e78f063c4f..ab3a42cd29ef 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_ARM_DT_BL_CPUFREQ)		+= arm_big_little_dt.o
 
 obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ)	+= brcmstb-avs-cpufreq.o
 obj-$(CONFIG_ARCH_DAVINCI)		+= davinci-cpufreq.o
-obj-$(CONFIG_UX500_SOC_DB8500)		+= dbx500-cpufreq.o
+obj-$(CONFIG_ARM_DB8500_CPUFREQ)	+= dbx500-cpufreq.o
 obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ)	+= exynos5440-cpufreq.o
 obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ)	+= highbank-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
-- 
cgit v1.2.3-59-g8ed1b


From 90b4f30b6d15222a509dacf47f29efef2b22571e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 10 May 2017 16:30:12 +0200
Subject: hwmon: (coretemp) Handle frozen hotplug state correctly

The recent conversion to the hotplug state machine missed that the original
hotplug notifiers did not execute in the frozen state, which is used on
suspend on resume.

This does not matter on single socket machines, but on multi socket systems
this breaks when the device for a non-boot socket is removed when the last
CPU of that socket is brought offline. The device removal locks up the
machine hard w/o any debug output.

Prevent executing the hotplug callbacks when cpuhp_tasks_frozen is true.

Thanks to Tommi for providing debug information patiently while I failed to
spot the obvious.

Fixes: e00ca5df37ad ("hwmon: (coretemp) Convert to hotplug state machine")
Reported-by: Tommi Rantala <tt.rantala@gmail.com>
Tested-by: Tommi Rantala <tt.rantala@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/coretemp.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 3ac4c03ba77b..c13a4fd86b3c 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -604,6 +604,13 @@ static int coretemp_cpu_online(unsigned int cpu)
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct platform_data *pdata;
 
+	/*
+	 * Don't execute this on resume as the offline callback did
+	 * not get executed on suspend.
+	 */
+	if (cpuhp_tasks_frozen)
+		return 0;
+
 	/*
 	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
 	 * sensors. We check this bit only, all the early CPUs
@@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned int cpu)
 	struct temp_data *tdata;
 	int indx, target;
 
+	/*
+	 * Don't execute this on suspend as the device remove locks
+	 * up the machine.
+	 */
+	if (cpuhp_tasks_frozen)
+		return 0;
+
 	/* If the physical CPU device does not exist, just return */
 	if (!pdev)
 		return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 2fe4bff3516924a37e083e3211364abe59db1161 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Wed, 12 Apr 2017 18:31:18 -0300
Subject: ARM: dts: imx53-qsrb: Pulldown PMIC IRQ pin

Currently the following errors are seen:

[   14.015056] mc13xxx 0-0008: Failed to read IRQ status: -6
[   27.321093] mc13xxx 0-0008: Failed to read IRQ status: -6
[   27.411681] mc13xxx 0-0008: Failed to read IRQ status: -6
[   27.456281] mc13xxx 0-0008: Failed to read IRQ status: -6
[   30.527106] mc13xxx 0-0008: Failed to read IRQ status: -6
[   36.596900] mc13xxx 0-0008: Failed to read IRQ status: -6

Also when reading the interrupts via 'cat /proc/interrupts' the
PMIC GPIO interrupt counter does not stop increasing.

The reason for the storm of interrupts is that the PUS field of
register IOMUXC_SW_PAD_CTL_PAD_CSI0_DAT5 is currently configured as:
10 : 100k pullup

and the PMIC interrupt is being registered as IRQ_TYPE_LEVEL_HIGH type,
which is the correct type as per the MC34708 datasheet.

Use the default power on value for the IOMUX, which sets PUS field as:
00: 360k pull down

This prevents the spurious PMIC interrupts from happening.

Commit e1ffceb078c6 ("ARM: imx53: qsrb: fix PMIC interrupt level")
correctly described the irq type as IRQ_TYPE_LEVEL_HIGH, but
missed to update the IOMUX of the PMIC GPIO as pull down.

Fixes: e1ffceb078c6 ("ARM: imx53: qsrb: fix PMIC interrupt level")
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx53-qsrb.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts
index de2215832372..4e103a905dc9 100644
--- a/arch/arm/boot/dts/imx53-qsrb.dts
+++ b/arch/arm/boot/dts/imx53-qsrb.dts
@@ -23,7 +23,7 @@
 	imx53-qsrb {
 		pinctrl_pmic: pmicgrp {
 			fsl,pins = <
-				MX53_PAD_CSI0_DAT5__GPIO5_23	0x1e4 /* IRQ */
+				MX53_PAD_CSI0_DAT5__GPIO5_23	0x1c4 /* IRQ */
 			>;
 		};
 	};
-- 
cgit v1.2.3-59-g8ed1b


From d8581c7c8be172dac156a19d261f988a72ce596f Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Fri, 5 May 2017 14:00:17 +0300
Subject: ARM: dts: imx6sx-sdb: Remove OPP override

The board file for imx6sx-sdb overrides cpufreq operating points to use
higher voltages. This is done because the board has a shared rail for
VDD_ARM_IN and VDD_SOC_IN and when using LDO bypass the shared voltage
needs to be a value suitable for both ARM and SOC.

This only applies to LDO bypass mode, a feature not present in upstream.
When LDOs are enabled the effect is to use higher voltages than necessary
for no good reason.

Setting these higher voltages can make some boards fail to boot with ugly
semi-random crashes reminiscent of memory corruption. These failures only
happen on board rev. C, rev. B is reported to still work.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Fixes: 54183bd7f766 ("ARM: imx6sx-sdb: add revb board and make it default")
Cc: stable@vger.kernel.org
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6sx-sdb.dts | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts
index 5bb8fd57e7f5..d71da30c9cff 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb.dts
@@ -12,23 +12,6 @@
 	model = "Freescale i.MX6 SoloX SDB RevB Board";
 };
 
-&cpu0 {
-	operating-points = <
-		/* kHz    uV */
-		996000  1250000
-		792000  1175000
-		396000  1175000
-		198000  1175000
-		>;
-	fsl,soc-operating-points = <
-		/* ARM kHz      SOC uV */
-		996000	1250000
-		792000	1175000
-		396000	1175000
-		198000  1175000
-	>;
-};
-
 &i2c1 {
 	clock-frequency = <100000>;
 	pinctrl-names = "default";
-- 
cgit v1.2.3-59-g8ed1b


From e23c7f7d57831fdae444be9d507e67716ab601d4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 May 2017 13:37:47 +0200
Subject: soc: imx: add PM dependency for IMX7_PM_DOMAINS

The new pm domain driver causes a build failure when CONFIG_PM
is not set:

warning: (IMX7_PM_DOMAINS) selects PM_GENERIC_DOMAINS which has unmet direct dependencies (PM)
drivers/base/power/domain_governor.c: In function 'default_suspend_ok':
drivers/base/power/domain_governor.c:75:17: error: 'struct dev_pm_info' has no member named 'ignore_children'

This adds a dependency to ensure that we don't attempt to build the
driver without CONFIG_PM.

Fixes: 03aa12629fc4 ("soc: imx: Add GPCv2 power gating driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/soc/imx/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/soc/imx/Kconfig b/drivers/soc/imx/Kconfig
index 357a5d8f8da0..a5b86a28f343 100644
--- a/drivers/soc/imx/Kconfig
+++ b/drivers/soc/imx/Kconfig
@@ -2,8 +2,9 @@ menu "i.MX SoC drivers"
 
 config IMX7_PM_DOMAINS
 	bool "i.MX7 PM domains"
-	select PM_GENERIC_DOMAINS
 	depends on SOC_IMX7D || (COMPILE_TEST && OF)
+	depends on PM
+	select PM_GENERIC_DOMAINS
 	default y if SOC_IMX7D
 
 endmenu
-- 
cgit v1.2.3-59-g8ed1b


From 072792dcdfc8d5f91a26050e5665285f50afebf5 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 06:14:16 -0400
Subject: dm cache: fix incorrect 'idle_time' reset in IO tracker

Some bios have no payload (eg, a FLUSH), don't reset the idle_time when
these come in.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1db375f50a13..0760ba409c21 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len)
 
 static void __iot_io_end(struct io_tracker *iot, sector_t len)
 {
+	if (!len)
+		return;
+
 	iot->in_flight -= len;
 	if (!iot->in_flight)
 		iot->idle_time = jiffies;
-- 
cgit v1.2.3-59-g8ed1b


From a8cd1eba6135e086109e2b94bf96deb17456ede8 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 05:07:34 -0400
Subject: dm cache policy smq: only demote entries in bottom half of the clean
 multiqueue

Heavy IO load may mean there are very few clean blocks in the cache, and
we risk demoting entries that get hit a lot.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-smq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 72479bd61e11..a177559f2049 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1190,7 +1190,7 @@ static void queue_demotion(struct smq_policy *mq)
 	if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
 		return;
 
-	e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
 	if (!e) {
 		if (!clean_target_met(mq, false))
 			queue_writeback(mq);
-- 
cgit v1.2.3-59-g8ed1b


From 78c45607b909fb384c47c134d89b39285a6a8b45 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 05:09:38 -0400
Subject: dm cache policy smq: be more aggressive about triggering a writeback

If there are no clean entries to demote we really want to writeback
immediately.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-smq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index a177559f2049..5aa8f43856c5 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1192,7 +1192,7 @@ static void queue_demotion(struct smq_policy *mq)
 
 	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
 	if (!e) {
-		if (!clean_target_met(mq, false))
+		if (!clean_target_met(mq, true))
 			queue_writeback(mq);
 		return;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 4d44ec5ab751be63c5d348f13294304d87baa8c3 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 05:11:06 -0400
Subject: dm cache policy smq: put newly promoted entries at the top of the
 multiqueue

This stops entries bouncing in and out of the cache quickly.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-smq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 5aa8f43856c5..54421a846a0c 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1452,6 +1452,7 @@ static void __complete_background_work(struct smq_policy *mq,
 		clear_pending(mq, e);
 		if (success) {
 			e->oblock = work->oblock;
+			e->level = NR_CACHE_LEVELS - 1;
 			push(mq, e);
 			// h, q, a
 		} else {
-- 
cgit v1.2.3-59-g8ed1b


From 6cf4cc8f8b3b7bc9e3c04a7eab44b985d50029fc Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 07:48:18 -0400
Subject: dm cache policy smq: stop preemptively demoting blocks

It causes a lot of churn if the working set's size is close to the fast
device's size.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-smq.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 54421a846a0c..758480a1893d 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1134,13 +1134,10 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 		percent_to_target(mq, CLEAN_TARGET);
 }
 
-static bool free_target_met(struct smq_policy *mq, bool idle)
+static bool free_target_met(struct smq_policy *mq)
 {
 	unsigned nr_free;
 
-	if (!idle)
-		return true;
-
 	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
 	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
 		percent_to_target(mq, FREE_TARGET);
@@ -1220,7 +1217,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
 		 * We always claim to be 'idle' to ensure some demotions happen
 		 * with continuous loads.
 		 */
-		if (!free_target_met(mq, true))
+		if (!free_target_met(mq))
 			queue_demotion(mq);
 		return;
 	}
@@ -1421,14 +1418,10 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
 	spin_lock_irqsave(&mq->lock, flags);
 	r = btracker_issue(mq->bg_work, result);
 	if (r == -ENODATA) {
-		/* find some writeback work to do */
-		if (mq->migrations_allowed && !free_target_met(mq, idle))
-			queue_demotion(mq);
-
-		else if (!clean_target_met(mq, idle))
+		if (!clean_target_met(mq, idle)) {
 			queue_writeback(mq);
-
-		r = btracker_issue(mq->bg_work, result);
+			r = btracker_issue(mq->bg_work, result);
+		}
 	}
 	spin_unlock_irqrestore(&mq->lock, flags);
 
-- 
cgit v1.2.3-59-g8ed1b


From 701e03e4e180f0cd97d4139a32e2b2d879d12da2 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 08:22:31 -0400
Subject: dm cache: track all IO to the cache rather than just the origin
 device's IO

IO tracking used to throttle writebacks when the origin device is busy.

Even if all the IO is going to the fast device, writebacks can
significantly degrade performance.  So track all IO to gauge whether the
cache is busy or not.

Otherwise, synthetic IO tests (e.g. fio) that might send all IO to the
fast device wouldn't cause writebacks to get throttled.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 0760ba409c21..232078e48167 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -477,7 +477,7 @@ struct cache {
 	spinlock_t invalidation_lock;
 	struct list_head invalidation_requests;
 
-	struct io_tracker origin_tracker;
+	struct io_tracker tracker;
 
 	struct work_struct commit_ws;
 	struct batcher committer;
@@ -904,8 +904,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
-	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
-		bio_op(bio) != REQ_OP_DISCARD);
+	return bio_op(bio) != REQ_OP_DISCARD;
 }
 
 static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -915,7 +914,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
 
 	if (accountable_bio(cache, bio)) {
 		pb->len = bio_sectors(bio);
-		iot_io_begin(&cache->origin_tracker, pb->len);
+		iot_io_begin(&cache->tracker, pb->len);
 	}
 }
 
@@ -924,7 +923,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
-	iot_io_end(&cache->origin_tracker, pb->len);
+	iot_io_end(&cache->tracker, pb->len);
 }
 
 static void accounted_request(struct cache *cache, struct bio *bio)
@@ -1725,7 +1724,7 @@ enum busy {
 
 static enum busy spare_migration_bandwidth(struct cache *cache)
 {
-	bool idle = iot_idle_for(&cache->origin_tracker, HZ);
+	bool idle = iot_idle_for(&cache->tracker, HZ);
 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 
@@ -2720,7 +2719,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	batcher_init(&cache->committer, commit_op, cache,
 		     issue_op, cache, cache->wq);
-	iot_init(&cache->origin_tracker);
+	iot_init(&cache->tracker);
 
 	init_rwsem(&cache->background_work_lock);
 	prevent_background_work(cache);
@@ -2944,7 +2943,7 @@ static void cache_postsuspend(struct dm_target *ti)
 
 	cancel_delayed_work(&cache->waker);
 	flush_workqueue(cache->wq);
-	WARN_ON(cache->origin_tracker.in_flight);
+	WARN_ON(cache->tracker.in_flight);
 
 	/*
 	 * If it's a flush suspend there won't be any deferred bios, so this
-- 
cgit v1.2.3-59-g8ed1b


From 49b7f768900f4084a65c3689d955b2fceac39e53 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 09:07:16 -0400
Subject: dm cache: simplify the IDLE vs BUSY state calculation

Drop the MODERATE state since it wasn't buying us much.

Also, in check_migrations(), prepare for the next commit ("dm cache
policy smq: don't do any writebacks unless IDLE") by deferring to the
policy to make the final decision on whether writebacks can be
serviced.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 232078e48167..d682a0511381 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1718,7 +1718,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
 
 enum busy {
 	IDLE,
-	MODERATE,
 	BUSY
 };
 
@@ -1728,10 +1727,10 @@ static enum busy spare_migration_bandwidth(struct cache *cache)
 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 
-	if (current_volume <= cache->migration_threshold)
-		return idle ? IDLE : MODERATE;
+	if (idle && current_volume <= cache->migration_threshold)
+		return IDLE;
 	else
-		return idle ? MODERATE : BUSY;
+		return BUSY;
 }
 
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
@@ -2047,8 +2046,6 @@ static void check_migrations(struct work_struct *ws)
 
 	for (;;) {
 		b = spare_migration_bandwidth(cache);
-		if (b == BUSY)
-			break;
 
 		r = policy_get_background_work(cache->policy, b == IDLE, &op);
 		if (r == -ENODATA)
-- 
cgit v1.2.3-59-g8ed1b


From 2e63309507c818e8b631a03f02c363031c007fb7 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 11 May 2017 09:09:04 -0400
Subject: dm cache policy smq: don't do any writebacks unless IDLE

If there are no clean blocks to be demoted the writeback will be
triggered at that point.  Preemptively writing back can hurt high IO
load scenarios.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-smq.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 758480a1893d..e5eb9c9b4bc8 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 	 * Cache entries may not be populated.  So we cannot rely on the
 	 * size of the clean queue.
 	 */
-	unsigned nr_clean;
-
 	if (idle) {
 		/*
 		 * We'd like to clean everything.
@@ -1129,9 +1127,10 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 		return q_size(&mq->dirty) == 0u;
 	}
 
-	nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
-	return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
-		percent_to_target(mq, CLEAN_TARGET);
+	/*
+	 * If we're busy we don't worry about cleaning at all.
+	 */
+	return true;
 }
 
 static bool free_target_met(struct smq_policy *mq)
-- 
cgit v1.2.3-59-g8ed1b


From 01630ab8543f21df30b0dca19087b85744f61aee Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 12 May 2017 11:04:52 +0100
Subject: ARM: KVM: Fix tracepoint generation after move to virt/kvm/arm/

Moving most of the shared code to virt/kvm/arm had for consequence
that KVM/ARM doesn't build anymore, because the code that used to
define the tracepoints is now somewhere else.

Fix this by defining CREATE_TRACE_POINTS in coproc.c, and clean-up
trace.h as well.

Fixes: 35d2d5d490e2 ("KVM: arm/arm64: Move shared files to virt/kvm/arm")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/kvm/coproc.c | 1 +
 arch/arm/kvm/trace.h  | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 2c14b69511e9..ac8d36da4d08 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -32,6 +32,7 @@
 #include <asm/vfp.h>
 #include "../vfp/vfpinstr.h"
 
+#define CREATE_TRACE_POINTS
 #include "trace.h"
 #include "coproc.h"
 
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index fc0943776db2..b0d10648c486 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -1,5 +1,5 @@
-#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVM_H
+#if !defined(_TRACE_ARM_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM_KVM_H
 
 #include <linux/tracepoint.h>
 
@@ -74,10 +74,10 @@ TRACE_EVENT(kvm_hvc,
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
-#endif /* _TRACE_KVM_H */
+#endif /* _TRACE_ARM_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH arch/arm/kvm
+#define TRACE_INCLUDE_PATH .
 #undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE trace
 
-- 
cgit v1.2.3-59-g8ed1b


From 40dd46048c155b8f0683f468c950a1c107f77a7c Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Wed, 3 May 2017 10:28:54 +0200
Subject: usb: serial: option: add Telit ME910 support

This patch adds support for Telit ME910 PID 0x1100.

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index af67a0de6b5d..3bf61acfc26b 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -281,6 +281,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE922_USBCFG0		0x1042
 #define TELIT_PRODUCT_LE922_USBCFG3		0x1043
 #define TELIT_PRODUCT_LE922_USBCFG5		0x1045
+#define TELIT_PRODUCT_ME910			0x1100
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4		0x1206
@@ -640,6 +641,11 @@ static const struct option_blacklist_info simcom_sim7100e_blacklist = {
 	.reserved = BIT(5) | BIT(6),
 };
 
+static const struct option_blacklist_info telit_me910_blacklist = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(1) | BIT(3),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(2),
@@ -1235,6 +1241,8 @@ static const struct usb_device_id option_ids[] = {
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
 	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
+		.driver_info = (kernel_ulong_t)&telit_me910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
-- 
cgit v1.2.3-59-g8ed1b


From 8663effb24f9430394d3bf1ed2dac42a771421d1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 14 Apr 2017 08:48:09 -0400
Subject: sched/core: Call __schedule() from do_idle() without enabling
 preemption

I finally got around to creating trampolines for dynamically allocated
ftrace_ops with using synchronize_rcu_tasks(). For users of the ftrace
function hook callbacks, like perf, that allocate the ftrace_ops
descriptor via kmalloc() and friends, ftrace was not able to optimize
the functions being traced to use a trampoline because they would also
need to be allocated dynamically. The problem is that they cannot be
freed when CONFIG_PREEMPT is set, as there's no way to tell if a task
was preempted on the trampoline. That was before Paul McKenney
implemented synchronize_rcu_tasks() that would make sure all tasks
(except idle) have scheduled out or have entered user space.

While testing this, I triggered this bug:

 BUG: unable to handle kernel paging request at ffffffffa0230077
 ...
 RIP: 0010:0xffffffffa0230077
 ...
 Call Trace:
  schedule+0x5/0xe0
  schedule_preempt_disabled+0x18/0x30
  do_idle+0x172/0x220

What happened was that the idle task was preempted on the trampoline.
As synchronize_rcu_tasks() ignores the idle thread, there's nothing
that lets ftrace know that the idle task was preempted on a trampoline.

The idle task shouldn't need to ever enable preemption. The idle task
is simply a loop that calls schedule or places the cpu into idle mode.
In fact, having preemption enabled is inefficient, because it can
happen when idle is just about to call schedule anyway, which would
cause schedule to be called twice. Once for when the interrupt came in
and was returning back to normal context, and then again in the normal
path that the idle loop is running in, which would be pointless, as it
had already scheduled.

The only reason schedule_preempt_disable() enables preemption is to be
able to call sched_submit_work(), which requires preemption enabled. As
this is a nop when the task is in the RUNNING state, and idle is always
in the running state, there's no reason that idle needs to enable
preemption. But that means it cannot use schedule_preempt_disable() as
other callers of that function require calling sched_submit_work().

Adding a new function local to kernel/sched/ that allows idle to call
the scheduler without enabling preemption, fixes the
synchronize_rcu_tasks() issue, as well as removes the pointless spurious
schedule calls caused by interrupts happening in the brief window where
preemption is enabled just before it calls schedule.

Reviewed: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170414084809.3dacde2a@gandalf.local.home
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c  | 25 +++++++++++++++++++++++++
 kernel/sched/idle.c  |  2 +-
 kernel/sched/sched.h |  2 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 759f4bd52cd6..803c3bc274c4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3502,6 +3502,31 @@ asmlinkage __visible void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+/*
+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
+ * state (have scheduled out non-voluntarily) by making sure that all
+ * tasks have either left the run queue or have gone into user space.
+ * As idle tasks do not do either, they must not ever be preempted
+ * (schedule out non-voluntarily).
+ *
+ * schedule_idle() is similar to schedule_preempt_disable() except that it
+ * never enables preemption because it does not call sched_submit_work().
+ */
+void __sched schedule_idle(void)
+{
+	/*
+	 * As this skips calling sched_submit_work(), which the idle task does
+	 * regardless because that function is a nop when the task is in a
+	 * TASK_RUNNING state, make sure this isn't used someplace that the
+	 * current task can be in any other state. Note, idle is always in the
+	 * TASK_RUNNING state.
+	 */
+	WARN_ON_ONCE(current->state);
+	do {
+		__schedule(false);
+	} while (need_resched());
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 asmlinkage __visible void __sched schedule_user(void)
 {
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 2a25a9ec2c6e..ef63adce0c9c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -265,7 +265,7 @@ static void do_idle(void)
 	smp_mb__after_atomic();
 
 	sched_ttwu_pending();
-	schedule_preempt_disabled();
+	schedule_idle();
 
 	if (unlikely(klp_patch_pending(current)))
 		klp_update_patch_state(current);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7808ab050599..6dda2aab731e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1467,6 +1467,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
 }
 #endif
 
+extern void schedule_idle(void);
+
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);
-- 
cgit v1.2.3-59-g8ed1b


From bb246681b3ed0967489a7401ad528c1aaa1a4c2e Mon Sep 17 00:00:00 2001
From: Anthony Mallet <anthony.mallet@laas.fr>
Date: Fri, 5 May 2017 17:30:16 +0200
Subject: USB: serial: ftdi_sio: fix setting latency for unprivileged users

Commit 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY
flag") enables unprivileged users to set the FTDI latency timer,
but there was a logic flaw that skipped sending the corresponding
USB control message to the device.

Specifically, the device latency timer would not be updated until next
open, something which was later also inadvertently broken by commit
c19db4c9e49a ("USB: ftdi_sio: set device latency timeout at port
probe").

A recent commit c6dce2626606 ("USB: serial: ftdi_sio: fix extreme
low-latency setting") disabled the low-latency mode by default so we now
need this fix to allow unprivileged users to again enable it.

Signed-off-by: Anthony Mallet <anthony.mallet@laas.fr>
[johan: amend commit message]
Fixes: 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag")
Fixes: c19db4c9e49a ("USB: ftdi_sio: set device latency timeout at port probe").
Cc: stable <stable@vger.kernel.org>     # 2.6.31
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ftdi_sio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d38780fa8788..0e634c11abbf 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1527,9 +1527,9 @@ static int set_serial_info(struct tty_struct *tty,
 					(new_serial.flags & ASYNC_FLAGS));
 	priv->custom_divisor = new_serial.custom_divisor;
 
+check_and_exit:
 	write_latency_timer(port);
 
-check_and_exit:
 	if ((old_priv.flags & ASYNC_SPD_MASK) !=
 	     (priv->flags & ASYNC_SPD_MASK)) {
 		if ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
-- 
cgit v1.2.3-59-g8ed1b


From f83914fdfcc3ecb62a5a83eeb609ff59a9c2052d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 12 May 2017 14:34:37 +0200
Subject: ALSA: usb-audio: fix Amanero Combo384 quirk on big-endian hosts

Add missing endianness conversion when using the USB device-descriptor
bcdDevice field when applying the Amanero Combo384 (endianness!) quirk.

Fixes: 3eff682d765b ("ALSA: usb-audio: Support both DSD LE/BE Amanero firmware versions")
Cc: Jussi Laako <jussi@sonarnerd.net>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 01eff6ce6401..d7b0b0a3a2db 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1364,7 +1364,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 	/* Amanero Combo384 USB interface with native DSD support */
 	case USB_ID(0x16d0, 0x071a):
 		if (fp->altsetting == 2) {
-			switch (chip->dev->descriptor.bcdDevice) {
+			switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
 			case 0x199:
 				return SNDRV_PCM_FMTBIT_DSD_U32_LE;
 			case 0x19b:
-- 
cgit v1.2.3-59-g8ed1b


From 849ff8190eb9add34c376219b5ae319de83eeb32 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Sat, 6 May 2017 17:49:08 +0800
Subject: staging/android/ion: remove useless document file

After commit 9828282e33a0 ("staging: android: ion: Remove old platform
support"), the document about devicetree of ion is no need anymore, so
just remove it.

Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Acked-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/staging/ion/hi6220-ion.txt | 31 -------------
 MAINTAINERS                                        |  1 -
 drivers/staging/android/ion/devicetree.txt         | 51 ----------------------
 3 files changed, 83 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt
 delete mode 100644 drivers/staging/android/ion/devicetree.txt

diff --git a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt b/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt
deleted file mode 100644
index c59e27c632c1..000000000000
--- a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Hi6220 SoC ION
-===================================================================
-Required properties:
-- compatible : "hisilicon,hi6220-ion"
-- list of the ION heaps
-	- heap name : maybe heap_sys_user@0
-	- heap id   : id should be unique in the system.
-	- heap base : base ddr address of the heap,0 means that
-	it is dynamic.
-	- heap size : memory size and 0 means it is dynamic.
-	- heap type : the heap type of the heap, please also
-	see the define in ion.h(drivers/staging/android/uapi/ion.h)
--------------------------------------------------------------------
-Example:
-	hi6220-ion {
-		compatible = "hisilicon,hi6220-ion";
-		heap_sys_user@0 {
-			heap-name = "sys_user";
-			heap-id   = <0x0>;
-			heap-base = <0x0>;
-			heap-size = <0x0>;
-			heap-type = "ion_system";
-		};
-		heap_sys_contig@0 {
-			heap-name = "sys_contig";
-			heap-id   = <0x1>;
-			heap-base = <0x0>;
-			heap-size = <0x0>;
-			heap-type = "ion_system_contig";
-		};
-	};
diff --git a/MAINTAINERS b/MAINTAINERS
index f7d568b8f133..5cf8b8d0f733 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -846,7 +846,6 @@ M:	Laura Abbott <labbott@redhat.com>
 M:	Sumit Semwal <sumit.semwal@linaro.org>
 L:	devel@driverdev.osuosl.org
 S:	Supported
-F:	Documentation/devicetree/bindings/staging/ion/
 F:	drivers/staging/android/ion
 F:	drivers/staging/android/uapi/ion.h
 F:	drivers/staging/android/uapi/ion_test.h
diff --git a/drivers/staging/android/ion/devicetree.txt b/drivers/staging/android/ion/devicetree.txt
deleted file mode 100644
index 168715271f06..000000000000
--- a/drivers/staging/android/ion/devicetree.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-Ion Memory Manager
-
-Ion is a memory manager that allows for sharing of buffers via dma-buf.
-Ion allows for different types of allocation via an abstraction called
-a 'heap'. A heap represents a specific type of memory. Each heap has
-a different type. There can be multiple instances of the same heap
-type.
-
-Specific heap instances are tied to heap IDs. Heap IDs are not to be specified
-in the devicetree.
-
-Required properties for Ion
-
-- compatible: "linux,ion" PLUS a compatible property for the device
-
-All child nodes of a linux,ion node are interpreted as heaps
-
-required properties for heaps
-
-- compatible: compatible string for a heap type PLUS a compatible property
-for the specific instance of the heap. Current heap types
--- linux,ion-heap-system
--- linux,ion-heap-system-contig
--- linux,ion-heap-carveout
--- linux,ion-heap-chunk
--- linux,ion-heap-dma
--- linux,ion-heap-custom
-
-Optional properties
-- memory-region: A phandle to a memory region. Required for DMA heap type
-(see reserved-memory.txt for details on the reservation)
-
-Example:
-
-	ion {
-		compatbile = "hisilicon,ion", "linux,ion";
-
-		ion-system-heap {
-			compatbile = "hisilicon,system-heap", "linux,ion-heap-system"
-		};
-
-		ion-camera-region {
-			compatible = "hisilicon,camera-heap", "linux,ion-heap-dma"
-			memory-region = <&camera_region>;
-		};
-
-		ion-fb-region {
-			compatbile = "hisilicon,fb-heap", "linux,ion-heap-dma"
-			memory-region = <&fb_region>;
-		};
-	}
-- 
cgit v1.2.3-59-g8ed1b


From 84817ef091f378b3f9fc86b02efb48b3fc1a3428 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@benyossef.com>
Date: Sun, 7 May 2017 16:18:27 +0300
Subject: staging: MAINTAINERS: add GBY as ccree maintainer

I work for Arm on maintaining the TrustZone CryptoCell driver.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5cf8b8d0f733..da0149f2d16c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3115,6 +3115,14 @@ F:	drivers/net/ieee802154/cc2520.c
 F:	include/linux/spi/cc2520.h
 F:	Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
 
+CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER
+M:	Gilad Ben-Yossef <gilad@benyossef.com>
+L:	linux-crypto@vger.kernel.org
+L:	driverdev-devel@linuxdriverproject.org
+S:	Supported
+F:	drivers/staging/ccree/
+W:	https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
+
 CEC FRAMEWORK
 M:	Hans Verkuil <hans.verkuil@cisco.com>
 L:	linux-media@vger.kernel.org
-- 
cgit v1.2.3-59-g8ed1b


From c6a9d3eaee508f53ec4f777035522565ed567692 Mon Sep 17 00:00:00 2001
From: Olivier Leveque <o_leveque@orange.fr>
Date: Tue, 9 May 2017 09:04:53 -0700
Subject: staging: typec: tcpci: declare private structure as static

This fixes a sparse warning regarding an undeclared symbol. Since the
structure tcpci_tcpc_config is private to tcpci.c, it should be declared as
static.

Signed-off-by: Olivier Leveque <o_leveque@orange.fr>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/tcpci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/typec/tcpci.c b/drivers/staging/typec/tcpci.c
index 5e5be74c7850..df72d8b01e73 100644
--- a/drivers/staging/typec/tcpci.c
+++ b/drivers/staging/typec/tcpci.c
@@ -425,7 +425,7 @@ static const struct regmap_config tcpci_regmap_config = {
 	.max_register = 0x7F, /* 0x80 .. 0xFF are vendor defined */
 };
 
-const struct tcpc_config tcpci_tcpc_config = {
+static const struct tcpc_config tcpci_tcpc_config = {
 	.type = TYPEC_PORT_DFP,
 	.default_role = TYPEC_SINK,
 };
-- 
cgit v1.2.3-59-g8ed1b


From 227383f8c28ea9e53d958801790d0e2e8f985d08 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@dowhile0.org>
Date: Tue, 9 May 2017 09:04:54 -0700
Subject: staging: typec: fusb302: Fix module autoload

If the driver is built as a module, autoload won't work because the module
alias information is not filled. So user-space can't match the registered
device with the corresponding module.

Export the OF and I2C device ID table entries as module aliases, using the
MODULE_DEVICE_TABLE() macro.

Before this patch:

$ modinfo drivers/staging/typec/fusb302/fusb302.ko | grep alias
$

After this patch:

$ modinfo drivers/staging/typec/fusb302/fusb302.ko | grep alias
alias:          of:N*T*Cfcs,fusb302C*
alias:          of:N*T*Cfcs,fusb302
alias:          i2c:typec_fusb302

Signed-off-by: Javier Martinez Canillas <javier@dowhile0.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/fusb302/fusb302.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index 2cee9a952c9b..aa460f93a293 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -1787,11 +1787,13 @@ static const struct of_device_id fusb302_dt_match[] = {
 	{.compatible = "fcs,fusb302"},
 	{},
 };
+MODULE_DEVICE_TABLE(of, fusb302_dt_match);
 
 static const struct i2c_device_id fusb302_i2c_device_id[] = {
 	{"typec_fusb302", 0},
 	{},
 };
+MODULE_DEVICE_TABLE(i2c, fusb302_i2c_device_id);
 
 static const struct dev_pm_ops fusb302_pm_ops = {
 	.suspend = fusb302_pm_suspend,
-- 
cgit v1.2.3-59-g8ed1b


From aac53ee4557947d778cb6a255c719e5c70963c42 Mon Sep 17 00:00:00 2001
From: Yueyao Zhu <yueyao.zhu@gmail.com>
Date: Tue, 9 May 2017 09:04:55 -0700
Subject: staging: typec: fusb302: Fix chip->vbus_present init value

FUSB_REG_STATUS0 & FUSB_REG_STATUS0_VBUSOK = 0x40 & 0x80 is always
zero. Fix the code to what it is intended to be: check the VBUSOK
bit of the value read from address FUSB_REG_STATUS0.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Yueyao Zhu <yueyao.zhu@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/fusb302/fusb302.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index aa460f93a293..d8b50b49bb2d 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -489,7 +489,7 @@ static int tcpm_init(struct tcpc_dev *dev)
 	ret = fusb302_i2c_read(chip, FUSB_REG_STATUS0, &data);
 	if (ret < 0)
 		return ret;
-	chip->vbus_present = !!(FUSB_REG_STATUS0 & FUSB_REG_STATUS0_VBUSOK);
+	chip->vbus_present = !!(data & FUSB_REG_STATUS0_VBUSOK);
 	ret = fusb302_i2c_read(chip, FUSB_REG_DEVICE_ID, &data);
 	if (ret < 0)
 		return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 5fec4b54d0bf6c3eeb176b624ce50d6aef4819c0 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 9 May 2017 09:04:56 -0700
Subject: staging: typec: tcpm: Drop duplicate PD messages

Per USB PD standard, we have to drop duplicate PD messages.
We can not expect lower protocol layers to drop such messages,
since lower layers don't know if a message was dropped somewhere
else in the stack.

Originally-from: Puma Hsu <puma_hsu@htc.com>
Cc: Yueyao Zhu <yueyao.zhu@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/pd.h   | 10 ++++++++++
 drivers/staging/typec/tcpm.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/drivers/staging/typec/pd.h b/drivers/staging/typec/pd.h
index 8d97bdb95f23..510ef7279900 100644
--- a/drivers/staging/typec/pd.h
+++ b/drivers/staging/typec/pd.h
@@ -92,6 +92,16 @@ static inline unsigned int pd_header_type_le(__le16 header)
 	return pd_header_type(le16_to_cpu(header));
 }
 
+static inline unsigned int pd_header_msgid(u16 header)
+{
+	return (header >> PD_HEADER_ID_SHIFT) & PD_HEADER_ID_MASK;
+}
+
+static inline unsigned int pd_header_msgid_le(__le16 header)
+{
+	return pd_header_msgid(le16_to_cpu(header));
+}
+
 #define PD_MAX_PAYLOAD		7
 
 struct pd_message {
diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c
index abba655ba00a..c5d8b129c4f4 100644
--- a/drivers/staging/typec/tcpm.c
+++ b/drivers/staging/typec/tcpm.c
@@ -238,6 +238,7 @@ struct tcpm_port {
 	unsigned int hard_reset_count;
 	bool pd_capable;
 	bool explicit_contract;
+	unsigned int rx_msgid;
 
 	/* Partner capabilities/requests */
 	u32 sink_request;
@@ -1415,6 +1416,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 			break;
 		case SOFT_RESET_SEND:
 			port->message_id = 0;
+			port->rx_msgid = -1;
 			if (port->pwr_role == TYPEC_SOURCE)
 				next_state = SRC_SEND_CAPABILITIES;
 			else
@@ -1503,6 +1505,22 @@ static void tcpm_pd_rx_handler(struct work_struct *work)
 		 port->attached);
 
 	if (port->attached) {
+		enum pd_ctrl_msg_type type = pd_header_type_le(msg->header);
+		unsigned int msgid = pd_header_msgid_le(msg->header);
+
+		/*
+		 * USB PD standard, 6.6.1.2:
+		 * "... if MessageID value in a received Message is the
+		 * same as the stored value, the receiver shall return a
+		 * GoodCRC Message with that MessageID value and drop
+		 * the Message (this is a retry of an already received
+		 * Message). Note: this shall not apply to the Soft_Reset
+		 * Message which always has a MessageID value of zero."
+		 */
+		if (msgid == port->rx_msgid && type != PD_CTRL_SOFT_RESET)
+			goto done;
+		port->rx_msgid = msgid;
+
 		/*
 		 * If both ends believe to be DFP/host, we have a data role
 		 * mismatch.
@@ -1520,6 +1538,7 @@ static void tcpm_pd_rx_handler(struct work_struct *work)
 		}
 	}
 
+done:
 	mutex_unlock(&port->lock);
 	kfree(event);
 }
@@ -1957,6 +1976,12 @@ static void tcpm_reset_port(struct tcpm_port *port)
 	port->attached = false;
 	port->pd_capable = false;
 
+	/*
+	 * First Rx ID should be 0; set this to a sentinel of -1 so that
+	 * we can check tcpm_pd_rx_handler() if we had seen it before.
+	 */
+	port->rx_msgid = -1;
+
 	port->tcpc->set_pd_rx(port->tcpc, false);
 	tcpm_init_vbus(port);	/* also disables charging */
 	tcpm_init_vconn(port);
@@ -2170,6 +2195,7 @@ static void run_state_machine(struct tcpm_port *port)
 		port->pwr_opmode = TYPEC_PWR_MODE_USB;
 		port->caps_count = 0;
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		port->explicit_contract = false;
 		tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
 		break;
@@ -2329,6 +2355,7 @@ static void run_state_machine(struct tcpm_port *port)
 		typec_set_pwr_opmode(port->typec_port, TYPEC_PWR_MODE_USB);
 		port->pwr_opmode = TYPEC_PWR_MODE_USB;
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		port->explicit_contract = false;
 		tcpm_set_state(port, SNK_DISCOVERY, 0);
 		break;
@@ -2496,6 +2523,7 @@ static void run_state_machine(struct tcpm_port *port)
 	/* Soft_Reset states */
 	case SOFT_RESET:
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		tcpm_pd_send_control(port, PD_CTRL_ACCEPT);
 		if (port->pwr_role == TYPEC_SOURCE)
 			tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
@@ -2504,6 +2532,7 @@ static void run_state_machine(struct tcpm_port *port)
 		break;
 	case SOFT_RESET_SEND:
 		port->message_id = 0;
+		port->rx_msgid = -1;
 		if (tcpm_pd_send_control(port, PD_CTRL_SOFT_RESET))
 			tcpm_set_state_cond(port, hard_reset_state(port), 0);
 		else
-- 
cgit v1.2.3-59-g8ed1b


From 931693f973507b320ecbdccd0ac3e786ddaf795f Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 9 May 2017 09:04:57 -0700
Subject: staging: typec: tcpm: Set correct flags in PD request messages

We do support USB PD communication, and devices supported by this driver
typically use USB power for purposes other than USB communication.

Originally-from: Puma Hsu <puma_hsu@htc.com>
Cc: Yueyao Zhu <yueyao.zhu@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/tcpm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c
index c5d8b129c4f4..a385f7e2a6fd 100644
--- a/drivers/staging/typec/tcpm.c
+++ b/drivers/staging/typec/tcpm.c
@@ -1738,8 +1738,7 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
 	}
 	ma = min(ma, port->max_snk_ma);
 
-	/* XXX: Any other flags need to be set? */
-	flags = 0;
+	flags = RDO_USB_COMM | RDO_NO_SUSPEND;
 
 	/* Set mismatch bit if offered power is less than operating power */
 	mw = ma * mv / 1000;
-- 
cgit v1.2.3-59-g8ed1b


From 193a68011fdc002cc03d6e6edabf941251df5690 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 9 May 2017 09:04:58 -0700
Subject: staging: typec: tcpm: Respond to Discover Identity commands

If the lower level driver provided a list of VDOs in its configuration
data, send it to the partner as response to a Discover Identity command
if in device mode (UFP).

Cc: Yueyao Zhu <yueyao.zhu@gmail.com>
Originally-from: Puma Hsu <puma_hsu@htc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/pd_vdo.h |  4 +++-
 drivers/staging/typec/tcpm.c   | 27 +++++++++++++++++++++++++++
 drivers/staging/typec/tcpm.h   |  3 +++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/typec/pd_vdo.h b/drivers/staging/typec/pd_vdo.h
index dba172e0e0d1..d92259f8de0a 100644
--- a/drivers/staging/typec/pd_vdo.h
+++ b/drivers/staging/typec/pd_vdo.h
@@ -22,6 +22,9 @@
  * VDM object is minimum of VDM header + 6 additional data objects.
  */
 
+#define VDO_MAX_OBJECTS		6
+#define VDO_MAX_SIZE		(VDO_MAX_OBJECTS + 1)
+
 /*
  * VDM header
  * ----------
@@ -34,7 +37,6 @@
  * <5>      :: reserved (SVDM), command type (UVDM)
  * <4:0>    :: command
  */
-#define VDO_MAX_SIZE 7
 #define VDO(vid, type, custom)				\
 	(((vid) << 16) |				\
 	 ((type) << 15) |				\
diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c
index a385f7e2a6fd..c749e980ddf9 100644
--- a/drivers/staging/typec/tcpm.c
+++ b/drivers/staging/typec/tcpm.c
@@ -252,6 +252,8 @@ struct tcpm_port {
 	unsigned int nr_src_pdo;
 	u32 snk_pdo[PDO_MAX_OBJECTS];
 	unsigned int nr_snk_pdo;
+	u32 snk_vdo[VDO_MAX_OBJECTS];
+	unsigned int nr_snk_vdo;
 
 	unsigned int max_snk_mv;
 	unsigned int max_snk_ma;
@@ -998,6 +1000,7 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 	struct pd_mode_data *modep;
 	int rlen = 0;
 	u16 svid;
+	int i;
 
 	tcpm_log(port, "Rx VDM cmd 0x%x type %d cmd %d len %d",
 		 p0, cmd_type, cmd, cnt);
@@ -1008,6 +1011,14 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 	case CMDT_INIT:
 		switch (cmd) {
 		case CMD_DISCOVER_IDENT:
+			/* 6.4.4.3.1: Only respond as UFP (device) */
+			if (port->data_role == TYPEC_DEVICE &&
+			    port->nr_snk_vdo) {
+				for (i = 0; i <  port->nr_snk_vdo; i++)
+					response[i + 1]
+						= cpu_to_le32(port->snk_vdo[i]);
+				rlen = port->nr_snk_vdo + 1;
+			}
 			break;
 		case CMD_DISCOVER_SVID:
 			break;
@@ -3320,6 +3331,20 @@ static int tcpm_copy_pdos(u32 *dest_pdo, const u32 *src_pdo,
 	return nr_pdo;
 }
 
+static int tcpm_copy_vdos(u32 *dest_vdo, const u32 *src_vdo,
+			  unsigned int nr_vdo)
+{
+	unsigned int i;
+
+	if (nr_vdo > VDO_MAX_OBJECTS)
+		nr_vdo = VDO_MAX_OBJECTS;
+
+	for (i = 0; i < nr_vdo; i++)
+		dest_vdo[i] = src_vdo[i];
+
+	return nr_vdo;
+}
+
 void tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo,
 				     unsigned int nr_pdo)
 {
@@ -3410,6 +3435,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 					  tcpc->config->nr_src_pdo);
 	port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, tcpc->config->snk_pdo,
 					  tcpc->config->nr_snk_pdo);
+	port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo,
+					  tcpc->config->nr_snk_vdo);
 
 	port->max_snk_mv = tcpc->config->max_snk_mv;
 	port->max_snk_ma = tcpc->config->max_snk_ma;
diff --git a/drivers/staging/typec/tcpm.h b/drivers/staging/typec/tcpm.h
index 969b365e6549..19c307d31a5a 100644
--- a/drivers/staging/typec/tcpm.h
+++ b/drivers/staging/typec/tcpm.h
@@ -60,6 +60,9 @@ struct tcpc_config {
 	const u32 *snk_pdo;
 	unsigned int nr_snk_pdo;
 
+	const u32 *snk_vdo;
+	unsigned int nr_snk_vdo;
+
 	unsigned int max_snk_mv;
 	unsigned int max_snk_ma;
 	unsigned int max_snk_mw;
-- 
cgit v1.2.3-59-g8ed1b


From 050161ea3268ad72d276bc2c327e9654048a82b2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 9 May 2017 09:04:59 -0700
Subject: staging: typec: tcpm: Fix Port Power Role field in PS_RDY messages

PS_RDY messages sent during power swap sequences are expected to reflect
the new power role.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/tcpm.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c
index c749e980ddf9..20eb4ebcf8c3 100644
--- a/drivers/staging/typec/tcpm.c
+++ b/drivers/staging/typec/tcpm.c
@@ -2607,6 +2607,14 @@ static void run_state_machine(struct tcpm_port *port)
 		break;
 	case PR_SWAP_SRC_SNK_SOURCE_OFF:
 		tcpm_set_cc(port, TYPEC_CC_RD);
+		/*
+		 * USB-PD standard, 6.2.1.4, Port Power Role:
+		 * "During the Power Role Swap Sequence, for the initial Source
+		 * Port, the Port Power Role field shall be set to Sink in the
+		 * PS_RDY Message indicating that the initial Source’s power
+		 * supply is turned off"
+		 */
+		tcpm_set_pwr_role(port, TYPEC_SINK);
 		if (tcpm_pd_send_control(port, PD_CTRL_PS_RDY)) {
 			tcpm_set_state(port, ERROR_RECOVERY, 0);
 			break;
@@ -2614,7 +2622,6 @@ static void run_state_machine(struct tcpm_port *port)
 		tcpm_set_state_cond(port, SNK_UNATTACHED, PD_T_PS_SOURCE_ON);
 		break;
 	case PR_SWAP_SRC_SNK_SINK_ON:
-		tcpm_set_pwr_role(port, TYPEC_SINK);
 		tcpm_swap_complete(port, 0);
 		tcpm_set_state(port, SNK_STARTUP, 0);
 		break;
@@ -2626,8 +2633,15 @@ static void run_state_machine(struct tcpm_port *port)
 	case PR_SWAP_SNK_SRC_SOURCE_ON:
 		tcpm_set_cc(port, tcpm_rp_cc(port));
 		tcpm_set_vbus(port, true);
-		tcpm_pd_send_control(port, PD_CTRL_PS_RDY);
+		/*
+		 * USB PD standard, 6.2.1.4:
+		 * "Subsequent Messages initiated by the Policy Engine,
+		 * such as the PS_RDY Message sent to indicate that Vbus
+		 * is ready, will have the Port Power Role field set to
+		 * Source."
+		 */
 		tcpm_set_pwr_role(port, TYPEC_SOURCE);
+		tcpm_pd_send_control(port, PD_CTRL_PS_RDY);
 		tcpm_swap_complete(port, 0);
 		tcpm_set_state(port, SRC_STARTUP, 0);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From cde13b5dad60471886a3bccb4f4134c647c4a9dc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 May 2017 14:30:37 +0100
Subject: arm64: KVM: Do not use stack-protector to compile EL2 code

We like living dangerously. Nothing explicitely forbids stack-protector
to be used in the EL2 code, while distributions routinely compile their
kernel with it. We're just lucky that no code actually triggers the
instrumentation.

Let's not try our luck for much longer, and disable stack-protector
for code living at EL2.

Cc: stable@vger.kernel.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm64/kvm/hyp/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index aaf42ae8d8c3..14c4e3b14bcb 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -2,6 +2,8 @@
 # Makefile for Kernel-based Virtual Machine module, HYP part
 #
 
+ccflags-y += -fno-stack-protector
+
 KVM=../../../../virt/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
-- 
cgit v1.2.3-59-g8ed1b


From 43e24e82f35291d4c1ca78877ce1b20d3aeb78f1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 10 May 2017 16:57:49 +1000
Subject: powerpc/modules: If mprofile-kernel is enabled add it to vermagic

On powerpc we can build the kernel with two different ABIs for mcount(), which
is used by ftrace. Kernels built with one ABI do not know how to load modules
built with the other ABI. The new style ABI is called "mprofile-kernel", for
want of a better name.

Currently if we build a module using the old style ABI, and the kernel with
mprofile-kernel, when we load the module we'll oops something like:

  # insmod autofs4-no-mprofile-kernel.ko
  ftrace-powerpc: Unexpected instruction f8810028 around bl _mcount
  ------------[ cut here ]------------
  WARNING: CPU: 6 PID: 3759 at ../kernel/trace/ftrace.c:2024 ftrace_bug+0x2b8/0x3c0
  CPU: 6 PID: 3759 Comm: insmod Not tainted 4.11.0-rc3-gcc-5.4.1-00017-g5a61ef74f269 #11
  ...
  NIP [c0000000001eaa48] ftrace_bug+0x2b8/0x3c0
  LR [c0000000001eaff8] ftrace_process_locs+0x4a8/0x590
  Call Trace:
    alloc_pages_current+0xc4/0x1d0 (unreliable)
    ftrace_process_locs+0x4a8/0x590
    load_module+0x1c8c/0x28f0
    SyS_finit_module+0x110/0x140
    system_call+0x38/0xfc
  ...
  ftrace failed to modify
  [<d000000002a31024>] 0xd000000002a31024
   actual:   35:65:00:48

We can avoid this by including in the vermagic whether the kernel/module was
built with mprofile-kernel. Which results in:

  # insmod autofs4-pg.ko
  autofs4: version magic
  '4.11.0-rc3-gcc-5.4.1-00017-g5a61ef74f269 SMP mod_unload modversions '
  should be
  '4.11.0-rc3-gcc-5.4.1-00017-g5a61ef74f269-dirty SMP mod_unload modversions mprofile-kernel'
  insmod: ERROR: could not insert module autofs4-pg.ko: Invalid module format

Fixes: 8c50b72a3b4f ("powerpc/ftrace: Add Kconfig & Make glue for mprofile-kernel")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Jessica Yu <jeyu@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/module.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 53885512b8d3..6c0132c7212f 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -14,6 +14,10 @@
 #include <asm-generic/module.h>
 
 
+#ifdef CC_USING_MPROFILE_KERNEL
+#define MODULE_ARCH_VERMAGIC	"mprofile-kernel"
+#endif
+
 #ifndef __powerpc64__
 /*
  * Thanks to Paul M for explaining this.
-- 
cgit v1.2.3-59-g8ed1b


From 501ad27c67ed0b90df465f23d33e9aed64058a47 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 May 2017 14:30:38 +0100
Subject: arm: KVM: Do not use stack-protector to compile HYP code

We like living dangerously. Nothing explicitely forbids stack-protector
to be used in the HYP code, while distributions routinely compile their
kernel with it. We're just lucky that no code actually triggers the
instrumentation.

Let's not try our luck for much longer, and disable stack-protector
for code living at HYP.

Cc: stable@vger.kernel.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/kvm/hyp/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 3023bb530edf..8679405b0b2b 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -2,6 +2,8 @@
 # Makefile for Kernel-based Virtual Machine module, HYP part
 #
 
+ccflags-y += -fno-stack-protector
+
 KVM=../../../../virt/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
-- 
cgit v1.2.3-59-g8ed1b


From f48e91e87e67b56bef63393d1a02c6e22c1d7078 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 8 May 2017 17:16:26 +1000
Subject: powerpc/tm: Fix FP and VMX register corruption

In commit dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state
to store live registers"), a section of code was removed that copied
the current state to checkpointed state. That code should not have been
removed.

When an FP (Floating Point) unavailable is taken inside a transaction,
we need to abort the transaction. This is because at the time of the
tbegin, the FP state is bogus so the state stored in the checkpointed
registers is incorrect. To fix this, we treclaim (to get the
checkpointed GPRs) and then copy the thread_struct FP live state into
the checkpointed state. We then trecheckpoint so that the FP state is
correctly restored into the CPU.

The copying of the FP registers from live to checkpointed is what was
missing.

This simplifies the logic slightly from the original patch.
tm_reclaim_thread() will now always write the checkpointed FP
state. Either the checkpointed FP state will be written as part of
the actual treclaim (in tm.S), or it'll be a copy of the live
state. Which one we use is based on MSR[FP] from userspace.

Similarly for VMX.

Fixes: dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers")
Cc: stable@vger.kernel.org # 4.9+
Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: cyrilbur@gmail.com
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d645da302bf2..baae104b16c7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -864,6 +864,25 @@ static void tm_reclaim_thread(struct thread_struct *thr,
 	if (!MSR_TM_SUSPENDED(mfmsr()))
 		return;
 
+	/*
+	 * If we are in a transaction and FP is off then we can't have
+	 * used FP inside that transaction. Hence the checkpointed
+	 * state is the same as the live state. We need to copy the
+	 * live state to the checkpointed state so that when the
+	 * transaction is restored, the checkpointed state is correct
+	 * and the aborted transaction sees the correct state. We use
+	 * ckpt_regs.msr here as that's what tm_reclaim will use to
+	 * determine if it's going to write the checkpointed state or
+	 * not. So either this will write the checkpointed registers,
+	 * or reclaim will. Similarly for VMX.
+	 */
+	if ((thr->ckpt_regs.msr & MSR_FP) == 0)
+		memcpy(&thr->ckfp_state, &thr->fp_state,
+		       sizeof(struct thread_fp_state));
+	if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
+		memcpy(&thr->ckvr_state, &thr->vr_state,
+		       sizeof(struct thread_vr_state));
+
 	giveup_all(container_of(thr, struct task_struct, thread));
 
 	tm_reclaim(thr, thr->ckpt_regs.msr, cause);
-- 
cgit v1.2.3-59-g8ed1b


From ddf42d068f8802de122bb7efdfcb3179336053f1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 May 2017 14:30:39 +0100
Subject: KVM: arm/arm64: vgic-v2: Do not use Active+Pending state for a HW
 interrupt

When an interrupt is injected with the HW bit set (indicating that
deactivation should be propagated to the physical distributor),
special care must be taken so that we never mark the corresponding
LR with the Active+Pending state (as the pending state is kept in
the physycal distributor).

Cc: stable@vger.kernel.org
Fixes: 140b086dd197 ("KVM: arm/arm64: vgic-new: Add GICv2 world switch backend")
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-v2.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index a65757aab6d3..504b4bd0d651 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -149,6 +149,13 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->hw) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
+		/*
+		 * Never set pending+active on a HW interrupt, as the
+		 * pending state is kept at the physical distributor
+		 * level.
+		 */
+		if (irq->active && irq_is_pending(irq))
+			val &= ~GICH_LR_PENDING_BIT;
 	} else {
 		if (irq->config == VGIC_CONFIG_LEVEL)
 			val |= GICH_LR_EOI;
-- 
cgit v1.2.3-59-g8ed1b


From 3d6e77ad1489650afa20da92bb589c8778baa8da Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 May 2017 14:30:40 +0100
Subject: KVM: arm/arm64: vgic-v3: Do not use Active+Pending state for a HW
 interrupt

When an interrupt is injected with the HW bit set (indicating that
deactivation should be propagated to the physical distributor),
special care must be taken so that we never mark the corresponding
LR with the Active+Pending state (as the pending state is kept in
the physycal distributor).

Cc: stable@vger.kernel.org
Fixes: 59529f69f504 ("KVM: arm/arm64: vgic-new: Add GICv3 world switch backend")
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-v3.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 8fa737edde6f..6fe3f003636a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -127,6 +127,13 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->hw) {
 		val |= ICH_LR_HW;
 		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
+		/*
+		 * Never set pending+active on a HW interrupt, as the
+		 * pending state is kept at the physical distributor
+		 * level.
+		 */
+		if (irq->active && irq_is_pending(irq))
+			val &= ~ICH_LR_PENDING_BIT;
 	} else {
 		if (irq->config == VGIC_CONFIG_LEVEL)
 			val |= ICH_LR_EOI;
-- 
cgit v1.2.3-59-g8ed1b


From 15d2bffdde6268883647c6112970f74d3e1af651 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 May 2017 14:30:41 +0100
Subject: KVM: arm/arm64: vgic-v3: Use PREbits to infer the number of
 ICH_APxRn_EL2 registers

The GICv3 documentation is extremely confusing, as it talks about
the number of priorities represented by the ICH_APxRn_EL2 registers,
while it should really talk about the number of preemption levels.

This leads to a bug where we may access undefined ICH_APxRn_EL2
registers, since PREbits is allowed to be smaller than PRIbits.
Thankfully, nobody seem to have taken this path so far...

The fix is to use ICH_VTR_EL2.PREbits instead.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/hyp/vgic-v3-sr.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index bce6037cf01d..32c3295929b0 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -22,7 +22,7 @@
 #include <asm/kvm_hyp.h>
 
 #define vtr_to_max_lr_idx(v)		((v) & 0xf)
-#define vtr_to_nr_pri_bits(v)		(((u32)(v) >> 29) + 1)
+#define vtr_to_nr_pre_bits(v)		(((u32)(v) >> 26) + 1)
 
 static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
 {
@@ -135,13 +135,13 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 
 	if (used_lrs) {
 		int i;
-		u32 nr_pri_bits;
+		u32 nr_pre_bits;
 
 		cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
 
 		write_gicreg(0, ICH_HCR_EL2);
 		val = read_gicreg(ICH_VTR_EL2);
-		nr_pri_bits = vtr_to_nr_pri_bits(val);
+		nr_pre_bits = vtr_to_nr_pre_bits(val);
 
 		for (i = 0; i < used_lrs; i++) {
 			if (cpu_if->vgic_elrsr & (1 << i))
@@ -152,7 +152,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 			__gic_v3_set_lr(0, i);
 		}
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
 			cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
@@ -162,7 +162,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 			cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
 		}
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
 			cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
@@ -198,7 +198,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
 	u64 val;
-	u32 nr_pri_bits;
+	u32 nr_pre_bits;
 	int i;
 
 	/*
@@ -217,12 +217,12 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 	}
 
 	val = read_gicreg(ICH_VTR_EL2);
-	nr_pri_bits = vtr_to_nr_pri_bits(val);
+	nr_pre_bits = vtr_to_nr_pre_bits(val);
 
 	if (used_lrs) {
 		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
 			write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
@@ -232,7 +232,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 			write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
 		}
 
-		switch (nr_pri_bits) {
+		switch (nr_pre_bits) {
 		case 7:
 			write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
 			write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
-- 
cgit v1.2.3-59-g8ed1b


From 2c8e3f44f708a89a2c73a25a134af8c23998a2bc Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rmfrfs@gmail.com>
Date: Fri, 12 May 2017 21:16:13 +0100
Subject: staging: typec: fusb302: do not free gpio from managed resource

When allocating a gpio using the managed resource devm_, we can avoid freeing it
manually. But even if we did it we should use devm_gpio_free.

So, just remove the free of the gpio in the error path.

Signed-off-by: Rui Miguel Silva <rmfrfs@gmail.com>
Acked-by: Yueyao Zhu <yueyao.zhu@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/fusb302/fusb302.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index d8b50b49bb2d..ef5cceaa5967 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -1663,14 +1663,12 @@ static int init_gpio(struct fusb302_chip *chip)
 	if (ret < 0) {
 		fusb302_log(chip,
 			    "cannot set GPIO Int_N to input, ret=%d", ret);
-		gpio_free(chip->gpio_int_n);
 		return ret;
 	}
 	ret = gpio_to_irq(chip->gpio_int_n);
 	if (ret < 0) {
 		fusb302_log(chip,
 			    "cannot request IRQ for GPIO Int_N, ret=%d", ret);
-		gpio_free(chip->gpio_int_n);
 		return ret;
 	}
 	chip->gpio_int_n_irq = ret;
-- 
cgit v1.2.3-59-g8ed1b


From f03d95f59026d14219230795ac4dcda8c09b5321 Mon Sep 17 00:00:00 2001
From: Guru Das Srinagesh <gurooodas@gmail.com>
Date: Wed, 10 May 2017 22:51:35 -0700
Subject: staging: typec: Fix sparse warnings about incorrect types

Fix the following sparse warnings about incorrect type usage:

fusb302.c:1028:32: warning: incorrect type in argument 1 (different base types)
fusb302.c:1028:32:    expected unsigned short [unsigned] [usertype] header
fusb302.c:1028:32:    got restricted __le16 const [usertype] header
fusb302.c:1484:32: warning: incorrect type in argument 1 (different base types)
fusb302.c:1484:32:    expected unsigned short [unsigned] [usertype] header
fusb302.c:1484:32:    got restricted __le16 [usertype] header

Signed-off-by: Guru Das Srinagesh <gurooodas@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/fusb302/fusb302.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index ef5cceaa5967..6bd602db11be 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -1025,7 +1025,7 @@ static int fusb302_pd_send_message(struct fusb302_chip *chip,
 	buf[pos++] = FUSB302_TKN_SYNC1;
 	buf[pos++] = FUSB302_TKN_SYNC2;
 
-	len = pd_header_cnt(msg->header) * 4;
+	len = pd_header_cnt_le(msg->header) * 4;
 	/* plug 2 for header */
 	len += 2;
 	if (len > 0x1F) {
@@ -1481,7 +1481,7 @@ static int fusb302_pd_read_message(struct fusb302_chip *chip,
 				     (u8 *)&msg->header);
 	if (ret < 0)
 		return ret;
-	len = pd_header_cnt(msg->header) * 4;
+	len = pd_header_cnt_le(msg->header) * 4;
 	/* add 4 to length to include the CRC */
 	if (len > PD_MAX_PAYLOAD * 4) {
 		fusb302_log(chip, "PD message too long %d", len);
-- 
cgit v1.2.3-59-g8ed1b


From c21376631d6325590e53ac8720312d2b02494103 Mon Sep 17 00:00:00 2001
From: Ian Chard <ian@chard.org>
Date: Wed, 10 May 2017 10:20:59 +0100
Subject: staging: ccree: remove extraneous spin_unlock_bh() in error handler

An early error handler in send_request() tries to release a spinlock,
but the lock isn't acquired until the loop below it is entered.

Signed-off-by: Ian Chard <ian@chard.org>
Acked-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/ccree/ssi_request_mgr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/ccree/ssi_request_mgr.c b/drivers/staging/ccree/ssi_request_mgr.c
index 522bd62c102e..8611adf3bb2e 100644
--- a/drivers/staging/ccree/ssi_request_mgr.c
+++ b/drivers/staging/ccree/ssi_request_mgr.c
@@ -376,7 +376,6 @@ int send_request(
 	rc = ssi_power_mgr_runtime_get(&drvdata->plat_dev->dev);
 	if (rc != 0) {
 		SSI_LOG_ERR("ssi_power_mgr_runtime_get returned %x\n",rc);
-		spin_unlock_bh(&req_mgr_h->hw_lock);
 		return rc;
 	}
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From ff92b9e3c9f85fa442c430d70bf075499e1193b7 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Thu, 4 May 2017 10:58:20 +0100
Subject: staging: vc04_services: Fix bulk cache maintenance

vchiq_arm supports transfers less than one page and at arbitrary
alignment, using the dma-mapping API to perform its cache maintenance
(even though the VPU drives the DMA hardware). Read (DMA_FROM_DEVICE)
operations use cache invalidation for speed, falling back to
clean+invalidate on partial cache lines, with writes (DMA_TO_DEVICE)
using flushes.

If a read transfer has ends which aren't page-aligned, performing cache
maintenance as if they were whole pages can lead to memory corruption
since the partial cache lines at the ends (and any cache lines before or
after the transfer area) will be invalidated. This bug was masked until
the disabling of the cache flush in flush_dcache_page().

Honouring the requested transfer start- and end-points prevents the
corruption.

Fixes: cf9caf192988 ("staging: vc04_services: Replace dmac_map_area with dmac_map_sg")
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
Cc: stable <stable@vger.kernel.org> # 4.10
Reported-by: Stefan Wahren <stefan.wahren@i2se.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../interface/vchiq_arm/vchiq_2835_arm.c           | 31 +++++++++++++---------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
index 988ee61fb4a7..d04db3f55519 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
@@ -502,8 +502,15 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 	 */
 	sg_init_table(scatterlist, num_pages);
 	/* Now set the pages for each scatterlist */
-	for (i = 0; i < num_pages; i++)
-		sg_set_page(scatterlist + i, pages[i], PAGE_SIZE, 0);
+	for (i = 0; i < num_pages; i++)	{
+		unsigned int len = PAGE_SIZE - offset;
+
+		if (len > count)
+			len = count;
+		sg_set_page(scatterlist + i, pages[i], len, offset);
+		offset = 0;
+		count -= len;
+	}
 
 	dma_buffers = dma_map_sg(g_dev,
 				 scatterlist,
@@ -524,20 +531,20 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 		u32 addr = sg_dma_address(sg);
 
 		/* Note: addrs is the address + page_count - 1
-		 * The firmware expects the block to be page
+		 * The firmware expects blocks after the first to be page-
 		 * aligned and a multiple of the page size
 		 */
 		WARN_ON(len == 0);
-		WARN_ON(len & ~PAGE_MASK);
-		WARN_ON(addr & ~PAGE_MASK);
+		WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
+		WARN_ON(i && (addr & ~PAGE_MASK));
 		if (k > 0 &&
-		    ((addrs[k - 1] & PAGE_MASK) |
-			((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)
-		    == addr) {
-			addrs[k - 1] += (len >> PAGE_SHIFT);
-		} else {
-			addrs[k++] = addr | ((len >> PAGE_SHIFT) - 1);
-		}
+		    ((addrs[k - 1] & PAGE_MASK) +
+		     (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
+		    == (addr & PAGE_MASK))
+			addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
+		else
+			addrs[k++] = (addr & PAGE_MASK) |
+				(((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1);
 	}
 
 	/* Partial cache lines (fragments) require special measures */
-- 
cgit v1.2.3-59-g8ed1b


From baabd567f87be05330faa5140f72a91960e7405a Mon Sep 17 00:00:00 2001
From: Malcolm Priestley <tvboxspy@gmail.com>
Date: Thu, 11 May 2017 18:57:43 +0100
Subject: staging: rtl8192e: rtl92e_fill_tx_desc fix write to mapped out
 memory.

The driver attempts to alter memory that is mapped to PCI device.

This is because tx_fwinfo_8190pci points to skb->data

Move the pci_map_single to when completed buffer is ready to be mapped with
psdec is empty to drop on mapping error.

Signed-off-by: Malcolm Priestley <tvboxspy@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 4723a0bd5067..a23628f390c9 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1182,8 +1182,7 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 			  struct cb_desc *cb_desc, struct sk_buff *skb)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	dma_addr_t mapping = pci_map_single(priv->pdev, skb->data, skb->len,
-			 PCI_DMA_TODEVICE);
+	dma_addr_t mapping;
 	struct tx_fwinfo_8190pci *pTxFwInfo;
 
 	pTxFwInfo = (struct tx_fwinfo_8190pci *)skb->data;
@@ -1194,8 +1193,6 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 	pTxFwInfo->Short = _rtl92e_query_is_short(pTxFwInfo->TxHT,
 						  pTxFwInfo->TxRate, cb_desc);
 
-	if (pci_dma_mapping_error(priv->pdev, mapping))
-		netdev_err(dev, "%s(): DMA Mapping error\n", __func__);
 	if (cb_desc->bAMPDUEnable) {
 		pTxFwInfo->AllowAggregation = 1;
 		pTxFwInfo->RxMF = cb_desc->ampdu_factor;
@@ -1230,6 +1227,14 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 	}
 
 	memset((u8 *)pdesc, 0, 12);
+
+	mapping = pci_map_single(priv->pdev, skb->data, skb->len,
+				 PCI_DMA_TODEVICE);
+	if (pci_dma_mapping_error(priv->pdev, mapping)) {
+		netdev_err(dev, "%s(): DMA Mapping error\n", __func__);
+		return;
+	}
+
 	pdesc->LINIP = 0;
 	pdesc->CmdInit = 1;
 	pdesc->Offset = sizeof(struct tx_fwinfo_8190pci) + 8;
-- 
cgit v1.2.3-59-g8ed1b


From 867510bde14e7b7fc6dd0f50b48f6753cfbd227a Mon Sep 17 00:00:00 2001
From: Malcolm Priestley <tvboxspy@gmail.com>
Date: Thu, 11 May 2017 18:57:44 +0100
Subject: staging: rtl8192e: fix 2 byte alignment of register BSSIDR.

BSSIDR has two byte alignment on PCI ioremap correct the write
by swapping to 16 bits first.

This fixes a problem that the device associates fail because
the filter is not set correctly.

Signed-off-by: Malcolm Priestley <tvboxspy@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index a23628f390c9..e03d0a3a6dcc 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -97,8 +97,9 @@ void rtl92e_set_reg(struct net_device *dev, u8 variable, u8 *val)
 
 	switch (variable) {
 	case HW_VAR_BSSID:
-		rtl92e_writel(dev, BSSIDR, ((u32 *)(val))[0]);
-		rtl92e_writew(dev, BSSIDR+2, ((u16 *)(val+2))[0]);
+		/* BSSIDR 2 byte alignment */
+		rtl92e_writew(dev, BSSIDR, *(u16 *)val);
+		rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(val + 2));
 		break;
 
 	case HW_VAR_MEDIA_STATUS:
@@ -961,8 +962,8 @@ static void _rtl92e_net_update(struct net_device *dev)
 	rtl92e_config_rate(dev, &rate_config);
 	priv->dot11CurrentPreambleMode = PREAMBLE_AUTO;
 	 priv->basic_rate = rate_config &= 0x15f;
-	rtl92e_writel(dev, BSSIDR, ((u32 *)net->bssid)[0]);
-	rtl92e_writew(dev, BSSIDR+4, ((u16 *)net->bssid)[2]);
+	rtl92e_writew(dev, BSSIDR, *(u16 *)net->bssid);
+	rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(net->bssid + 2));
 
 	if (priv->rtllib->iw_mode == IW_MODE_ADHOC) {
 		rtl92e_writew(dev, ATIMWND, 2);
-- 
cgit v1.2.3-59-g8ed1b


From 90be652c9f157d44b9c2803f902a8839796c090d Mon Sep 17 00:00:00 2001
From: Malcolm Priestley <tvboxspy@gmail.com>
Date: Thu, 11 May 2017 18:57:45 +0100
Subject: staging: rtl8192e: rtl92e_get_eeprom_size Fix read size of EPROM_CMD.

EPROM_CMD is 2 byte aligned on PCI map so calling with rtl92e_readl
will return invalid data so use rtl92e_readw.

The device is unable to select the right eeprom type.

Signed-off-by: Malcolm Priestley <tvboxspy@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index e03d0a3a6dcc..1c6ed5b2a6f9 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -625,7 +625,7 @@ void rtl92e_get_eeprom_size(struct net_device *dev)
 	struct r8192_priv *priv = rtllib_priv(dev);
 
 	RT_TRACE(COMP_INIT, "===========>%s()\n", __func__);
-	curCR = rtl92e_readl(dev, EPROM_CMD);
+	curCR = rtl92e_readw(dev, EPROM_CMD);
 	RT_TRACE(COMP_INIT, "read from Reg Cmd9346CR(%x):%x\n", EPROM_CMD,
 		 curCR);
 	priv->epromtype = (curCR & EPROM_CMD_9356SEL) ? EEPROM_93C56 :
-- 
cgit v1.2.3-59-g8ed1b


From 95d93e271d920dfda369d4740b1cc1061d41fe7f Mon Sep 17 00:00:00 2001
From: Malcolm Priestley <tvboxspy@gmail.com>
Date: Thu, 11 May 2017 18:57:46 +0100
Subject: staging: rtl8192e: GetTs Fix invalid TID 7 warning.

TID 7 is a valid value for QoS IEEE 802.11e.

The switch statement that follows states 7 is valid.

Remove function IsACValid and use the default case to filter
invalid TIDs.

Signed-off-by: Malcolm Priestley <tvboxspy@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_TSProc.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_TSProc.c b/drivers/staging/rtl8192e/rtl819x_TSProc.c
index 48bbd9e8a52f..dcc4eb691889 100644
--- a/drivers/staging/rtl8192e/rtl819x_TSProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_TSProc.c
@@ -306,11 +306,6 @@ static void MakeTSEntry(struct ts_common_info *pTsCommonInfo, u8 *Addr,
 	pTsCommonInfo->TClasNum = TCLAS_Num;
 }
 
-static bool IsACValid(unsigned int tid)
-{
-	return tid < 7;
-}
-
 bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS,
 	   u8 *Addr, u8 TID, enum tr_select TxRxSelect, bool bAddNewTs)
 {
@@ -328,12 +323,6 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS,
 	if (ieee->current_network.qos_data.supported == 0) {
 		UP = 0;
 	} else {
-		if (!IsACValid(TID)) {
-			netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n",
-				    __func__, TID);
-			return false;
-		}
-
 		switch (TID) {
 		case 0:
 		case 3:
@@ -351,6 +340,10 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS,
 		case 7:
 			UP = 7;
 			break;
+		default:
+			netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n",
+				    __func__, TID);
+			return false;
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From ad0ccac76dcc92c3331f4c94c9fc54f8bf1ab20c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 11 May 2017 11:41:19 +0200
Subject: USB: serial: ir-usb: fix big-endian baud-rate debug printk

Add missing endianness conversion when printing the supported baud
rates.

Found using sparse:

	warning: restricted __le16 degrades to integer

Fixes: e0d795e4f36c ("usb: irda: cleanup on ir-usb module")
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ir-usb.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 73956d48a0c5..f9734a96d516 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -197,6 +197,7 @@ static u8 ir_xbof_change(u8 xbof)
 static int ir_startup(struct usb_serial *serial)
 {
 	struct usb_irda_cs_descriptor *irda_desc;
+	int rates;
 
 	irda_desc = irda_usb_find_class_desc(serial, 0);
 	if (!irda_desc) {
@@ -205,18 +206,20 @@ static int ir_startup(struct usb_serial *serial)
 		return -ENODEV;
 	}
 
+	rates = le16_to_cpu(irda_desc->wBaudRate);
+
 	dev_dbg(&serial->dev->dev,
 		"%s - Baud rates supported:%s%s%s%s%s%s%s%s%s\n",
 		__func__,
-		(irda_desc->wBaudRate & USB_IRDA_BR_2400) ? " 2400" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_9600) ? " 9600" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_19200) ? " 19200" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_38400) ? " 38400" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_57600) ? " 57600" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_115200) ? " 115200" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_576000) ? " 576000" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_1152000) ? " 1152000" : "",
-		(irda_desc->wBaudRate & USB_IRDA_BR_4000000) ? " 4000000" : "");
+		(rates & USB_IRDA_BR_2400) ? " 2400" : "",
+		(rates & USB_IRDA_BR_9600) ? " 9600" : "",
+		(rates & USB_IRDA_BR_19200) ? " 19200" : "",
+		(rates & USB_IRDA_BR_38400) ? " 38400" : "",
+		(rates & USB_IRDA_BR_57600) ? " 57600" : "",
+		(rates & USB_IRDA_BR_115200) ? " 115200" : "",
+		(rates & USB_IRDA_BR_576000) ? " 576000" : "",
+		(rates & USB_IRDA_BR_1152000) ? " 1152000" : "",
+		(rates & USB_IRDA_BR_4000000) ? " 4000000" : "");
 
 	switch (irda_desc->bmAdditionalBOFs) {
 	case USB_IRDA_AB_48:
-- 
cgit v1.2.3-59-g8ed1b


From 26cede343656c0bc2c33cdc783771282405c7fb2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 11 May 2017 11:41:20 +0200
Subject: USB: serial: mct_u232: fix big-endian baud-rate handling

Drop erroneous cpu_to_le32 when setting the baud rate, something which
corrupted the divisor on big-endian hosts.

Found using sparse:

	warning: incorrect type in argument 1 (different base types)
	    expected unsigned int [unsigned] [usertype] val
	    got restricted __le32 [usertype] <noident>

Fixes: af2ac1a091bc ("USB: serial mct_usb232: move DMA buffers to heap")
Cc: stable <stable@vger.kernel.org>     # 2.6.34
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-By: Pete Zaitcev <zaitcev@yahoo.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/mct_u232.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index edbc81f205c2..70f346f1aa86 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -189,7 +189,7 @@ static int mct_u232_set_baud_rate(struct tty_struct *tty,
 		return -ENOMEM;
 
 	divisor = mct_u232_calculate_baud_rate(serial, value, &speed);
-	put_unaligned_le32(cpu_to_le32(divisor), buf);
+	put_unaligned_le32(divisor, buf);
 	rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
 				MCT_U232_SET_BAUD_RATE_REQUEST,
 				MCT_U232_SET_REQUEST_TYPE,
-- 
cgit v1.2.3-59-g8ed1b


From 6aeb75e6adfaed16e58780309613a578fe1ee90b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 11 May 2017 11:41:21 +0200
Subject: USB: serial: io_ti: fix div-by-zero in set_termios

Fix a division-by-zero in set_termios when debugging is enabled and a
high-enough speed has been requested so that the divisor value becomes
zero.

Instead of just fixing the offending debug statement, cap the baud rate
at the base as a zero divisor value also appears to crash the firmware.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>     # 2.6.12
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/io_ti.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 87798e625d6c..6cefb9cb133d 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2336,8 +2336,11 @@ static void change_port_settings(struct tty_struct *tty,
 	if (!baud) {
 		/* pick a default, any default... */
 		baud = 9600;
-	} else
+	} else {
+		/* Avoid a zero divisor. */
+		baud = min(baud, 461550);
 		tty_encode_baud_rate(tty, baud, baud);
+	}
 
 	edge_port->baud_rate = baud;
 	config->wBaudRate = (__u16)((461550L + baud/2) / baud);
-- 
cgit v1.2.3-59-g8ed1b


From 6c0d706b563af732adb094c5bf807437e8963e84 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 3 May 2017 15:17:51 +0100
Subject: kvm: arm/arm64: Fix race in resetting stage2 PGD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In kvm_free_stage2_pgd() we check the stage2 PGD before holding
the lock and proceed to take the lock if it is valid. And we unmap
the page tables, followed by releasing the lock. We reset the PGD
only after dropping this lock, which could cause a race condition
where another thread waiting on or even holding the lock, could
potentially see that the PGD is still valid and proceed to perform
a stage2 operation and later encounter a NULL PGD.

[223090.242280] Unable to handle kernel NULL pointer dereference at
virtual address 00000040
[223090.262330] PC is at unmap_stage2_range+0x8c/0x428
[223090.262332] LR is at kvm_unmap_hva_handler+0x2c/0x3c
[223090.262531] Call trace:
[223090.262533] [<ffff0000080adb78>] unmap_stage2_range+0x8c/0x428
[223090.262535] [<ffff0000080adf40>] kvm_unmap_hva_handler+0x2c/0x3c
[223090.262537] [<ffff0000080ace2c>] handle_hva_to_gpa+0xb0/0x104
[223090.262539] [<ffff0000080af988>] kvm_unmap_hva+0x5c/0xbc
[223090.262543] [<ffff0000080a2478>]
kvm_mmu_notifier_invalidate_page+0x50/0x8c
[223090.262547] [<ffff0000082274f8>]
__mmu_notifier_invalidate_page+0x5c/0x84
[223090.262551] [<ffff00000820b700>] try_to_unmap_one+0x1d0/0x4a0
[223090.262553] [<ffff00000820c5c8>] rmap_walk+0x1cc/0x2e0
[223090.262555] [<ffff00000820c90c>] try_to_unmap+0x74/0xa4
[223090.262557] [<ffff000008230ce4>] migrate_pages+0x31c/0x5ac
[223090.262561] [<ffff0000081f869c>] compact_zone+0x3fc/0x7ac
[223090.262563] [<ffff0000081f8ae0>] compact_zone_order+0x94/0xb0
[223090.262564] [<ffff0000081f91c0>] try_to_compact_pages+0x108/0x290
[223090.262569] [<ffff0000081d5108>] __alloc_pages_direct_compact+0x70/0x1ac
[223090.262571] [<ffff0000081d64a0>] __alloc_pages_nodemask+0x434/0x9f4
[223090.262572] [<ffff0000082256f0>] alloc_pages_vma+0x230/0x254
[223090.262574] [<ffff000008235e5c>] do_huge_pmd_anonymous_page+0x114/0x538
[223090.262576] [<ffff000008201bec>] handle_mm_fault+0xd40/0x17a4
[223090.262577] [<ffff0000081fb324>] __get_user_pages+0x12c/0x36c
[223090.262578] [<ffff0000081fb804>] get_user_pages_unlocked+0xa4/0x1b8
[223090.262579] [<ffff0000080a3ce8>] __gfn_to_pfn_memslot+0x280/0x31c
[223090.262580] [<ffff0000080a3dd0>] gfn_to_pfn_prot+0x4c/0x5c
[223090.262582] [<ffff0000080af3f8>] kvm_handle_guest_abort+0x240/0x774
[223090.262584] [<ffff0000080b2bac>] handle_exit+0x11c/0x1ac
[223090.262586] [<ffff0000080ab99c>] kvm_arch_vcpu_ioctl_run+0x31c/0x648
[223090.262587] [<ffff0000080a1d78>] kvm_vcpu_ioctl+0x378/0x768
[223090.262590] [<ffff00000825df5c>] do_vfs_ioctl+0x324/0x5a4
[223090.262591] [<ffff00000825e26c>] SyS_ioctl+0x90/0xa4
[223090.262595] [<ffff000008085d84>] el0_svc_naked+0x38/0x3c

This patch moves the stage2 PGD manipulation under the lock.

Reported-by: Alexander Graf <agraf@suse.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/mmu.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 313ee646480f..909a1a793b31 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -829,22 +829,22 @@ void stage2_unmap_vm(struct kvm *kvm)
  * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
  * underlying level-2 and level-3 tables before freeing the actual level-1 table
  * and setting the struct pointer to NULL.
- *
- * Note we don't need locking here as this is only called when the VM is
- * destroyed, which can only be done once.
  */
 void kvm_free_stage2_pgd(struct kvm *kvm)
 {
-	if (kvm->arch.pgd == NULL)
-		return;
+	void *pgd = NULL;
 
 	spin_lock(&kvm->mmu_lock);
-	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+	if (kvm->arch.pgd) {
+		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+		pgd = kvm->arch.pgd;
+		kvm->arch.pgd = NULL;
+	}
 	spin_unlock(&kvm->mmu_lock);
 
 	/* Free the HW pgd, one page at a time */
-	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
-	kvm->arch.pgd = NULL;
+	if (pgd)
+		free_pages_exact(pgd, S2_PGD_SIZE);
 }
 
 static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-- 
cgit v1.2.3-59-g8ed1b


From a2b7cbdd2559aff06cebc28a7150f81c307a90d3 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Wed, 19 Apr 2017 11:39:20 -0700
Subject: netfilter: ctnetlink: Make some parameters integer to avoid enum
 mismatch

Not all parameters passed to ctnetlink_parse_tuple() and
ctnetlink_exp_dump_tuple() match the enum type in the signatures of these
functions. Since this is intended change the argument type of to be an
unsigned integer value.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dcf561b5c97a..fa752626029e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1007,9 +1007,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
-		      struct nf_conntrack_tuple *tuple,
-		      enum ctattr_type type, u_int8_t l3num,
-		      struct nf_conntrack_zone *zone)
+		      struct nf_conntrack_tuple *tuple, u32 type,
+		      u_int8_t l3num, struct nf_conntrack_zone *zone)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -2447,7 +2446,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = {
 
 static int ctnetlink_exp_dump_tuple(struct sk_buff *skb,
 				    const struct nf_conntrack_tuple *tuple,
-				    enum ctattr_expect type)
+				    u32 type)
 {
 	struct nlattr *nest_parms;
 
-- 
cgit v1.2.3-59-g8ed1b


From fb317002ab4419ae7e068bee6897f2d5745aa3b9 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 9 May 2017 12:50:53 +0200
Subject: s390/virtio: change virtio_feature_desc:features type to __le32

The feature member of virtio_feature_desc contains little endian
values, given that it contents will be converted with
le32_to_cpu(). The "wrong" __u32 type leads to the sparse warnings
below.
In order to avoid them, use the correct __le32 type instead.

drivers/s390/virtio/virtio_ccw.c:749:14: warning: cast to restricted __le32
drivers/s390/virtio/virtio_ccw.c:762:28: warning: cast to restricted __le32

Acked-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/virtio/virtio_ccw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 0ed209f3d8b0..c7d4ef7b4b22 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -87,7 +87,7 @@ struct vq_info_block {
 } __packed;
 
 struct virtio_feature_desc {
-	__u32 features;
+	__le32 features;
 	__u8 index;
 } __packed;
 
-- 
cgit v1.2.3-59-g8ed1b


From ca6e8cdbe1865caf7b05483e1a242e72d9bc919f Mon Sep 17 00:00:00 2001
From: Ian W MORRISON <ianwmorrison@gmail.com>
Date: Mon, 8 May 2017 23:40:35 +1000
Subject: staging: rtl8723bs: remove re-positioned call to kfree in
 os_dep/ioctl_cfg80211.c

A re-positioned call to kfree() in
drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
causes a segmentation error. This patch removed the kfree() call.

Fixes 6557ddfec348 ("staging: rtl8723bs: Fix various errors in os_dep/ioctl_cfg80211.c")
Signed-off-by: Ian W Morrison <ianwmorrison@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index 5e7a61f24f8d..36c3189fc4b7 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -3531,7 +3531,6 @@ int rtw_wdev_alloc(struct adapter *padapter, struct device *dev)
 		pwdev_priv->power_mgmt = true;
 	else
 		pwdev_priv->power_mgmt = false;
-	kfree((u8 *)wdev);
 
 	return ret;
 
-- 
cgit v1.2.3-59-g8ed1b


From d110a3942aca78d14929bc648aeb83ee0b245a61 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 6 May 2017 20:28:02 +0800
Subject: netfilter: don't setup nat info for confirmed ct

We cannot setup nat info if the ct has been confirmed already, else,
different cpu may race to handle the same ct. In extreme situation,
we may hit the "BUG_ON(nf_nat_initialized(ct, maniptype))" in the
nf_nat_setup_info.

Also running the following commands will easily hit NF_CT_ASSERT in
nf_conntrack_alter_reply:
  # nft flush ruleset
  # ping -c 2 -W 1 1.1.1.111 &
  # nft add table t
  # nft add chain t c {type nat hook postrouting priority 0 \;}
  # nft add rule t c snat to 4.5.6.7
  WARNING: CPU: 1 PID: 10065 at net/netfilter/nf_conntrack_core.c:1472
  nf_conntrack_alter_reply+0x9a/0x1a0 [nf_conntrack]
  [...]
  Call Trace:
   nf_nat_setup_info+0xad/0x840 [nf_nat]
   ? deactivate_slab+0x65d/0x6c0
   nft_nat_eval+0xcd/0x100 [nft_nat]
   nft_do_chain+0xff/0x5d0 [nf_tables]
   ? mark_held_locks+0x6f/0xa0
   ? __local_bh_enable_ip+0x70/0xa0
   ? trace_hardirqs_on_caller+0x11f/0x190
   ? ipt_do_table+0x310/0x610
   ? trace_hardirqs_on+0xd/0x10
   ? __local_bh_enable_ip+0x70/0xa0
   ? ipt_do_table+0x32b/0x610
   ? __lock_acquire+0x2ac/0x1580
   ? ipt_do_table+0x32b/0x610
   nft_nat_do_chain+0x65/0x80 [nft_chain_nat_ipv4]
   nf_nat_ipv4_fn+0x1ae/0x240 [nf_nat_ipv4]
   nf_nat_ipv4_out+0x4a/0xf0 [nf_nat_ipv4]
   nft_nat_ipv4_out+0x15/0x20 [nft_chain_nat_ipv4]
   nf_hook_slow+0x2c/0xf0
   ip_output+0x154/0x270

So for the confirmed ct, just ignore it and return NF_ACCEPT.

Fixes: 9a08ecfe74d7 ("netfilter: don't attach a nat extension by default")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b48d6b5aae8a..ef0be325a0c6 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct,
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 
+	/* Can't setup nat info for confirmed ct. */
+	if (nf_ct_is_confirmed(ct))
+		return NF_ACCEPT;
+
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
-- 
cgit v1.2.3-59-g8ed1b


From d91fc59cd77c719f33eda65c194ad8f95a055190 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 7 May 2017 22:01:55 +0800
Subject: netfilter: introduce nf_conntrack_helper_put helper function

And convert module_put invocation to nf_conntrack_helper_put, this is
prepared for the followup patch, which will add a refcnt for cthelper,
so we can reject the deleting request when cthelper is in use.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 2 ++
 net/netfilter/nf_conntrack_helper.c         | 6 ++++++
 net/netfilter/nft_ct.c                      | 4 ++--
 net/netfilter/xt_CT.c                       | 6 +++---
 net/openvswitch/conntrack.c                 | 4 ++--
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index e04fa7691e5d..c1c12411103a 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -79,6 +79,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
 struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
 							       u16 l3num,
 							       u8 protonum);
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper);
+
 void nf_ct_helper_init(struct nf_conntrack_helper *helper,
 		       u16 l3num, u16 protonum, const char *name,
 		       u16 default_port, u16 spec_port, u32 id,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3a60efa7799b..e17006b6e434 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -181,6 +181,12 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
 
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
+{
+	module_put(helper->me);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
+
 struct nf_conn_help *
 nf_ct_helper_ext_add(struct nf_conn *ct,
 		     struct nf_conntrack_helper *helper, gfp_t gfp)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a34ceb38fc55..1678e9e75e8e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj)
 	struct nft_ct_helper_obj *priv = nft_obj_data(obj);
 
 	if (priv->helper4)
-		module_put(priv->helper4->me);
+		nf_conntrack_helper_put(priv->helper4);
 	if (priv->helper6)
-		module_put(priv->helper6->me);
+		nf_conntrack_helper_put(priv->helper6);
 }
 
 static void nft_ct_helper_obj_eval(struct nft_object *obj,
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index bb7ad82dcd56..623ef37de886 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 
 	help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
 	if (help == NULL) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -263,7 +263,7 @@ out:
 err4:
 	help = nfct_help(ct);
 	if (help)
-		module_put(help->helper->me);
+		nf_conntrack_helper_put(help->helper);
 err3:
 	nf_ct_tmpl_free(ct);
 err2:
@@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 	if (ct) {
 		help = nfct_help(ct);
 		if (help)
-			module_put(help->helper->me);
+			nf_conntrack_helper_put(help->helper);
 
 		nf_ct_netns_put(par->net, par->family);
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index bf602e33c40a..08679ebb3068 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 
 	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
 	if (!help) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a)
 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
 {
 	if (ct_info->helper)
-		module_put(ct_info->helper->me);
+		nf_conntrack_helper_put(ct_info->helper);
 	if (ct_info->ct)
 		nf_ct_tmpl_free(ct_info->ct);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9338d7b4418e9996a7642867d8f6b482a6040ed6 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 7 May 2017 22:01:56 +0800
Subject: netfilter: nfnl_cthelper: reject del request if helper obj is in use

We can still delete the ct helper even if it is in use, this will cause
a use-after-free error. In more detail, I mean:
  # nfct helper add ssdp inet udp
  # iptables -t raw -A OUTPUT -p udp -j CT --helper ssdp
  # nfct helper delete ssdp //--> oops, succeed!
  BUG: unable to handle kernel paging request at 000026ca
  IP: 0x26ca
  [...]
  Call Trace:
   ? ipv4_helper+0x62/0x80 [nf_conntrack_ipv4]
   nf_hook_slow+0x21/0xb0
   ip_output+0xe9/0x100
   ? ip_fragment.constprop.54+0xc0/0xc0
   ip_local_out+0x33/0x40
   ip_send_skb+0x16/0x80
   udp_send_skb+0x84/0x240
   udp_sendmsg+0x35d/0xa50

So add reference count to fix this issue, if ct helper is used by
others, reject the delete request.

Apply this patch:
  # nfct helper delete ssdp
  nfct v1.4.3: netlink error: Device or resource busy

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h |  2 ++
 net/netfilter/nf_conntrack_helper.c         |  6 ++++++
 net/netfilter/nfnetlink_cthelper.c          | 17 +++++++++++------
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index c1c12411103a..c519bb5b5bb8 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -9,6 +9,7 @@
 
 #ifndef _NF_CONNTRACK_HELPER_H
 #define _NF_CONNTRACK_HELPER_H
+#include <linux/refcount.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -26,6 +27,7 @@ struct nf_conntrack_helper {
 	struct hlist_node hnode;	/* Internal use. */
 
 	char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */
+	refcount_t refcnt;
 	struct module *me;		/* pointer to self */
 	const struct nf_conntrack_expect_policy *expect_policy;
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index e17006b6e434..7f6100ca63be 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 #endif
 	if (h != NULL && !try_module_get(h->me))
 		h = NULL;
+	if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) {
+		module_put(h->me);
+		h = NULL;
+	}
 
 	rcu_read_unlock();
 
@@ -183,6 +187,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
 
 void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
 {
+	refcount_dec(&helper->refcnt);
 	module_put(helper->me);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
@@ -423,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 			}
 		}
 	}
+	refcount_set(&me->refcnt, 1);
 	hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
 	nf_ct_helper_count++;
 out:
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 950bf6eadc65..be678a323598 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
+	ret = -ENOENT;
 	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
 		cur = &nlcth->helper;
 		j++;
@@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		     tuple.dst.protonum != cur->tuple.dst.protonum))
 			continue;
 
-		found = true;
-		nf_conntrack_helper_unregister(cur);
-		kfree(cur->expect_policy);
+		if (refcount_dec_if_one(&cur->refcnt)) {
+			found = true;
+			nf_conntrack_helper_unregister(cur);
+			kfree(cur->expect_policy);
 
-		list_del(&nlcth->list);
-		kfree(nlcth);
+			list_del(&nlcth->list);
+			kfree(nlcth);
+		} else {
+			ret = -EBUSY;
+		}
 	}
 
 	/* Make sure we return success if we flush and there is no helpers */
-	return (found || j == 0) ? 0 : -ENOENT;
+	return (found || j == 0) ? 0 : ret;
 }
 
 static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
-- 
cgit v1.2.3-59-g8ed1b


From 324318f0248c31be8a08984146e7e4dd7cdd091d Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 9 May 2017 16:17:37 -0400
Subject: netfilter: xtables: zero padding in data_to_user

When looking up an iptables rule, the iptables binary compares the
aligned match and target data (XT_ALIGN). In some cases this can
exceed the actual data size to include padding bytes.

Before commit f77bc5b23fb1 ("iptables: use match, target and data
copy_to_user helpers") the malloc()ed bytes were overwritten by the
kernel with kzalloced contents, zeroing the padding and making the
comparison succeed. After this patch, the kernel copies and clears
only data, leaving the padding bytes undefined.

Extend the clear operation from data size to aligned data size to
include the padding bytes, if any.

Padding bytes can be observed in both match and target, and the bug
triggered, by issuing a rule with match icmp and target ACCEPT:

  iptables -t mangle -A INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT
  iptables -t mangle -D INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT

Fixes: f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers")
Reported-by: Paul Moore <pmoore@redhat.com>
Reported-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/bridge/netfilter/ebtables.c    | 9 ++++++---
 net/netfilter/x_tables.c           | 9 ++++++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index be378cf47fcc..b3044c2c62cb 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m,
 int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u);
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size);
+		    int usersize, int size, int aligned_size);
 
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9ec0c9f908fa..9c6e619f452b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
 	strlcpy(name, _name, sizeof(name));
 	if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
 	    put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
-	    xt_data_to_user(um + entrysize, data, usersize, datasize))
+	    xt_data_to_user(um + entrysize, data, usersize, datasize,
+			    XT_ALIGN(datasize)))
 		return -EFAULT;
 
 	return 0;
@@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 		if (match->compat_to_user(cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, m->data, match->usersize, msize))
+		if (xt_data_to_user(cm->data, m->data, match->usersize, msize,
+				    COMPAT_XT_ALIGN(msize)))
 			return -EFAULT;
 	}
 
@@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 		if (target->compat_to_user(cm->data, t->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize))
+		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize,
+				    COMPAT_XT_ALIGN(tsize)))
 			return -EFAULT;
 	}
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8876b7da6884..d17769599c10 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -283,12 +283,13 @@ static int xt_obj_to_user(u16 __user *psize, u16 size,
 		       &U->u.user.revision, K->u.kernel.TYPE->revision)
 
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size)
+		    int usersize, int size, int aligned_size)
 {
 	usersize = usersize ? : size;
 	if (copy_to_user(dst, src, usersize))
 		return -EFAULT;
-	if (usersize != size && clear_user(dst + usersize, size - usersize))
+	if (usersize != aligned_size &&
+	    clear_user(dst + usersize, aligned_size - usersize))
 		return -EFAULT;
 
 	return 0;
@@ -298,7 +299,9 @@ EXPORT_SYMBOL_GPL(xt_data_to_user);
 #define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
+			C_SIZE ? : K->u.kernel.TYPE->TYPE##size,	\
+			C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) :		\
+				 XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
-- 
cgit v1.2.3-59-g8ed1b


From 87e94dbc210a720a34be5c1174faee5c84be963e Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Thu, 11 May 2017 18:56:38 +0200
Subject: netfilter: synproxy: fix conntrackd interaction

This patch fixes the creation of connection tracking entry from
netlink when synproxy is used. It was missing the addition of
the synproxy extension.

This was causing kernel crashes when a conntrack entry created by
conntrackd was used after the switch of traffic from active node
to the passive node.

Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fa752626029e..9799a50bc604 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,6 +45,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_l4proto.h>
@@ -1827,6 +1829,8 @@ ctnetlink_create_conntrack(struct net *net,
 	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 	nf_ct_labels_ext_add(ct);
+	nfct_seqadj_ext_add(ct);
+	nfct_synproxy_ext_add(ct);
 
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
-- 
cgit v1.2.3-59-g8ed1b


From fa803605eef39372e53d7813002d73a3fcf10c88 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 14 May 2017 21:35:22 +0800
Subject: netfilter: nf_tables: can't assume lock is acquired when dumping set
 elems

When dumping the elements related to a specified set, we may invoke the
nf_tables_dump_set with the NFNL_SUBSYS_NFTABLES lock not acquired. So
we should use the proper rcu operation to avoid race condition, just
like other nft dump operations.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 78 +++++++++++++++++++++++++++++++------------
 net/netfilter/nft_set_hash.c  |  2 +-
 2 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 559225029740..5f4a4d48b871 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
 	return nf_tables_fill_setelem(args->skb, set, elem);
 }
 
+struct nft_set_dump_ctx {
+	const struct nft_set	*set;
+	struct nft_ctx		ctx;
+};
+
 static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nft_set_dump_ctx *dump_ctx = cb->data;
 	struct net *net = sock_net(skb->sk);
-	u8 genmask = nft_genmask_cur(net);
+	struct nft_af_info *afi;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_set_dump_args args;
-	struct nft_ctx ctx;
-	struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
+	bool set_found = false;
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	u32 portid, seq;
-	int event, err;
+	int event;
 
-	err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
-			  NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
-			  NULL);
-	if (err < 0)
-		return err;
+	rcu_read_lock();
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+		if (afi != dump_ctx->ctx.afi)
+			continue;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
-					 (void *)nla, genmask);
-	if (err < 0)
-		return err;
+		list_for_each_entry_rcu(table, &afi->tables, list) {
+			if (table != dump_ctx->ctx.table)
+				continue;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+			list_for_each_entry_rcu(set, &table->sets, list) {
+				if (set == dump_ctx->set) {
+					set_found = true;
+					break;
+				}
+			}
+			break;
+		}
+		break;
+	}
+
+	if (!set_found) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
 
 	event  = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
 	portid = NETLINK_CB(cb->skb).portid;
@@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = ctx.afi->family;
+	nfmsg->nfgen_family = afi->family;
 	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(ctx.net->nft.base_seq & 0xffff);
+	nfmsg->res_id	    = htons(net->nft.base_seq & 0xffff);
 
-	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
 		goto nla_put_failure;
@@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 
 	args.cb			= cb;
 	args.skb		= skb;
-	args.iter.genmask	= nft_genmask_cur(ctx.net);
+	args.iter.genmask	= nft_genmask_cur(net);
 	args.iter.skip		= cb->args[0];
 	args.iter.count		= 0;
 	args.iter.err		= 0;
 	args.iter.fn		= nf_tables_dump_setelem;
-	set->ops->walk(&ctx, set, &args.iter);
+	set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+	rcu_read_unlock();
 
 	nla_nest_end(skb, nest);
 	nlmsg_end(skb, nlh);
@@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 
 nla_put_failure:
+	rcu_read_unlock();
 	return -ENOSPC;
 }
 
+static int nf_tables_dump_set_done(struct netlink_callback *cb)
+{
+	kfree(cb->data);
+	return 0;
+}
+
 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
@@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nf_tables_dump_set,
+			.done = nf_tables_dump_set_done,
 		};
+		struct nft_set_dump_ctx *dump_ctx;
+
+		dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+		if (!dump_ctx)
+			return -ENOMEM;
+
+		dump_ctx->set = set;
+		dump_ctx->ctx = ctx;
+
+		c.data = dump_ctx;
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 	return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 8ec086b6b56b..3d3a6df4ce70 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_elem elem;
 	int err;
 
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+	err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
 	iter->err = err;
 	if (err)
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 71df14b0ce094be46d105b5a3ededd83b8e779a0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 15 May 2017 11:17:29 +0100
Subject: netfilter: nf_tables: missing sanitization in data from userspace

Do not assume userspace always sends us NFT_DATA_VALUE for bitwise and
cmp expressions. Although NFT_DATA_VERDICT does not make any sense, it
is still possible to handcraft a netlink message using this incorrect
data type.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_bitwise.c | 19 ++++++++++++++-----
 net/netfilter/nft_cmp.c     | 12 ++++++++++--
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 877d9acd91ef..96bd4f325b0f 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
 			    tb[NFTA_BITWISE_MASK]);
 	if (err < 0)
 		return err;
-	if (d1.len != priv->len)
-		return -EINVAL;
+	if (d1.len != priv->len) {
+		err = -EINVAL;
+		goto err1;
+	}
 
 	err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
 			    tb[NFTA_BITWISE_XOR]);
 	if (err < 0)
-		return err;
-	if (d2.len != priv->len)
-		return -EINVAL;
+		goto err1;
+	if (d2.len != priv->len) {
+		err = -EINVAL;
+		goto err2;
+	}
 
 	return 0;
+err2:
+	nft_data_uninit(&priv->xor, d2.type);
+err1:
+	nft_data_uninit(&priv->mask, d1.type);
+	return err;
 }
 
 static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 2b96effeadc1..8c9d0fb19118 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 	if (err < 0)
 		return ERR_PTR(err);
 
+	if (desc.type != NFT_DATA_VALUE) {
+		err = -EINVAL;
+		goto err1;
+	}
+
 	if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
 		return &nft_cmp_fast_ops;
-	else
-		return &nft_cmp_ops;
+
+	return &nft_cmp_ops;
+err1:
+	nft_data_uninit(&data, desc.type);
+	return ERR_PTR(-EINVAL);
 }
 
 struct nft_expr_type nft_cmp_type __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From 591054469b3eef34bc097c30fae8ededddf8d796 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 15 May 2017 11:17:34 +0100
Subject: netfilter: nf_tables: revisit chain/object refcounting from elements

Andreas reports that the following incremental update using our commit
protocol doesn't work.

 # nft -f incremental-update.nft
 delete element ip filter client_to_any { 10.180.86.22 : goto CIn_1 }
 delete chain ip filter CIn_1
 ... Error: Could not process rule: Device or resource busy

The existing code is not well-integrated into the commit phase protocol,
since element deletions do not result in refcount decrement from the
preparation phase. This results in bogus EBUSY errors like the one
above.

Two new functions come with this patch:

* nft_set_elem_activate() function is used from the abort path, to
  restore the set element refcounting on objects that occurred from
  the preparation phase.

* nft_set_elem_deactivate() that is called from nft_del_setelem() to
  decrement set element refcounting on objects from the preparation
  phase in the commit protocol.

The nft_data_uninit() has been renamed to nft_data_release() since this
function does not uninitialize any data store in the data register,
instead just releases the references to objects. Moreover, a new
function nft_data_hold() has been introduced to be used from
nft_set_elem_activate().

Reported-by: Andreas Schultz <aschultz@tpip.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c     | 82 ++++++++++++++++++++++++++++++++++-----
 net/netfilter/nft_bitwise.c       |  4 +-
 net/netfilter/nft_cmp.c           |  2 +-
 net/netfilter/nft_immediate.c     |  5 ++-
 net/netfilter/nft_range.c         |  4 +-
 6 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 028faec8fc27..8a8bab8d7b15 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -176,7 +176,7 @@ struct nft_data_desc {
 int nft_data_init(const struct nft_ctx *ctx,
 		  struct nft_data *data, unsigned int size,
 		  struct nft_data_desc *desc, const struct nlattr *nla);
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type);
+void nft_data_release(const struct nft_data *data, enum nft_data_types type);
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5f4a4d48b871..da314be0c048 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3627,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 {
 	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
 
-	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+	nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
-		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
 	if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
 		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
@@ -3638,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
 
+/* Only called from commit path, nft_set_elem_deactivate() already deals with
+ * the refcounting from the preparation phase.
+ */
+static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+	kfree(elem);
+}
+
 static int nft_setelem_parse_flags(const struct nft_set *set,
 				   const struct nlattr *attr, u32 *flags)
 {
@@ -3849,9 +3861,9 @@ err4:
 	kfree(elem.priv);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
-		nft_data_uninit(&data, d2.type);
+		nft_data_release(&data, d2.type);
 err2:
-	nft_data_uninit(&elem.key.val, d1.type);
+	nft_data_release(&elem.key.val, d1.type);
 err1:
 	return err;
 }
@@ -3896,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	return err;
 }
 
+/**
+ *	nft_data_hold - hold a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
+ *	NFT_GOTO verdicts. This function must be called on active data objects
+ *	from the second phase of the commit protocol.
+ */
+static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+{
+	if (type == NFT_DATA_VERDICT) {
+		switch (data->verdict.code) {
+		case NFT_JUMP:
+		case NFT_GOTO:
+			data->verdict.chain->use++;
+			break;
+		}
+	}
+}
+
+static void nft_set_elem_activate(const struct net *net,
+				  const struct nft_set *set,
+				  struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_hold(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use++;
+}
+
+static void nft_set_elem_deactivate(const struct net *net,
+				    const struct nft_set *set,
+				    struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use--;
+}
+
 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			   const struct nlattr *attr)
 {
@@ -3961,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 	elem.priv = priv;
 
+	nft_set_elem_deactivate(ctx->net, set, &elem);
+
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -3970,7 +4031,7 @@ err4:
 err3:
 	kfree(elem.priv);
 err2:
-	nft_data_uninit(&elem.key.val, desc.type);
+	nft_data_release(&elem.key.val, desc.type);
 err1:
 	return err;
 }
@@ -4777,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
 		nft_set_destroy(nft_trans_set(trans));
 		break;
 	case NFT_MSG_DELSETELEM:
-		nft_set_elem_destroy(nft_trans_elem_set(trans),
-				     nft_trans_elem(trans).priv, true);
+		nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
+					   nft_trans_elem(trans).priv);
 		break;
 	case NFT_MSG_DELOBJ:
 		nft_obj_destroy(nft_trans_obj(trans));
@@ -5013,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
+			nft_set_elem_activate(net, te->set, &te->elem);
 			te->set->ops->activate(net, te->set, &te->elem);
 			te->set->ndeact--;
 
@@ -5498,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx,
 EXPORT_SYMBOL_GPL(nft_data_init);
 
 /**
- *	nft_data_uninit - release a nft_data item
+ *	nft_data_release - release a nft_data item
  *
  *	@data: struct nft_data to release
  *	@type: type of data
@@ -5506,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init);
  *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
  *	all others need to be released by calling this function.
  */
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+void nft_data_release(const struct nft_data *data, enum nft_data_types type)
 {
 	if (type < NFT_DATA_VERDICT)
 		return;
@@ -5517,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
 		WARN_ON(1);
 	}
 }
-EXPORT_SYMBOL_GPL(nft_data_uninit);
+EXPORT_SYMBOL_GPL(nft_data_release);
 
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len)
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 96bd4f325b0f..fff8073e2a56 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -99,9 +99,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
 
 	return 0;
 err2:
-	nft_data_uninit(&priv->xor, d2.type);
+	nft_data_release(&priv->xor, d2.type);
 err1:
-	nft_data_uninit(&priv->mask, d1.type);
+	nft_data_release(&priv->mask, d1.type);
 	return err;
 }
 
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 8c9d0fb19118..c2945eb3397c 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -211,7 +211,7 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 
 	return &nft_cmp_ops;
 err1:
-	nft_data_uninit(&data, desc.type);
+	nft_data_release(&data, desc.type);
 	return ERR_PTR(-EINVAL);
 }
 
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 728baf88295a..4717d7796927 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
 	return 0;
 
 err1:
-	nft_data_uninit(&priv->data, desc.type);
+	nft_data_release(&priv->data, desc.type);
 	return err;
 }
 
@@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
 				  const struct nft_expr *expr)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
-	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+
+	return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
 static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 9edc74eedc10..cedb96c3619f 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
 	priv->len = desc_from.len;
 	return 0;
 err2:
-	nft_data_uninit(&priv->data_to, desc_to.type);
+	nft_data_release(&priv->data_to, desc_to.type);
 err1:
-	nft_data_uninit(&priv->data_from, desc_from.type);
+	nft_data_release(&priv->data_from, desc_from.type);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 9b5fed0daa2a9ea3355ef06bb9e87ce800e71df6 Mon Sep 17 00:00:00 2001
From: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Date: Fri, 28 Apr 2017 11:02:22 +0300
Subject: drm/i915/glk: Fix DSI "*ERROR* ULPS is still active" messages

The sequence in glk_dsi_device_ready() enters ULPS then waits until it is
*not* active to then disable it. The correct sequence according to the
spec is to enter ULPS then wait until the GLK_ULPS_NOT_ACTIVE bit is
zero, i.e., ULPS is active, and then disable ULPS.

Fixing the condition gets rid of the following spurious error messages:

[drm:glk_dsi_device_ready [i915]] *ERROR* ULPS is still active

Fixes: 4644848369c0 ("drm/i915/glk: Add MIPIIO Enable/disable sequence")
Cc: Deepak M <m.deepak@intel.com>
Cc: Madhav Chauhan <madhav.chauhan@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org>
Signed-off-by: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
Reviewed-by: Madhav Chauhan <madhav.chauhan@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170428080222.6147-1-ander.conselvan.de.oliveira@intel.com
(cherry picked from commit 3acbec03b3c51559d01c879e9564d9c9610fe8ce)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dsi.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 3ffe8b1f1d48..fc0ef492252a 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
 		val |= (ULPS_STATE_ENTER | DEVICE_READY);
 		I915_WRITE(MIPI_DEVICE_READY(port), val);
 
-		/* Wait for ULPS Not active */
+		/* Wait for ULPS active */
 		if (intel_wait_for_register(dev_priv,
-				MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE,
-				GLK_ULPS_NOT_ACTIVE, 20))
-			DRM_ERROR("ULPS is still active\n");
+				MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20))
+			DRM_ERROR("ULPS not active\n");
 
 		/* Exit ULPS */
 		val = I915_READ(MIPI_DEVICE_READY(port));
-- 
cgit v1.2.3-59-g8ed1b


From 668e3b014afb66ab29e134bca7c258527273ac75 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Thu, 27 Apr 2017 19:02:20 +0300
Subject: drm/i915: Fix runtime PM for LPE audio
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not calling pm_runtime_enable() means that runtime PM can't be
enabled at all via sysfs. So we definitely need to call it
from somewhere.

Calling it from the driver seems like a bad idea because it
would have to be paired with a pm_runtime_disable() at driver
unload time, otherwise the core gets upset. Also if there's
no LPE audio driver loaded then we couldn't runtime suspend
i915 either.

So it looks like a better plan is to call it from i915 when
we register the platform device. That seems to match how
pci generally does things. I cargo culted the
pm_runtime_forbid() and pm_runtime_set_active() calls from
pci as well.

The exposed runtime PM API is massive an thorougly misleading, so
I don't actually know if this is how you're supposed to use the API
or not. But it seems to work. I can now runtime suspend i915 again
with or without the LPE audio driver loaded, and reloading the
LPE audio driver also seems to work.

Note that powertop won't auto-tune runtime PM for platform devices,
which is a little annoying. So I'm not sure that leaving runtime
PM in "on" mode by default is the best choice here. But I've left
it like that for now at least.

Also remove the comment about there not being much benefit from
LPE audio runtime PM. Not allowing runtime PM blocks i915 runtime
PM, which will also block s0ix, and that could have a measurable
impact on power consumption.

Cc: stable@vger.kernel.org
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Fixes: 0b6b524f3915 ("ALSA: x86: Don't enable runtime PM as default")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-2-ville.syrjala@linux.intel.com
Reviewed-by: Takashi Iwai <tiwai@suse.de>
(cherry picked from commit 183c00350ccda86781f6695840e6c5f5b22efbd1)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_lpe_audio.c | 5 +++++
 sound/x86/intel_hdmi_audio.c           | 4 ----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 25d8e76489e4..668f00480d97 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -63,6 +63,7 @@
 #include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 
 #include "i915_drv.h"
 #include <linux/delay.h>
@@ -121,6 +122,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
 
 	kfree(rsc);
 
+	pm_runtime_forbid(&platdev->dev);
+	pm_runtime_set_active(&platdev->dev);
+	pm_runtime_enable(&platdev->dev);
+
 	return platdev;
 
 err:
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index 664b7fe206d6..b11d3920b9a5 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -1809,10 +1809,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 	pdata->notify_pending = false;
 	spin_unlock_irq(&pdata->lpe_audio_slock);
 
-	/* runtime PM isn't enabled as default, since it won't save much on
-	 * BYT/CHT devices; user who want the runtime PM should adjust the
-	 * power/ontrol and power/autosuspend_delay_ms sysfs entries instead
-	 */
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_mark_last_busy(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
-- 
cgit v1.2.3-59-g8ed1b


From 82f2b4aca8fd90476fc3fd1786d107163ab17201 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Thu, 4 May 2017 21:15:30 +0300
Subject: drm/i915: Fix rawclk readout for g4x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turns out our skills in decoding the CLKCFG register weren't good
enough. On this particular elk the answer we got was 400 MHz when
in reality the clock was running at 266 MHz, which then caused us
to program a bogus AUX clock divider that caused all AUX communication
to fail.

Sadly the docs are now in bit heaven, so the fix will have to be based
on empirical evidence. Using another elk machine I was able to frob
the FSB frequency from the BIOS and see how it affects the CLKCFG
register. The machine seesm to use a frequency of 266 MHz by default,
and fortunately it still boot even with the 50% CPU overclock that
we get when we bump the FSB up to 400 MHz.

It turns out the actual FSB frequency and the register have no real
link whatsoever. The register value is based on some straps or something,
but fortunately those too can be configured from the BIOS on this board,
although it doesn't seem to respect the settings 100%. In the end I was
able to derive the following relationship:

BIOS FSB / strap | CLKCFG
-------------------------
200              | 0x2
266              | 0x0
333              | 0x4
400              | 0x4

So only the 200 and 400 MHz cases actually match how we're currently
decoding that register. But as the comment next to some of the defines
says, we have been just guessing anyway.

So let's fix things up so that at least the 266 MHz case will work
correctly as that is actually the setting used by both the buggy
machine and my test machine.

The fact that 333 and 400 MHz BIOS settings result in the same register
value is a little disappointing, as that means we can't tell them apart.
However, according to the gmch datasheet for both elk and ctg 400 Mhz is
not even a supported FSB frequency, so I'm going to make the assumption
that we should decode it as 333 MHz instead.

Cc: stable@vger.kernel.org
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
Reported-by: Tomi Sarvela <tomi.p.sarvela@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100926
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170504181530.6908-1-ville.syrjala@linux.intel.com
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Tomi Sarvela <tomi.p.sarvela@intel.com>
(cherry picked from commit 6f38123ecaac446312a63523b68df84ceb5a06ed)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h    | 10 +++++++---
 drivers/gpu/drm/i915/intel_cdclk.c |  6 ++----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 11b12f412492..5a7c63e64381 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3051,10 +3051,14 @@ enum skl_disp_power_wells {
 #define CLKCFG_FSB_667					(3 << 0)	/* hrawclk 166 */
 #define CLKCFG_FSB_800					(2 << 0)	/* hrawclk 200 */
 #define CLKCFG_FSB_1067					(6 << 0)	/* hrawclk 266 */
+#define CLKCFG_FSB_1067_ALT				(0 << 0)	/* hrawclk 266 */
 #define CLKCFG_FSB_1333					(7 << 0)	/* hrawclk 333 */
-/* Note, below two are guess */
-#define CLKCFG_FSB_1600					(4 << 0)	/* hrawclk 400 */
-#define CLKCFG_FSB_1600_ALT				(0 << 0)	/* hrawclk 400 */
+/*
+ * Note that on at least on ELK the below value is reported for both
+ * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet
+ * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz.
+ */
+#define CLKCFG_FSB_1333_ALT				(4 << 0)	/* hrawclk 333 */
 #define CLKCFG_FSB_MASK					(7 << 0)
 #define CLKCFG_MEM_533					(1 << 4)
 #define CLKCFG_MEM_667					(2 << 4)
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index dd3ad52b7dfe..f29a226e24d8 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1798,13 +1798,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv)
 	case CLKCFG_FSB_800:
 		return 200000;
 	case CLKCFG_FSB_1067:
+	case CLKCFG_FSB_1067_ALT:
 		return 266667;
 	case CLKCFG_FSB_1333:
+	case CLKCFG_FSB_1333_ALT:
 		return 333333;
-	/* these two are just a guess; one of them might be right */
-	case CLKCFG_FSB_1600:
-	case CLKCFG_FSB_1600_ALT:
-		return 400000;
 	default:
 		return 133333;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 2f720aac936dc7a301b757d3b197d86c333d59b8 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 12 May 2017 10:14:23 +0100
Subject: drm/i915: don't do allocate_va_range again on PIN_UPDATE

If a vma is already bound to a ppgtt, we incorrectly call
allocate_va_range again when doing a PIN_UPDATE, which will result in
over accounting within our paging structures, such that when we do
unbind something we don't actually destroy the structures and end up
inadvertently recycling them. In reality this probably isn't too bad,
but once we start touching PDEs and PDPEs for 64K/2M/1G pages this
apparent recycling will manifest into lots of really, really subtle
bugs.

v2: Fix the testing of vma->flags for aliasing_ppgtt_bind_vma

Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170512091423.26085-1-chris@chris-wilson.co.uk
(cherry picked from commit 1f23475c893a85c934143cd64865ebb9b6af383f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2aa6b97fd22f..a0563e18d753 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -195,9 +195,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int ret;
 
-	ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
-	if (ret)
-		return ret;
+	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+		ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
+						 vma->size);
+		if (ret)
+			return ret;
+	}
 
 	vma->pages = vma->obj->mm.pages;
 
@@ -2306,7 +2309,8 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 
-		if (appgtt->base.allocate_va_range) {
+		if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
+		    appgtt->base.allocate_va_range) {
 			ret = appgtt->base.allocate_va_range(&appgtt->base,
 							     vma->node.start,
 							     vma->node.size);
-- 
cgit v1.2.3-59-g8ed1b


From 661e6b02b5aa82db31897f36e96324b77450fd7a Mon Sep 17 00:00:00 2001
From: Zhichao Huang <zhichao.huang@linaro.org>
Date: Thu, 11 May 2017 13:46:11 +0100
Subject: KVM: arm: plug potential guest hardware debug leakage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hardware debugging in guests is not intercepted currently, it means
that a malicious guest can bring down the entire machine by writing
to the debug registers.

This patch enable trapping of all debug registers, preventing the
guests to access the debug registers. This includes access to the
debug mode(DBGDSCR) in the guest world all the time which could
otherwise mess with the host state. Reads return 0 and writes are
ignored (RAZ_WI).

The result is the guest cannot detect any working hardware based debug
support. As debug exceptions are still routed to the guest normal
debug using software based breakpoints still works.

To support debugging using hardware registers we need to implement a
debug register aware world switch as well as special trapping for
registers that may affect the host state.

Cc: stable@vger.kernel.org
Signed-off-by: Zhichao Huang <zhichao.huang@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/include/asm/kvm_coproc.h |  3 +-
 arch/arm/kvm/coproc.c             | 77 ++++++++++++++++++++++++++++++---------
 arch/arm/kvm/handle_exit.c        |  4 +-
 arch/arm/kvm/hyp/switch.c         |  4 +-
 4 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h
index 4917c2f7e459..e74ab0fbab79 100644
--- a/arch/arm/include/asm/kvm_coproc.h
+++ b/arch/arm/include/asm/kvm_coproc.h
@@ -31,7 +31,8 @@ void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table);
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index ac8d36da4d08..1403ffb1916b 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -112,12 +112,6 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 {
 	/*
@@ -533,12 +527,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-/**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
- * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
- */
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static struct coproc_params decode_64bit_hsr(struct kvm_vcpu *vcpu)
 {
 	struct coproc_params params;
 
@@ -552,9 +541,38 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
 	params.CRm = 0;
 
+	return params;
+}
+
+/**
+ * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_64bit_hsr(vcpu);
+
 	return emulate_cp15(vcpu, &params);
 }
 
+/**
+ * kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_64bit_hsr(vcpu);
+
+	/* raz_wi cp14 */
+	pm_fake(vcpu, &params, NULL);
+
+	/* handled */
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+	return 1;
+}
+
 static void reset_coproc_regs(struct kvm_vcpu *vcpu,
 			      const struct coproc_reg *table, size_t num)
 {
@@ -565,12 +583,7 @@ static void reset_coproc_regs(struct kvm_vcpu *vcpu,
 			table[i].reset(vcpu, &table[i]);
 }
 
-/**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
- * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
- */
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu)
 {
 	struct coproc_params params;
 
@@ -584,9 +597,37 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
 	params.Rt2 = 0;
 
+	return params;
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_32bit_hsr(vcpu);
 	return emulate_cp15(vcpu, &params);
 }
 
+/**
+ * kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params = decode_32bit_hsr(vcpu);
+
+	/* raz_wi cp14 */
+	pm_fake(vcpu, &params, NULL);
+
+	/* handled */
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+	return 1;
+}
+
 /******************************************************************************
  * Userspace API
  *****************************************************************************/
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 5fd7968cdae9..f86a9aaef462 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -95,9 +95,9 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
-	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[HSR_EC_CP14_MR]	= kvm_handle_cp14_32,
 	[HSR_EC_CP14_LS]	= kvm_handle_cp14_load_store,
-	[HSR_EC_CP14_64]	= kvm_handle_cp14_access,
+	[HSR_EC_CP14_64]	= kvm_handle_cp14_64,
 	[HSR_EC_CP_0_13]	= kvm_handle_cp_0_13_access,
 	[HSR_EC_CP10_ID]	= kvm_handle_cp10_id,
 	[HSR_EC_HVC]		= handle_hvc,
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 92678b7bd046..624a510d31df 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -48,7 +48,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
 	write_sysreg(HSTR_T(15), HSTR);
 	write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
 	val = read_sysreg(HDCR);
-	write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR);
+	val |= HDCR_TPM | HDCR_TPMCR; /* trap performance monitors */
+	val |= HDCR_TDRA | HDCR_TDOSA | HDCR_TDA; /* trap debug regs */
+	write_sysreg(val, HDCR);
 }
 
 static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3-59-g8ed1b


From 9b619a8f08da9f61f166edbbe30ad05c359ec19e Mon Sep 17 00:00:00 2001
From: Zhichao Huang <zhichao.huang@linaro.org>
Date: Thu, 11 May 2017 13:46:12 +0100
Subject: KVM: arm: rename pm_fake handler to trap_raz_wi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pm_fake doesn't quite describe what the handler does (ignoring writes
and returning 0 for reads).

As we're about to use it (a lot) in a different context, rename it
with a (admitedly cryptic) name that make sense for all users.

Signed-off-by: Zhichao Huang <zhichao.huang@linaro.org>
Reviewed-by: Alex Bennee <alex.bennee@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/kvm/coproc.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 1403ffb1916b..6d1d2e26dfe5 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -279,7 +279,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
  * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
  * all PM registers, which doesn't crash the guest kernel at least.
  */
-static bool pm_fake(struct kvm_vcpu *vcpu,
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 		    const struct coproc_params *p,
 		    const struct coproc_reg *r)
 {
@@ -289,19 +289,19 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
-#define access_pmcr pm_fake
-#define access_pmcntenset pm_fake
-#define access_pmcntenclr pm_fake
-#define access_pmovsr pm_fake
-#define access_pmselr pm_fake
-#define access_pmceid0 pm_fake
-#define access_pmceid1 pm_fake
-#define access_pmccntr pm_fake
-#define access_pmxevtyper pm_fake
-#define access_pmxevcntr pm_fake
-#define access_pmuserenr pm_fake
-#define access_pmintenset pm_fake
-#define access_pmintenclr pm_fake
+#define access_pmcr trap_raz_wi
+#define access_pmcntenset trap_raz_wi
+#define access_pmcntenclr trap_raz_wi
+#define access_pmovsr trap_raz_wi
+#define access_pmselr trap_raz_wi
+#define access_pmceid0 trap_raz_wi
+#define access_pmceid1 trap_raz_wi
+#define access_pmccntr trap_raz_wi
+#define access_pmxevtyper trap_raz_wi
+#define access_pmxevcntr trap_raz_wi
+#define access_pmuserenr trap_raz_wi
+#define access_pmintenset trap_raz_wi
+#define access_pmintenclr trap_raz_wi
 
 /* Architected CP15 registers.
  * CRn denotes the primary register number, but is copied to the CRm in the
@@ -566,7 +566,7 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct coproc_params params = decode_64bit_hsr(vcpu);
 
 	/* raz_wi cp14 */
-	pm_fake(vcpu, &params, NULL);
+	trap_raz_wi(vcpu, &params, NULL);
 
 	/* handled */
 	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
@@ -621,7 +621,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct coproc_params params = decode_32bit_hsr(vcpu);
 
 	/* raz_wi cp14 */
-	pm_fake(vcpu, &params, NULL);
+	trap_raz_wi(vcpu, &params, NULL);
 
 	/* handled */
 	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-- 
cgit v1.2.3-59-g8ed1b


From 66c25f6e31766a9ec19c2bdc7f5f69f9c59bafd7 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 15 May 2017 10:56:06 +0200
Subject: net: stmmac: use correct pointer when printing normal descriptor ring

There are two pointers in sysfs_display_ring,
one that increments if using normal dma descriptors,
another if using extended dma descriptors.

When printing the normal dma descriptors, the wrong pointer is used,
thus the printed descriptor addresses are incorrect.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cd8c60132390..a74c481401c4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3725,7 +3725,7 @@ static void sysfs_display_ring(void *head, int size, int extend_desc,
 			ep++;
 		} else {
 			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-				   i, (unsigned int)virt_to_phys(ep),
+				   i, (unsigned int)virt_to_phys(p),
 				   le32_to_cpu(p->des0), le32_to_cpu(p->des1),
 				   le32_to_cpu(p->des2), le32_to_cpu(p->des3));
 			p++;
-- 
cgit v1.2.3-59-g8ed1b


From 4769886baf39b6a307eb8f9e39848823ca6c5939 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 10 May 2017 22:43:17 +0300
Subject: kvm: nVMX: off by one in vmx_write_pml_buffer()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are PML_ENTITY_NUM elements in the pml_address[] array so the >
should be >= or we write beyond the end of the array when we do:

	pml_address[vmcs12->guest_pml_index--] = gpa;

Fixes: c5f983f6e845 ("nVMX: Implement emulated Page Modification Logging")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6f4ad44aa95..7698e8f321bf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11213,7 +11213,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
 		if (!nested_cpu_has_pml(vmcs12))
 			return 0;
 
-		if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
+		if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
 			vmx->nested.pml_full = true;
 			return 1;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From a575813bfe4bc15aba511a5e91e61d242bff8b9d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 11 May 2017 02:58:55 -0700
Subject: KVM: x86: Fix load damaged SSEx MXCSR register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

   BUG: unable to handle kernel paging request at ffffffffc07f6a2e
   IP: report_bug+0x94/0x120
   PGD 348e12067
   P4D 348e12067
   PUD 348e14067
   PMD 3cbd84067
   PTE 80000003f7e87161

   Oops: 0003 [#1] SMP
   CPU: 2 PID: 7091 Comm: kvm_load_guest_ Tainted: G           OE   4.11.0+ #8
   task: ffff92fdfb525400 task.stack: ffffbda6c3d04000
   RIP: 0010:report_bug+0x94/0x120
   RSP: 0018:ffffbda6c3d07b20 EFLAGS: 00010202
    do_trap+0x156/0x170
    do_error_trap+0xa3/0x170
    ? kvm_load_guest_fpu.part.175+0x12a/0x170 [kvm]
    ? mark_held_locks+0x79/0xa0
    ? retint_kernel+0x10/0x10
    ? trace_hardirqs_off_thunk+0x1a/0x1c
    do_invalid_op+0x20/0x30
    invalid_op+0x1e/0x30
   RIP: 0010:kvm_load_guest_fpu.part.175+0x12a/0x170 [kvm]
    ? kvm_load_guest_fpu.part.175+0x1c/0x170 [kvm]
    kvm_arch_vcpu_ioctl_run+0xed6/0x1b70 [kvm]
    kvm_vcpu_ioctl+0x384/0x780 [kvm]
    ? kvm_vcpu_ioctl+0x384/0x780 [kvm]
    ? sched_clock+0x13/0x20
    ? __do_page_fault+0x2a0/0x550
    do_vfs_ioctl+0xa4/0x700
    ? up_read+0x1f/0x40
    ? __do_page_fault+0x2a0/0x550
    SyS_ioctl+0x79/0x90
    entry_SYSCALL_64_fastpath+0x23/0xc2

SDM mentioned that "The MXCSR has several reserved bits, and attempting to write
a 1 to any of these bits will cause a general-protection exception(#GP) to be
generated". The syzkaller forks' testcase overrides xsave area w/ random values
and steps on the reserved bits of MXCSR register. The damaged MXCSR register
values of guest will be restored to SSEx MXCSR register before vmentry. This
patch fixes it by catching userspace override MXCSR register reserved bits w/
random values and bails out immediately.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kernel/fpu/init.c | 1 +
 arch/x86/kvm/x86.c         | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index c2f8dde3255c..d5d44c452624 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -90,6 +90,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
  * Boot time FPU feature detection code:
  */
 unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
 
 static void __init fpu__init_system_mxcsr(void)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 464da936c53d..b54125b590e8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3288,11 +3288,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 	}
 }
 
+#define XSAVE_MXCSR_OFFSET 24
+
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 					struct kvm_xsave *guest_xsave)
 {
 	u64 xstate_bv =
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+	u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
 	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		/*
@@ -3300,11 +3303,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
 		 * with old userspace.
 		 */
-		if (xstate_bv & ~kvm_supported_xcr0())
+		if (xstate_bv & ~kvm_supported_xcr0() ||
+			mxcsr & ~mxcsr_feature_mask)
 			return -EINVAL;
 		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
-		if (xstate_bv & ~XFEATURE_MASK_FPSSE)
+		if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
+			mxcsr & ~mxcsr_feature_mask)
 			return -EINVAL;
 		memcpy(&vcpu->arch.guest_fpu.state.fxsave,
 			guest_xsave->region, sizeof(struct fxregs_state));
-- 
cgit v1.2.3-59-g8ed1b


From fce6ac4c0508b985d497e3d9c8eff28ec8a43182 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 11 May 2017 02:58:56 -0700
Subject: KVM: VMX: Don't enable EPT A/D feature if EPT feature is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can observe eptad kvm_intel module parameter is still Y
even if ept is disabled which is weird. This patch will
not enable EPT A/D feature if EPT feature is disabled.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7698e8f321bf..72f78396bc09 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6504,7 +6504,7 @@ static __init int hardware_setup(void)
 		enable_ept_ad_bits = 0;
 	}
 
-	if (!cpu_has_vmx_ept_ad_bits())
+	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
 		enable_ept_ad_bits = 0;
 
 	if (!cpu_has_vmx_unrestricted_guest())
-- 
cgit v1.2.3-59-g8ed1b


From 0780516a18f87e881e42ed815f189279b0a1743c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 May 2017 13:23:29 +0200
Subject: KVM: nVMX: fix EPT permissions as reported in exit qualification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the new ept_access_test_read_only and ept_access_test_read_write
testcases from vmx.flat.

The problem is that gpte_access moves bits around to switch from EPT
bit order (XWR) to ACC_*_MASK bit order (RWX).  This results in an
incorrect exit qualification.  To fix this, make pt_access and
pte_access operate on raw PTE values (only with NX flipped to mean
"can execute") and call gpte_access at the end of the walk.  This
lets us use pte_access to compute the exit qualification with XWR
bit order.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56241746abbd..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+	u64 pt_access, pte_access;
+	unsigned index, accessed_dirty, pte_pkey;
 	unsigned nested_access;
 	gpa_t pte_gpa;
 	bool have_ad;
 	int offset;
+	u64 walk_nx_mask = 0;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -302,6 +304,7 @@ retry_walk:
 	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
+	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
 	if (walker->level == PT32E_ROOT_LEVEL) {
 		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
@@ -313,8 +316,6 @@ retry_walk:
 	walker->max_level = walker->level;
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
-
 	/*
 	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
 	 * by the MOV to CR instruction are treated as reads and do not cause the
@@ -322,14 +323,14 @@ retry_walk:
 	 */
 	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
-	pt_access = pte_access = ACC_ALL;
+	pte_access = ~0;
 	++walker->level;
 
 	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
-		pt_access &= pte_access;
+		pt_access = pte_access;
 		--walker->level;
 
 		index = PT_INDEX(addr, walker->level);
@@ -371,6 +372,12 @@ retry_walk:
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
+		/*
+		 * Inverting the NX it lets us AND it like other
+		 * permission bits.
+		 */
+		pte_access = pt_access & (pte ^ walk_nx_mask);
+
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
@@ -379,14 +386,16 @@ retry_walk:
 			goto error;
 		}
 
-		accessed_dirty &= pte;
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
-	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
+	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
 
@@ -403,7 +412,7 @@ retry_walk:
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
 	if (!write_fault)
-		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
+		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
 	else
 		/*
 		 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -421,10 +430,8 @@ retry_walk:
 			goto retry_walk;
 	}
 
-	walker->pt_access = pt_access;
-	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, pte_access, pt_access);
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
 	return 1;
 
 error:
@@ -452,7 +459,7 @@ error:
 	 */
 	if (!(errcode & PFERR_RSVD_MASK)) {
 		vcpu->arch.exit_qualification &= 0x187;
-		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
 	}
 #endif
 	walker->fault.address = addr;
-- 
cgit v1.2.3-59-g8ed1b


From 68118e0e73aa3a6291c8b9eb1ee708e05f110cea Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Sun, 7 May 2017 07:16:30 +0200
Subject: i2c: mux: reg: put away the parent i2c adapter on probe failure

It is only prudent to let go of resources that are not used.

Fixes: b3fdd32799d8 ("i2c: mux: Add register-based mux i2c-mux-reg")
Signed-off-by: Peter Rosin <peda@axentia.se>
---
 drivers/i2c/muxes/i2c-mux-reg.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 406d5059072c..11974e3cd1e5 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -196,20 +196,25 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 		mux->data.reg_size = resource_size(res);
 		mux->data.reg = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(mux->data.reg))
-			return PTR_ERR(mux->data.reg);
+		if (IS_ERR(mux->data.reg)) {
+			ret = PTR_ERR(mux->data.reg);
+			goto err_put_parent;
+		}
 	}
 
 	if (mux->data.reg_size != 4 && mux->data.reg_size != 2 &&
 	    mux->data.reg_size != 1) {
 		dev_err(&pdev->dev, "Invalid register size\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_put_parent;
 	}
 
 	muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values, 0, 0,
 			     i2c_mux_reg_select, NULL);
-	if (!muxc)
-		return -ENOMEM;
+	if (!muxc) {
+		ret = -ENOMEM;
+		goto err_put_parent;
+	}
 	muxc->priv = mux;
 
 	platform_set_drvdata(pdev, muxc);
@@ -233,6 +238,8 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 
 add_adapter_failed:
 	i2c_mux_del_adapters(muxc);
+err_put_parent:
+	i2c_put_adapter(parent);
 
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From a36d4637e4a06be067b8e327a0b1118bb2a73cb8 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Mon, 15 May 2017 18:48:55 +0200
Subject: i2c: mux: reg: rename label to indicate what it does

That maintains sanity if it is ever called from some other spot, and
also makes the label names coherent.

Signed-off-by: Peter Rosin <peda@axentia.se>
---
 drivers/i2c/muxes/i2c-mux-reg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 11974e3cd1e5..d97031804de8 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -228,7 +228,7 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 
 		ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], class);
 		if (ret)
-			goto add_adapter_failed;
+			goto err_del_mux_adapters;
 	}
 
 	dev_dbg(&pdev->dev, "%d port mux on %s adapter\n",
@@ -236,7 +236,7 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 
 	return 0;
 
-add_adapter_failed:
+err_del_mux_adapters:
 	i2c_mux_del_adapters(muxc);
 err_put_parent:
 	i2c_put_adapter(parent);
-- 
cgit v1.2.3-59-g8ed1b


From 9fce894d03a98ec8e8e8106a964644633d2772ee Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Mon, 15 May 2017 09:03:50 +0200
Subject: i2c: mux: only print failure message on error

As is, a failure message is printed unconditionally, which is confusing.
And noisy.

Fixes: 8d4d159f25a7 ("i2c: mux: provide more info on failure in i2c_mux_add_adapter")
Signed-off-by: Peter Rosin <peda@axentia.se>
---
 drivers/i2c/i2c-mux.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index 26f7237558ba..9669ca4937b8 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -395,18 +395,20 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
 	if (force_nr) {
 		priv->adap.nr = force_nr;
 		ret = i2c_add_numbered_adapter(&priv->adap);
-		dev_err(&parent->dev,
-			"failed to add mux-adapter %u as bus %u (error=%d)\n",
-			chan_id, force_nr, ret);
+		if (ret < 0) {
+			dev_err(&parent->dev,
+				"failed to add mux-adapter %u as bus %u (error=%d)\n",
+				chan_id, force_nr, ret);
+			goto err_free_priv;
+		}
 	} else {
 		ret = i2c_add_adapter(&priv->adap);
-		dev_err(&parent->dev,
-			"failed to add mux-adapter %u (error=%d)\n",
-			chan_id, ret);
-	}
-	if (ret < 0) {
-		kfree(priv);
-		return ret;
+		if (ret < 0) {
+			dev_err(&parent->dev,
+				"failed to add mux-adapter %u (error=%d)\n",
+				chan_id, ret);
+			goto err_free_priv;
+		}
 	}
 
 	WARN(sysfs_create_link(&priv->adap.dev.kobj, &muxc->dev->kobj,
@@ -422,6 +424,10 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
 
 	muxc->adapter[muxc->num_adapters++] = &priv->adap;
 	return 0;
+
+err_free_priv:
+	kfree(priv);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i2c_mux_add_adapter);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8df728e1ae614f592961e51f65d3e3212ede5a75 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 12 May 2017 13:48:41 +0100
Subject: arm64: Remove redundant mov from LL/SC cmpxchg

The cmpxchg implementation introduced by commit c342f78217e8 ("arm64:
cmpxchg: patch in lse instructions when supported by the CPU") performs
an apparently redundant register move of [old] to [oldval] in the
success case - it always uses the same register width as [oldval] was
originally loaded with, and is only executed when [old] and [oldval] are
known to be equal anyway.

The only effect it seemingly does have is to take up a surprising amount
of space in the kernel text, as removing it reveals:

   text	   data	    bss	    dec	    hex	filename
12426658	1348614	4499749	18275021	116dacd	vmlinux.o.new
12429238	1348614	4499749	18277601	116e4e1	vmlinux.o.old

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/atomic_ll_sc.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f819fdcff1ac..f5a2d09afb38 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -264,7 +264,6 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 	"	st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"	\
 	"	cbnz	%w[tmp], 1b\n"					\
 	"	" #mb "\n"						\
-	"	mov	%" #w "[oldval], %" #w "[old]\n"		\
 	"2:"								\
 	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
 	  [v] "+Q" (*(unsigned long *)ptr)				\
-- 
cgit v1.2.3-59-g8ed1b


From 78a19cfdf37d19002c83c8790853c1cc10feccdc Mon Sep 17 00:00:00 2001
From: Ganapatrao Kulkarni <ganapatrao.kulkarni@cavium.com>
Date: Tue, 2 May 2017 21:59:34 +0530
Subject: arm64: perf: Ignore exclude_hv when kernel is running in HYP

commit d98ecdaca296 ("arm64: perf: Count EL2 events if the kernel is
running in HYP") returns -EINVAL when perf system call perf_event_open is
called with exclude_hv != exclude_kernel. This change breaks applications
on VHE enabled ARMv8.1 platforms. The issue was observed with HHVM
application, which calls perf_event_open with exclude_hv = 1 and
exclude_kernel = 0.

There is no separate hypervisor privilege level when VHE is enabled, the
host kernel runs at EL2. So when VHE is enabled, we should ignore
exclude_hv from the application. This behaviour is consistent with PowerPC
where the exclude_hv is ignored when the hypervisor is not present and with
x86 where this flag is ignored.

Signed-off-by: Ganapatrao Kulkarni <ganapatrao.kulkarni@cavium.com>
[will: added comment to justify the behaviour of exclude_hv]
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/perf_event.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index bcc79471b38e..83a1b1ad189f 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -877,15 +877,24 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 
 	if (attr->exclude_idle)
 		return -EPERM;
-	if (is_kernel_in_hyp_mode() &&
-	    attr->exclude_kernel != attr->exclude_hv)
-		return -EINVAL;
+
+	/*
+	 * If we're running in hyp mode, then we *are* the hypervisor.
+	 * Therefore we ignore exclude_hv in this configuration, since
+	 * there's no hypervisor to sample anyway. This is consistent
+	 * with other architectures (x86 and Power).
+	 */
+	if (is_kernel_in_hyp_mode()) {
+		if (!attr->exclude_kernel)
+			config_base |= ARMV8_PMU_INCLUDE_EL2;
+	} else {
+		if (attr->exclude_kernel)
+			config_base |= ARMV8_PMU_EXCLUDE_EL1;
+		if (!attr->exclude_hv)
+			config_base |= ARMV8_PMU_INCLUDE_EL2;
+	}
 	if (attr->exclude_user)
 		config_base |= ARMV8_PMU_EXCLUDE_EL0;
-	if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
-		config_base |= ARMV8_PMU_EXCLUDE_EL1;
-	if (!attr->exclude_hv)
-		config_base |= ARMV8_PMU_INCLUDE_EL2;
 
 	/*
 	 * Install the filter into config_base as this is used to
-- 
cgit v1.2.3-59-g8ed1b


From 70957eaecc2e43308e403c80293bec3d59632412 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Thu, 11 May 2017 11:09:52 -0400
Subject: macvlan: Fix performance issues with vlan tagged packets

Macvlan always turns on offload features that have sofware
fallback (NETIF_GSO_SOFTWARE).  This allows much higher guest-guest
communications over macvtap.

However, macvtap does not turn on these features for vlan tagged traffic.
As a result, depending on the HW that mactap is configured on, the
performance of guest-guest communication over a vlan is very
inconsistent.  If the HW supports TSO/UFO over vlans, then the
performance will be fine.  If not, the the performance will suffer
greatly since the VM may continue using TSO/UFO, and will force the host
segment the traffic and possibly overlow the macvtap queue.

This patch adds the always on offloads to vlan_features.  This
makes sure that any vlan tagged traffic between 2 guest will not
be segmented needlessly.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index b34eaaae03fd..346ad2ff3998 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -789,10 +789,12 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu)
  */
 static struct lock_class_key macvlan_netdev_addr_lock_key;
 
-#define ALWAYS_ON_FEATURES \
-	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \
+#define ALWAYS_ON_OFFLOADS \
+	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
 	 NETIF_F_GSO_ROBUST)
 
+#define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
+
 #define MACVLAN_FEATURES \
 	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \
@@ -827,6 +829,7 @@ static int macvlan_init(struct net_device *dev)
 	dev->features		|= ALWAYS_ON_FEATURES;
 	dev->hw_features	|= NETIF_F_LRO;
 	dev->vlan_features	= lowerdev->vlan_features & MACVLAN_FEATURES;
+	dev->vlan_features	|= ALWAYS_ON_OFFLOADS;
 	dev->gso_max_size	= lowerdev->gso_max_size;
 	dev->gso_max_segs	= lowerdev->gso_max_segs;
 	dev->hard_header_len	= lowerdev->hard_header_len;
-- 
cgit v1.2.3-59-g8ed1b


From 4762010f09ac0453f613df345c5281e7f2dec510 Mon Sep 17 00:00:00 2001
From: "yuval.shaia@oracle.com" <yuval.shaia@oracle.com>
Date: Fri, 12 May 2017 09:10:51 +0300
Subject: net/mlx4_core: Use min3 to select number of MSI-X vectors

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 703205475524..83aab1e4c8c8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2862,12 +2862,10 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 	int port = 0;
 
 	if (msi_x) {
-		int nreq = dev->caps.num_ports * num_online_cpus() + 1;
-
-		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
-			     nreq);
-		if (nreq > MAX_MSIX)
-			nreq = MAX_MSIX;
+		int nreq = min3(dev->caps.num_ports *
+				(int)num_online_cpus() + 1,
+				dev->caps.num_eqs - dev->caps.reserved_eqs,
+				MAX_MSIX);
 
 		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
 		if (!entries)
-- 
cgit v1.2.3-59-g8ed1b


From d19b183cdc1fa3d70d6abe2a4c369e748cd7ebb8 Mon Sep 17 00:00:00 2001
From: Douglas Caetano dos Santos <douglascs@taghos.com.br>
Date: Fri, 12 May 2017 15:19:15 -0300
Subject: net/packet: fix missing net_device reference release

When using a TX ring buffer, if an error occurs processing a control
message (e.g. invalid message), the net_device reference is not
released.

Fixes c14ac9451c348 ("sock: enable timestamping using control messages")
Signed-off-by: Douglas Caetano dos Santos <douglascs@taghos.com.br>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f4001763134d..e3eeed19cc7a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2658,13 +2658,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
 	}
 
-	sockc.tsflags = po->sk.sk_tsflags;
-	if (msg->msg_controllen) {
-		err = sock_cmsg_send(&po->sk, msg, &sockc);
-		if (unlikely(err))
-			goto out;
-	}
-
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
@@ -2672,6 +2665,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
+	sockc.tsflags = po->sk.sk_tsflags;
+	if (msg->msg_controllen) {
+		err = sock_cmsg_send(&po->sk, msg, &sockc);
+		if (unlikely(err))
+			goto out_put;
+	}
+
 	if (po->sk.sk_socket->type == SOCK_RAW)
 		reserve = dev->hard_header_len;
 	size_max = po->tx_ring.frame_size
-- 
cgit v1.2.3-59-g8ed1b


From 8c977f5a856a7276450ddf3a7b57b4a8815b63f9 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Fri, 12 May 2017 22:54:23 +0800
Subject: mdio: mux: fix device_node_continue.cocci warnings

Device node iterators put the previous value of the index variable, so an
explicit put causes a double put.

In particular, of_mdiobus_register can fail before doing anything
interesting, so one could view it as a no-op from the reference count
point of view.

Generated by: scripts/coccinelle/iterators/device_node_continue.cocci

CC: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mux.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 6943c5ece44a..599ce24c514f 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -169,7 +169,6 @@ int mdio_mux_init(struct device *dev,
 		if (r) {
 			mdiobus_free(cb->mii_bus);
 			devm_kfree(dev, cb);
-			of_node_put(child_bus_node);
 		} else {
 			cb->next = pb->children;
 			pb->children = cb;
-- 
cgit v1.2.3-59-g8ed1b


From aa4ad88cfcd4ee45f527fb982140576711e3b501 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 14 May 2017 12:21:23 +0300
Subject: qed: Fix uninitialized data in aRFS infrastructure

Current memset is using incorrect type of variable, causing the
upper-half of the strucutre to be left uninitialized and causing:

  ethernet/qlogic/qed/qed_init_fw_funcs.c: In function 'qed_set_rfs_mode_disable':
  ethernet/qlogic/qed/qed_init_fw_funcs.c:993:3: error: '*((void *)&ramline+4)' is used uninitialized in this function [-Werror=uninitialized]

Fixes: d51e4af5c209 ("qed: aRFS infrastructure support")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index 67200c5498ab..0a8fde629991 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -983,7 +983,7 @@ void qed_set_rfs_mode_disable(struct qed_hwfn *p_hwfn,
 	memset(&camline, 0, sizeof(union gft_cam_line_union));
 	qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id,
 	       camline.cam_line_mapped.camline);
-	memset(&ramline, 0, sizeof(union gft_cam_line_union));
+	memset(&ramline, 0, sizeof(ramline));
 
 	for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++) {
 		u32 hw_addr = PRS_REG_GFT_PROFILE_MASK_RAM;
-- 
cgit v1.2.3-59-g8ed1b


From 66eb9f86e50547ec2a8ff7a75997066a74ef584b Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Fri, 12 May 2017 17:03:39 -0700
Subject: ipv6: avoid dad-failures for addresses with NODAD

Every address gets added with TENTATIVE flag even for the addresses with
IFA_F_NODAD flag and dad-work is scheduled for them. During this DAD process
we realize it's an address with NODAD and complete the process without
sending any probe. However the TENTATIVE flags stays on the
address for sometime enough to cause misinterpretation when we receive a NS.
While processing NS, if the address has TENTATIVE flag, we mark it DADFAILED
and endup with an address that was originally configured as NODAD with
DADFAILED.

We can't avoid scheduling dad_work for addresses with NODAD but we can
avoid adding TENTATIVE flag to avoid this racy situation.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8d297a79b568..6a4fb1e629fb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1022,7 +1022,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
-	ifa->flags = flags | IFA_F_TENTATIVE;
+	ifa->flags = flags;
+	/* No need to add the TENTATIVE flag for addresses with NODAD */
+	if (!(flags & IFA_F_NODAD))
+		ifa->flags |= IFA_F_TENTATIVE;
 	ifa->valid_lft = valid_lft;
 	ifa->prefered_lft = prefered_lft;
 	ifa->cstamp = ifa->tstamp = jiffies;
-- 
cgit v1.2.3-59-g8ed1b


From 91bcdb92d39711d1adb40c26b653b7978d93eb98 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Mon, 15 May 2017 09:43:05 -0400
Subject: dm thin metadata: call precommit before saving the roots

These calls were the wrong way round in __write_initial_superblock.

Cc: stable@vger.kernel.org
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-thin-metadata.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 0f0251d0d337..d31d18d9727c 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd)
 	if (r < 0)
 		return r;
 
-	r = save_sm_roots(pmd);
+	r = dm_tm_pre_commit(pmd->tm);
 	if (r < 0)
 		return r;
 
-	r = dm_tm_pre_commit(pmd->tm);
+	r = save_sm_roots(pmd);
 	if (r < 0)
 		return r;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0377a07c7a035e0d033cd8b29f0cb15244c0916a Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Mon, 15 May 2017 09:45:40 -0400
Subject: dm space map disk: fix some book keeping in the disk space map

When decrementing the reference count for a block, the free count wasn't
being updated if the reference count went to zero.

Cc: stable@vger.kernel.org
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/persistent-data/dm-space-map-disk.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index ebb280a14325..32adf6b4a9c7 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
 
 static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
+	int r;
+	uint32_t old_count;
 	enum allocation_event ev;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	return sm_ll_dec(&smd->ll, b, &ev);
+	r = sm_ll_dec(&smd->ll, b, &ev);
+	if (!r && (ev == SM_FREE)) {
+		/*
+		 * It's only free if it's also free in the last
+		 * transaction.
+		 */
+		r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+		if (!r && !old_count)
+			smd->nr_allocated_this_transaction--;
+	}
+
+	return r;
 }
 
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
-- 
cgit v1.2.3-59-g8ed1b


From ece0728037b15f4d31198f12b359104bcb5db4c8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 15 May 2017 17:28:36 +0200
Subject: dm rq: add a missing break to map_request

We don't want to bug when receiving a DM_MAPIO_KILL value..

Fixes: 412445ac ("dm: introduce a new DM_MAPIO_KILL return value")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-rq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 2af27026aa2e..b639fa7246ee 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -507,6 +507,7 @@ static int map_request(struct dm_rq_target_io *tio)
 	case DM_MAPIO_KILL:
 		/* The target wants to complete the I/O */
 		dm_kill_unmapped_request(rq, -EIO);
+		break;
 	default:
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
-- 
cgit v1.2.3-59-g8ed1b


From 18a482f5245cc875755090853e84283512b3e6bd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 15 May 2017 17:28:37 +0200
Subject: dm mpath: don't return -EIO from dm_report_EIO

Instead just turn the macro into a helper for the warning message.
This removes an unnecessary assignment and will allow the next commit to
fix a place where -EIO is the wrong return value.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 926a6bcb32c8..d55454f98b59 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -447,7 +447,7 @@ failed:
  * it has been invoked.
  */
 #define dm_report_EIO(m)						\
-({									\
+do {									\
 	struct mapped_device *md = dm_table_get_md((m)->ti->table);	\
 									\
 	pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
@@ -455,8 +455,7 @@ failed:
 		 test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 dm_noflush_suspending((m)->ti));			\
-	-EIO;								\
-})
+} while (0)
 
 /*
  * Map cloned requests (request-based multipath)
@@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_DELAY_REQUEUE;
-		return dm_report_EIO(m);	/* Failed */
+		dm_report_EIO(m);	/* Failed */
+		return -EIO;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		if (pg_init_all_paths(m))
@@ -558,7 +558,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_REQUEUE;
-		return dm_report_EIO(m);
+		dm_report_EIO(m);
+		return -EIO;
 	}
 
 	mpio->pgpath = pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 		if (atomic_read(&m->nr_valid_paths) == 0 &&
 		    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 			if (error == -EIO)
-				error = dm_report_EIO(m);
+				dm_report_EIO(m);
 			/* complete with the original error */
 			r = DM_ENDIO_DONE;
 		}
@@ -1524,8 +1525,10 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
 		fail_path(mpio->pgpath);
 
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
-	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-		return dm_report_EIO(m);
+	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+		dm_report_EIO(m);
+		return -EIO;
+	}
 
 	/* Queue for the daemon to resubmit */
 	dm_bio_restore(get_bio_details_from_bio(clone), clone);
-- 
cgit v1.2.3-59-g8ed1b


From f98e0eb68008aff9824d1c4dad7276c8bab83ca5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 15 May 2017 17:28:38 +0200
Subject: dm mpath: multipath_clone_and_map must not return -EIO

Since 412445ac ("dm: introduce a new DM_MAPIO_KILL return value"), the
clone_and_map_rq methods must not return errno values, so fix it up
to properly return DM_MAPIO_KILL, instead of the -EIO value that snuck
in due to a conflict between two patches.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d55454f98b59..3df056b73b66 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -481,7 +481,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_DELAY_REQUEUE;
 		dm_report_EIO(m);	/* Failed */
-		return -EIO;
+		return DM_MAPIO_KILL;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		if (pg_init_all_paths(m))
-- 
cgit v1.2.3-59-g8ed1b


From ca9df7ede41afd006d74fd6f09f36d909d0eaad7 Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Mon, 15 May 2017 16:04:36 +0200
Subject: net: netcp: fix check of requested timestamping filter

The driver doesn't support timestamping of all received packets and
should return error when trying to enable the HWTSTAMP_FILTER_ALL
filter.

Cc: WingMan Kwok <w-kwok2@ti.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/netcp_ethss.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 897176fc5043..dd92950a4615 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2651,7 +2651,6 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_NONE:
 		cpts_rx_enable(cpts, 0);
 		break;
-	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-- 
cgit v1.2.3-59-g8ed1b


From b18e5e86b44be0dca399d8e2383f97c8077392ce Mon Sep 17 00:00:00 2001
From: Thomas Tai <thomas.tai@oracle.com>
Date: Mon, 15 May 2017 10:51:07 -0700
Subject: ldmvsw: unregistering netdev before disable hardware

When running LDom binding/unbinding test, kernel may panic
in ldmvsw_open(). It is more likely that because we're removing
the ldc connection before unregistering the netdev in vsw_port_remove(),
we set up a window of time where one process could be removing the
device while another trying to UP the device. This also sometimes causes
vio handshake error due to opening a device without closing it completely.
We should unregister the netdev before we disable the "hardware".

Orabug: 25980913, 25925306

Signed-off-by: Thomas Tai <thomas.tai@oracle.com>
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/ldmvsw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 5a90fed06260..309747c7b8ae 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -413,6 +413,7 @@ static int vsw_port_remove(struct vio_dev *vdev)
 		del_timer_sync(&port->vio.timer);
 
 		napi_disable(&port->napi);
+		unregister_netdev(port->dev);
 
 		list_del_rcu(&port->list);
 
@@ -427,7 +428,6 @@ static int vsw_port_remove(struct vio_dev *vdev)
 
 		dev_set_drvdata(&vdev->dev, NULL);
 
-		unregister_netdev(port->dev);
 		free_netdev(port->dev);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8b671f906c2debc4f2393438c4e7668936522e99 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Mon, 15 May 2017 10:51:08 -0700
Subject: ldmvsw: stop the clean timer at beginning of remove

Stop the clean timer earlier to be sure there's no asynchronous
interference while stopping the port.

Orabug: 25748241

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/ldmvsw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 309747c7b8ae..5b56c24b6ed2 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -411,6 +411,7 @@ static int vsw_port_remove(struct vio_dev *vdev)
 
 	if (port) {
 		del_timer_sync(&port->vio.timer);
+		del_timer_sync(&port->clean_timer);
 
 		napi_disable(&port->napi);
 		unregister_netdev(port->dev);
@@ -418,7 +419,6 @@ static int vsw_port_remove(struct vio_dev *vdev)
 		list_del_rcu(&port->list);
 
 		synchronize_rcu();
-		del_timer_sync(&port->clean_timer);
 		spin_lock_irqsave(&port->vp->lock, flags);
 		sunvnet_port_rm_txq_common(port);
 		spin_unlock_irqrestore(&port->vp->lock, flags);
-- 
cgit v1.2.3-59-g8ed1b


From 2d4456c73a487abe53863e10641c2f73537edf5c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 11 May 2017 10:27:35 -0500
Subject: block: xen-blkback: add null check to avoid null pointer dereference

Add null check before calling xen_blkif_put() to avoid potential
null pointer dereference.

Addresses-Coverity-ID: 1350942
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/xenbus.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8fe61b5dc5a6..1f3dfaa54d87 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -504,11 +504,13 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 
 	dev_set_drvdata(&dev->dev, NULL);
 
-	if (be->blkif)
+	if (be->blkif) {
 		xen_blkif_disconnect(be->blkif);
 
-	/* Put the reference we set in xen_blkif_alloc(). */
-	xen_blkif_put(be->blkif);
+		/* Put the reference we set in xen_blkif_alloc(). */
+		xen_blkif_put(be->blkif);
+	}
+
 	kfree(be->mode);
 	kfree(be);
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From cde97f8492ac4425c4d0647a308e15e78cb4c218 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 8 May 2017 17:16:27 +1000
Subject: selftests/powerpc: Test TM and VMX register state

Test that the VMX checkpointed register state is maintained when a VMX
unavailable exception is taken during a transaction.

Thanks to Breno Leitao <brenohl@br.ibm.com> and
Gustavo Bueno Romero <gromero@br.ibm.com> for the original test this
is based heavily on.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Cyril Bur <cyrilbur@gmail.com>
[mpe: Add to .gitignore, always build it 64-bit to fix build errors]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/tm/.gitignore      |   1 +
 tools/testing/selftests/powerpc/tm/Makefile        |   4 +-
 .../testing/selftests/powerpc/tm/tm-vmx-unavail.c  | 118 +++++++++++++++++++++
 3 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c

diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 427621792229..2f1f7b013293 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -11,3 +11,4 @@ tm-signal-context-chk-fpu
 tm-signal-context-chk-gpr
 tm-signal-context-chk-vmx
 tm-signal-context-chk-vsx
+tm-vmx-unavail
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5576ee6a51f2..958c11c14acd 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -2,7 +2,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
 	tm-signal-context-chk-vmx tm-signal-context-chk-vsx
 
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
-	tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS)
+	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \
+	$(SIGNAL_CONTEXT_CHK_TESTS)
 
 include ../../lib.mk
 
@@ -13,6 +14,7 @@ CFLAGS += -mhtm
 $(OUTPUT)/tm-syscall: tm-syscall-asm.S
 $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
 $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
 
 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
new file mode 100644
index 000000000000..137185ba4937
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2017, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Breno Leitao <brenohl@br.ibm.com> &
+ *           Gustavo Bueno Romero <gromero@br.ibm.com>
+ * Edited: Michael Neuling
+ *
+ * Force VMX unavailable during a transaction and see if it corrupts
+ * the checkpointed VMX register state after the abort.
+ */
+
+#include <inttypes.h>
+#include <htmintrin.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int passed;
+
+void *worker(void *unused)
+{
+	__int128 vmx0;
+	uint64_t texasr;
+
+	asm goto (
+		"li       3, 1;"  /* Stick non-zero value in VMX0 */
+		"std      3, 0(%[vmx0_ptr]);"
+		"lvx      0, 0, %[vmx0_ptr];"
+
+		/* Wait here a bit so we get scheduled out 255 times */
+		"lis      3, 0x3fff;"
+		"1: ;"
+		"addi     3, 3, -1;"
+		"cmpdi    3, 0;"
+		"bne      1b;"
+
+		/* Kernel will hopefully turn VMX off now */
+
+		"tbegin. ;"
+		"beq      failure;"
+
+		/* Cause VMX unavail. Any VMX instruction */
+		"vaddcuw  0,0,0;"
+
+		"tend. ;"
+		"b        %l[success];"
+
+		/* Check VMX0 sanity after abort */
+		"failure: ;"
+		"lvx       1,  0, %[vmx0_ptr];"
+		"vcmpequb. 2,  0, 1;"
+		"bc        4, 24, %l[value_mismatch];"
+		"b        %l[value_match];"
+		:
+		: [vmx0_ptr] "r"(&vmx0)
+		: "r3"
+		: success, value_match, value_mismatch
+		);
+
+	/* HTM aborted and VMX0 is corrupted */
+value_mismatch:
+	texasr = __builtin_get_texasr();
+
+	printf("\n\n==============\n\n");
+	printf("Failure with error: %lx\n",   _TEXASR_FAILURE_CODE(texasr));
+	printf("Summary error     : %lx\n",   _TEXASR_FAILURE_SUMMARY(texasr));
+	printf("TFIAR exact       : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr));
+
+	passed = 0;
+	return NULL;
+
+	/* HTM aborted but VMX0 is correct */
+value_match:
+//	printf("!");
+	return NULL;
+
+success:
+//	printf(".");
+	return NULL;
+}
+
+int tm_vmx_unavail_test()
+{
+	int threads;
+	pthread_t *thread;
+
+	SKIP_IF(!have_htm());
+
+	passed = 1;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+	thread = malloc(sizeof(pthread_t)*threads);
+	if (!thread)
+		return EXIT_FAILURE;
+
+	for (uint64_t i = 0; i < threads; i++)
+		pthread_create(&thread[i], NULL, &worker, NULL);
+
+	for (uint64_t i = 0; i < threads; i++)
+		pthread_join(thread[i], NULL);
+
+	free(thread);
+
+	return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+
+int main(int argc, char **argv)
+{
+	return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test");
+}
-- 
cgit v1.2.3-59-g8ed1b


From bbb075ddf7d58762040ca413ad82e9974713def3 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Fri, 12 May 2017 14:52:06 +0530
Subject: powerpc/powernv: Set NAPSTATELOST after recovering paca on P9 DD1

Commit 17ed4c8f81da ("powerpc/powernv: Recover correct PACA on wakeup
from a stop on P9 DD1") promises to set the NAPSTATELOST bit in paca
after recovering the correct paca for the thread waking up from stop1
on DD1, so that the GPRs can be correctly restored on the stop exit
path. However, it loads the value 1 into r3, but stores the value in
r0 into NAPSTATELOST(r13).

Fix this by correctly set the NAPSTATELOST bit in paca after
recovering the paca on POWER9 DD1.

Fixes: 17ed4c8f81da ("powerpc/powernv: Recover correct PACA on wakeup from a stop on P9 DD1")
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/idle_book3s.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 07d4e0ad60db..4898d676dcae 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -416,7 +416,7 @@ power9_dd1_recover_paca:
 	 * which needs to be restored from the stack.
 	 */
 	li	r3, 1
-	stb	r0,PACA_NAPSTATELOST(r13)
+	stb	r3,PACA_NAPSTATELOST(r13)
 	blr
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From d04c02f8aa96d82e4cbe783f85a820aae820e746 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Mon, 15 May 2017 23:40:05 +0530
Subject: powerpc/kprobes: Fix handling of instruction emulation on probe
 re-entry

Commit 22d8b3dec214c ("powerpc/kprobes: Emulate instructions on kprobe
handler re-entry") enabled emulating instructions on kprobe re-entry,
rather than single-stepping always. However, we didn't update the single
stepping code to only be run if the emulation fails. Also, we missed
re-enabling preemption if the instruction emulation was successful. Fix
those issues.

Fixes: 22d8b3dec214c ("powerpc/kprobes: Emulate instructions on kprobe handler re-entry")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/kprobes.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 160ae0fa7d0d..fc4343514bed 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -305,16 +305,17 @@ int kprobe_handler(struct pt_regs *regs)
 			save_previous_kprobe(kcb);
 			set_current_kprobe(p, regs, kcb);
 			kprobes_inc_nmissed_count(p);
-			prepare_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_REENTER;
 			if (p->ainsn.boostable >= 0) {
 				ret = try_to_emulate(p, regs);
 
 				if (ret > 0) {
 					restore_previous_kprobe(kcb);
+					preempt_enable_no_resched();
 					return 1;
 				}
 			}
+			prepare_singlestep(p, regs);
 			return 1;
 		} else {
 			if (*addr != BREAKPOINT_INSTRUCTION) {
-- 
cgit v1.2.3-59-g8ed1b


From fa16b69f1299004b60b625f181143500a246e5cb Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 16 May 2017 09:11:33 +0200
Subject: ALSA: hda - No loopback on ALC299 codec

ALC299 has no loopback mixer, but the driver still tries to add a beep
control over the mixer NID which leads to the error at accessing it.
This patch fixes it by properly declaring mixer_nid=0 for this codec.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195775
Fixes: 28f1f9b26cee ("ALSA: hda/realtek - Add new codec ID ALC299")
Cc: stable@vger.kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 58df440013c5..9c22ad694534 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6465,8 +6465,11 @@ static int patch_alc269(struct hda_codec *codec)
 		break;
 	case 0x10ec0225:
 	case 0x10ec0295:
+		spec->codec_variant = ALC269_TYPE_ALC225;
+		break;
 	case 0x10ec0299:
 		spec->codec_variant = ALC269_TYPE_ALC225;
+		spec->gen.mixer_nid = 0; /* no loopback on ALC299 */
 		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
-- 
cgit v1.2.3-59-g8ed1b


From c953d63548207a085abcb12a15fefc8a11ffdf0a Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Tue, 16 May 2017 09:30:18 +0800
Subject: ebtables: arpreply: Add the standard target sanity check

The info->target comes from userspace and it would be used directly.
So we need to add the sanity check to make sure it is a valid standard
target, although the ebtables tool has already checked it. Kernel needs
to validate anything coming from userspace.

If the target is set as an evil value, it would break the ebtables
and cause a panic. Because the non-standard target is treated as one
offset.

Now add one helper function ebt_invalid_target, and we would replace
the macro INVALID_TARGET later.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 5 +++++
 net/bridge/netfilter/ebt_arpreply.c       | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a30efb437e6d..e0cbf17af780 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 /* True if the target is not a standard target */
 #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
+static inline bool ebt_invalid_target(int target)
+{
+	return (target < -NUM_STANDARD_TARGETS || target >= 0);
+}
+
 #endif
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 5929309beaa1..db85230e49c3 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
 		return -EINVAL;
+	if (ebt_invalid_target(info->target))
+		return -EINVAL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 2952a6070e07ebdd5896f1f5b861acad677caded Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 16 May 2017 10:34:54 +0100
Subject: kvm: arm/arm64: Force reading uncached stage2 PGD

Make sure we don't use a cached value of the KVM stage2 PGD while
resetting the PGD.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 909a1a793b31..704e35f312a4 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -837,7 +837,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	spin_lock(&kvm->mmu_lock);
 	if (kvm->arch.pgd) {
 		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-		pgd = kvm->arch.pgd;
+		pgd = READ_ONCE(kvm->arch.pgd);
 		kvm->arch.pgd = NULL;
 	}
 	spin_unlock(&kvm->mmu_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 0c428a6a9256fcd66817e12db32a50b405ed2e5c Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 16 May 2017 10:34:55 +0100
Subject: kvm: arm/arm64: Fix use after free of stage2 page table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We yield the kvm->mmu_lock occassionaly while performing an operation
(e.g, unmap or permission changes) on a large area of stage2 mappings.
However this could possibly cause another thread to clear and free up
the stage2 page tables while we were waiting for regaining the lock and
thus the original thread could end up in accessing memory that was
freed. This patch fixes the problem by making sure that the stage2
pagetable is still valid after we regain the lock. The fact that
mmu_notifer->release() could be called twice (via __mmu_notifier_release
and mmu_notifier_unregsister) enhances the possibility of hitting
this race where there are two threads trying to unmap the entire guest
shadow pages.

While at it, cleanup the redudant checks around cond_resched_lock in
stage2_wp_range(), as cond_resched_lock already does the same checks.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: andreyknvl@google.com
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/mmu.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 704e35f312a4..a2d63247d1bb 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	assert_spin_locked(&kvm->mmu_lock);
 	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
+		/*
+		 * Make sure the page table is still active, as another thread
+		 * could have possibly freed the page table, while we released
+		 * the lock.
+		 */
+		if (!READ_ONCE(kvm->arch.pgd))
+			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (!stage2_pgd_none(*pgd))
 			unmap_stage2_puds(kvm, pgd, addr, next);
@@ -1170,11 +1177,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		 * large. Otherwise, we may see kernel panics with
 		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
 		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
-		 * will also starve other vCPUs.
+		 * will also starve other vCPUs. We have to also make sure
+		 * that the page tables are not freed while we released
+		 * the lock.
 		 */
-		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-			cond_resched_lock(&kvm->mmu_lock);
-
+		cond_resched_lock(&kvm->mmu_lock);
+		if (!READ_ONCE(kvm->arch.pgd))
+			break;
 		next = stage2_pgd_addr_end(addr, end);
 		if (stage2_pgd_present(*pgd))
 			stage2_wp_puds(pgd, addr, next);
-- 
cgit v1.2.3-59-g8ed1b


From 682179592e48fa66056fbad1a86604be4992f885 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Fri, 1 Apr 2016 17:13:13 +0300
Subject: usb: dwc3: pci: add Intel Cannonlake PCI IDs

Intel Cannonlake PCH has the same DWC3 than Intel
Sunrisepoint. Add the new IDs to the supported devices.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index a15ec71d0423..84a2cebfc712 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -39,6 +39,8 @@
 #define PCI_DEVICE_ID_INTEL_APL			0x5aaa
 #define PCI_DEVICE_ID_INTEL_KBP			0xa2b0
 #define PCI_DEVICE_ID_INTEL_GLK			0x31aa
+#define PCI_DEVICE_ID_INTEL_CNPLP		0x9dee
+#define PCI_DEVICE_ID_INTEL_CNPH		0xa36e
 
 #define PCI_INTEL_BXT_DSM_UUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
 #define PCI_INTEL_BXT_FUNC_PMU_PWR	4
@@ -270,6 +272,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPLP), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPH), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
 	{  }	/* Terminating Entry */
 };
-- 
cgit v1.2.3-59-g8ed1b


From f1d6826cae30e97e37a1f2481d7e1dc4faa09ce1 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Fri, 21 Apr 2017 15:58:08 +0300
Subject: usb: dwc3: gadget: Fix ISO transfer performance

Commit 08a36b543803 ("usb: dwc3: gadget: simplify __dwc3_gadget_ep_queue()")
caused a small change in the way ISO transfer is handled in the case
when XferInProgress event happens on Isoc EP with an active transfer.
This caused a performance degradation of 50%. e.g. using g_webcam on DUT
and luvcview on host the video frame rate dropped from 16fps to 8fps
@high-speed.

Make the ISO transfer handling equivalent to that prior to that commit
to get back the original ISO performance numbers.

Fixes: 08a36b543803 ("usb: dwc3: gadget: simplify __dwc3_gadget_ep_queue()")
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 6f6f0b3be3ad..3d6180805eb8 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1261,14 +1261,24 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 				__dwc3_gadget_start_isoc(dwc, dep, cur_uf);
 				dep->flags &= ~DWC3_EP_PENDING_REQUEST;
 			}
+			return 0;
 		}
-		return 0;
+
+		if ((dep->flags & DWC3_EP_BUSY) &&
+		    !(dep->flags & DWC3_EP_MISSED_ISOC)) {
+			WARN_ON_ONCE(!dep->resource_index);
+			ret = __dwc3_gadget_kick_transfer(dep,
+							  dep->resource_index);
+		}
+
+		goto out;
 	}
 
 	if (!dwc3_calc_trbs_left(dep))
 		return 0;
 
 	ret = __dwc3_gadget_kick_transfer(dep, 0);
+out:
 	if (ret == -EBUSY)
 		ret = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From d325a1de49d61ee11aca58a529571c91ecea7879 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Thu, 11 May 2017 17:26:47 -0700
Subject: usb: dwc3: gadget: Prevent losing events in event cache

The dwc3 driver can overwite its previous events if its top-half IRQ
handler (TH) gets invoked again before processing the events in the
cache. We see this as a hang in the file transfer and the host will
attempt to reset the device. TH gets the event count and deasserts the
interrupt line by writing DWC3_GEVNTSIZ_INTMASK to DWC3_GEVNTSIZ. If
there's a new event coming between reading the event count and interrupt
deassertion, dwc3 will lose previous pending events. More generally, we
will see 0 event count, which should not affect anything.

This shouldn't be possible in the current dwc3 implementation. However,
through testing and reading the PCIe trace, the TH occasionally still
gets invoked one more time after HW interrupt deassertion. (With PCIe
legacy interrupts, TH is called repeatedly as long as the interrupt line
is asserted). We suspect that there is a small detection delay in the
SW.

To avoid this issue, Check DWC3_EVENT_PENDING flag to determine if the
events are processed in the bottom-half IRQ handler. If not, return
IRQ_HANDLED and don't process new event.

Cc: stable@vger.kernel.org
Signed-off-by: Thinh Nguyen <thinhn@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3d6180805eb8..aea9a5b948b4 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3036,6 +3036,15 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
 		return IRQ_HANDLED;
 	}
 
+	/*
+	 * With PCIe legacy interrupt, test shows that top-half irq handler can
+	 * be called again after HW interrupt deassertion. Check if bottom-half
+	 * irq event handler completes before caching new event to prevent
+	 * losing events.
+	 */
+	if (evt->flags & DWC3_EVENT_PENDING)
+		return IRQ_HANDLED;
+
 	count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
 	count &= DWC3_GEVNTCOUNT_MASK;
 	if (!count)
-- 
cgit v1.2.3-59-g8ed1b


From 844cf8a9d51f1d1e116d0bb18c353a2a94c70e79 Mon Sep 17 00:00:00 2001
From: Bogdan Mirea <Bogdan-Stefan_mirea@mentor.com>
Date: Sun, 30 Apr 2017 13:21:26 +0300
Subject: usb: gadget: gserial: check if console kthread exists

Check for bad pointer that may result because of kthread_create failure.
This check is needed since the gserial setup callback function
(gs_console_setup()) is only freeing the info->con_buf in case of
kthread_create failure which will result into bad info->console_thread
pointer.
Without checking info->console_thread pointer validity in the
gserial_console_exit() function, before calling kthread_stop(), the
rmmod will generate Kernel Oops.

Signed-off-by: Bogdan Mirea <Bogdan-Stefan_mirea@mentor.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/u_serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index 000677c991b0..9b0805f55ad7 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -1256,7 +1256,7 @@ static void gserial_console_exit(void)
 	struct gscons_info *info = &gscons_info;
 
 	unregister_console(&gserial_cons);
-	if (info->console_thread != NULL)
+	if (!IS_ERR_OR_NULL(info->console_thread))
 		kthread_stop(info->console_thread);
 	gs_buf_free(&info->con_buf);
 }
-- 
cgit v1.2.3-59-g8ed1b


From b7f73850bb4fac1e2209a4dd5e636d39be92f42c Mon Sep 17 00:00:00 2001
From: William Wu <william.wu@rock-chips.com>
Date: Tue, 25 Apr 2017 17:45:48 +0800
Subject: usb: gadget: f_fs: avoid out of bounds access on comp_desc

Companion descriptor is only used for SuperSpeed endpoints,
if the endpoints are HighSpeed or FullSpeed, the Companion
descriptor will not allocated, so we can only access it if
gadget is SuperSpeed.

I can reproduce this issue on Rockchip platform rk3368 SoC
which supports USB 2.0, and use functionfs for ADB. Kernel
build with CONFIG_KASAN=y and CONFIG_SLUB_DEBUG=y report
the following BUG:

==================================================================
BUG: KASAN: slab-out-of-bounds in ffs_func_set_alt+0x224/0x3a0 at addr ffffffc0601f6509
Read of size 1 by task swapper/0/0
============================================================================
BUG kmalloc-256 (Not tainted): kasan: bad access detected
----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in ffs_func_bind+0x52c/0x99c age=1275 cpu=0 pid=1
alloc_debug_processing+0x128/0x17c
___slab_alloc.constprop.58+0x50c/0x610
__slab_alloc.isra.55.constprop.57+0x24/0x34
__kmalloc+0xe0/0x250
ffs_func_bind+0x52c/0x99c
usb_add_function+0xd8/0x1d4
configfs_composite_bind+0x48c/0x570
udc_bind_to_driver+0x6c/0x170
usb_udc_attach_driver+0xa4/0xd0
gadget_dev_desc_UDC_store+0xcc/0x118
configfs_write_file+0x1a0/0x1f8
__vfs_write+0x64/0x174
vfs_write+0xe4/0x200
SyS_write+0x68/0xc8
el0_svc_naked+0x24/0x28
INFO: Freed in inode_doinit_with_dentry+0x3f0/0x7c4 age=1275 cpu=7 pid=247
...
Call trace:
[<ffffff900808aab4>] dump_backtrace+0x0/0x230
[<ffffff900808acf8>] show_stack+0x14/0x1c
[<ffffff90084ad420>] dump_stack+0xa0/0xc8
[<ffffff90082157cc>] print_trailer+0x188/0x198
[<ffffff9008215948>] object_err+0x3c/0x4c
[<ffffff900821b5ac>] kasan_report+0x324/0x4dc
[<ffffff900821aa38>] __asan_load1+0x24/0x50
[<ffffff90089eb750>] ffs_func_set_alt+0x224/0x3a0
[<ffffff90089d3760>] composite_setup+0xdcc/0x1ac8
[<ffffff90089d7394>] android_setup+0x124/0x1a0
[<ffffff90089acd18>] _setup+0x54/0x74
[<ffffff90089b6b98>] handle_ep0+0x3288/0x4390
[<ffffff90089b9b44>] dwc_otg_pcd_handle_out_ep_intr+0x14dc/0x2ae4
[<ffffff90089be85c>] dwc_otg_pcd_handle_intr+0x1ec/0x298
[<ffffff90089ad680>] dwc_otg_pcd_irq+0x10/0x20
[<ffffff9008116328>] handle_irq_event_percpu+0x124/0x3ac
[<ffffff9008116610>] handle_irq_event+0x60/0xa0
[<ffffff900811af30>] handle_fasteoi_irq+0x10c/0x1d4
[<ffffff9008115568>] generic_handle_irq+0x30/0x40
[<ffffff90081159b4>] __handle_domain_irq+0xac/0xdc
[<ffffff9008080e9c>] gic_handle_irq+0x64/0xa4
...
Memory state around the buggy address:
  ffffffc0601f6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  ffffffc0601f6480: 00 00 00 00 00 00 00 00 00 00 06 fc fc fc fc fc
 >ffffffc0601f6500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                       ^
  ffffffc0601f6580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffffffc0601f6600: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
==================================================================

Signed-off-by: William Wu <william.wu@rock-chips.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_fs.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 71dd27c0d7f2..47dda3450abd 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1858,12 +1858,12 @@ static int ffs_func_eps_enable(struct ffs_function *func)
 		ep->ep->driver_data = ep;
 		ep->ep->desc = ds;
 
-		comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
-				USB_DT_ENDPOINT_SIZE);
-		ep->ep->maxburst = comp_desc->bMaxBurst + 1;
-
-		if (needs_comp_desc)
+		if (needs_comp_desc) {
+			comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
+					USB_DT_ENDPOINT_SIZE);
+			ep->ep->maxburst = comp_desc->bMaxBurst + 1;
 			ep->ep->comp_desc = comp_desc;
+		}
 
 		ret = usb_ep_enable(ep->ep);
 		if (likely(!ret)) {
-- 
cgit v1.2.3-59-g8ed1b


From 018047a1dba7636e1f7fdae2cc290a528991d648 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 23 Apr 2017 13:55:13 +0800
Subject: usb: dwc3: keystone: check return value

Function devm_clk_get() returns an ERR_PTR when it fails. However, in
function kdwc3_probe(), its return value is not checked, which may
result in a bad memory access bug. This patch fixes the bug.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-keystone.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c
index 72664700b8a2..12ee23f53cdd 100644
--- a/drivers/usb/dwc3/dwc3-keystone.c
+++ b/drivers/usb/dwc3/dwc3-keystone.c
@@ -107,6 +107,10 @@ static int kdwc3_probe(struct platform_device *pdev)
 		return PTR_ERR(kdwc->usbss);
 
 	kdwc->clk = devm_clk_get(kdwc->dev, "usb");
+	if (IS_ERR(kdwc->clk)) {
+		dev_err(kdwc->dev, "unable to get usb clock\n");
+		return PTR_ERR(kdwc->clk);
+	}
 
 	error = clk_prepare_enable(kdwc->clk);
 	if (error < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From f0d39a179b9cd38c739acfc4f92720a0c79e1d66 Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rmfrfs@gmail.com>
Date: Fri, 12 May 2017 21:16:14 +0100
Subject: staging: typec: fusb302: reset i2c_busy state in error

Fix reset of i2c_busy flag if an error occurs during the i2c block read.

Signed-off-by: Rui Miguel Silva <rmfrfs@gmail.com>
Acked-by: Yueyao Zhu <yueyao.zhu@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/fusb302/fusb302.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index 6bd602db11be..7b0b9e51016d 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -365,13 +365,15 @@ static int fusb302_i2c_block_read(struct fusb302_chip *chip, u8 address,
 	if (ret < 0) {
 		fusb302_log(chip, "cannot block read 0x%02x, len=%d, ret=%d",
 			    address, length, ret);
-		return ret;
+		goto done;
 	}
 	if (ret != length) {
 		fusb302_log(chip, "only read %d/%d bytes from 0x%02x",
 			    ret, length, address);
-		return -EIO;
+		ret = -EIO;
 	}
+
+done:
 	atomic_set(&chip->i2c_busy, 0);
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 50b7c322cfc8b44a36b1373dab2177db23e3282c Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rmfrfs@gmail.com>
Date: Fri, 12 May 2017 21:16:15 +0100
Subject: staging: typec: fusb302: refactor resume retry mechanism

The i2c functions need to test the pm_suspend state and do, if needed, some
retry before i2c operations. This code was repeated 4x.

To isolate this, create a new function to check suspend state and call it in
every need place.

As at it, move the error message from pr_err to dev_err.

Signed-off-by: Rui Miguel Silva <rmfrfs@gmail.com>
Acked-by: Yueyao Zhu <yueyao.zhu@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/fusb302/fusb302.c | 70 +++++++++++++++++----------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c
index 7b0b9e51016d..4a356e509fe4 100644
--- a/drivers/staging/typec/fusb302/fusb302.c
+++ b/drivers/staging/typec/fusb302/fusb302.c
@@ -264,22 +264,36 @@ static void fusb302_debugfs_exit(const struct fusb302_chip *chip) { }
 
 #define FUSB302_RESUME_RETRY 10
 #define FUSB302_RESUME_RETRY_SLEEP 50
-static int fusb302_i2c_write(struct fusb302_chip *chip,
-			     u8 address, u8 data)
+
+static bool fusb302_is_suspended(struct fusb302_chip *chip)
 {
 	int retry_cnt;
-	int ret = 0;
 
-	atomic_set(&chip->i2c_busy, 1);
 	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
 		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
+			dev_err(chip->dev, "i2c: pm suspend, retry %d/%d\n",
+				retry_cnt + 1, FUSB302_RESUME_RETRY);
 			msleep(FUSB302_RESUME_RETRY_SLEEP);
 		} else {
-			break;
+			return false;
 		}
 	}
+
+	return true;
+}
+
+static int fusb302_i2c_write(struct fusb302_chip *chip,
+			     u8 address, u8 data)
+{
+	int ret = 0;
+
+	atomic_set(&chip->i2c_busy, 1);
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
+	}
+
 	ret = i2c_smbus_write_byte_data(chip->i2c_client, address, data);
 	if (ret < 0)
 		fusb302_log(chip, "cannot write 0x%02x to 0x%02x, ret=%d",
@@ -292,21 +306,17 @@ static int fusb302_i2c_write(struct fusb302_chip *chip,
 static int fusb302_i2c_block_write(struct fusb302_chip *chip, u8 address,
 				   u8 length, const u8 *data)
 {
-	int retry_cnt;
 	int ret = 0;
 
 	if (length <= 0)
 		return ret;
 	atomic_set(&chip->i2c_busy, 1);
-	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
-		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
-			msleep(FUSB302_RESUME_RETRY_SLEEP);
-		} else {
-			break;
-		}
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
 	}
+
 	ret = i2c_smbus_write_i2c_block_data(chip->i2c_client, address,
 					     length, data);
 	if (ret < 0)
@@ -320,19 +330,15 @@ static int fusb302_i2c_block_write(struct fusb302_chip *chip, u8 address,
 static int fusb302_i2c_read(struct fusb302_chip *chip,
 			    u8 address, u8 *data)
 {
-	int retry_cnt;
 	int ret = 0;
 
 	atomic_set(&chip->i2c_busy, 1);
-	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
-		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
-			msleep(FUSB302_RESUME_RETRY_SLEEP);
-		} else {
-			break;
-		}
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
 	}
+
 	ret = i2c_smbus_read_byte_data(chip->i2c_client, address);
 	*data = (u8)ret;
 	if (ret < 0)
@@ -345,21 +351,17 @@ static int fusb302_i2c_read(struct fusb302_chip *chip,
 static int fusb302_i2c_block_read(struct fusb302_chip *chip, u8 address,
 				  u8 length, u8 *data)
 {
-	int retry_cnt;
 	int ret = 0;
 
 	if (length <= 0)
 		return ret;
 	atomic_set(&chip->i2c_busy, 1);
-	for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) {
-		if (atomic_read(&chip->pm_suspend)) {
-			pr_err("fusb302_i2c: pm suspend, retry %d/%d\n",
-			       retry_cnt + 1, FUSB302_RESUME_RETRY);
-			msleep(FUSB302_RESUME_RETRY_SLEEP);
-		} else {
-			break;
-		}
+
+	if (fusb302_is_suspended(chip)) {
+		atomic_set(&chip->i2c_busy, 0);
+		return -ETIMEDOUT;
 	}
+
 	ret = i2c_smbus_read_i2c_block_data(chip->i2c_client, address,
 					    length, data);
 	if (ret < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From b72d7451209a0bad4264f5f4cb389e7f71cc5ad4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 May 2017 13:30:05 +0200
Subject: staging: fsl-dpaa2/eth: add ETHERNET dependency

The new driver cannot link correctly when the netdevice infrastructure
is disabled:

ERROR: "netdev_info" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined!
ERROR: "skb_to_sgvec" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined!
ERROR: "napi_disable" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined!
ERROR: "napi_schedule_prep" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined!
ERROR: "__napi_schedule_irqoff" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined!
ERROR: "netif_carrier_on" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined!

This adds a dependency on NETDEVICES and ETHERNET.

Fixes: 0352d1d85201 ("staging: fsl-dpaa2/eth: Add APIs for DPNI objects")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/fsl-dpaa2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig
index 2e325cb747ae..730fd6d4db33 100644
--- a/drivers/staging/fsl-dpaa2/Kconfig
+++ b/drivers/staging/fsl-dpaa2/Kconfig
@@ -12,6 +12,7 @@ config FSL_DPAA2
 config FSL_DPAA2_ETH
 	tristate "Freescale DPAA2 Ethernet"
 	depends on FSL_DPAA2 && FSL_MC_DPIO
+	depends on NETDEVICES && ETHERNET
 	---help---
 	  Ethernet driver for Freescale DPAA2 SoCs, using the
 	  Freescale MC bus driver
-- 
cgit v1.2.3-59-g8ed1b


From 2c4569ca26986d18243f282dd727da27e9adae4c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 11 May 2017 13:54:11 +0200
Subject: genirq: Fix chained interrupt data ordering

irq_set_chained_handler_and_data() sets up the chained interrupt and then
stores the handler data.

That's racy against an immediate interrupt which gets handled before the
store of the handler data happened. The handler will dereference a NULL
pointer and crash.

Cure it by storing handler data before installing the chained handler.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
 kernel/irq/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 686be4b73018..c94da688ee9b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -880,8 +880,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 	if (!desc)
 		return;
 
-	__irq_do_set_handler(desc, handle, 1, NULL);
 	desc->irq_common_data.handler_data = data;
+	__irq_do_set_handler(desc, handle, 1, NULL);
 
 	irq_put_desc_busunlock(desc, flags);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7c62de5f3fc92291decc0dac5f36949bdc3fb575 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 13 Apr 2017 10:21:21 +0530
Subject: ARM: dts: dra7: Add power hold and power controller properties to
 palmas

Add power hold and power controller properties to palmas node.
This is needed to shutdown pmic correctly on boards with
powerhold set.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/dra7-evm.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 4bc4b575c99b..31a9e061ddd0 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -204,6 +204,8 @@
 	tps659038: tps659038@58 {
 		compatible = "ti,tps659038";
 		reg = <0x58>;
+		ti,palmas-override-powerhold;
+		ti,system-power-controller;
 
 		tps659038_pmic {
 			compatible = "ti,tps659038-pmic";
-- 
cgit v1.2.3-59-g8ed1b


From 910958b65cf638347e6c8f65d1f749d6a4cfb7e3 Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas@kemnade.info>
Date: Fri, 28 Apr 2017 12:27:35 +0200
Subject: ARM: dts: gta04: fix polarity of clocks for mcbsp4

The clock polarity setting of the mcbsp connected to
the modem was wrong so almost only noise
was received.
With this patch it is also the same as it was on
earlier non-dt kernels where it was working properly

Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/omap3-gta04.dtsi | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index b3a8b1f24499..9ec737069369 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -55,7 +55,8 @@
 		simple-audio-card,bitclock-master = <&telephony_link_master>;
 		simple-audio-card,frame-master = <&telephony_link_master>;
 		simple-audio-card,format = "i2s";
-
+		simple-audio-card,bitclock-inversion;
+		simple-audio-card,frame-inversion;
 		simple-audio-card,cpu {
 			sound-dai = <&mcbsp4>;
 		};
-- 
cgit v1.2.3-59-g8ed1b


From f0b8dca8336b112d23be9fa9c0d38cc4eb733344 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 16 May 2017 08:10:07 -0700
Subject: ARM: dts: omap4: enable CEC pin for Pandaboard A4 and ES

The CEC pin was always pulled up, making it impossible to use it.

Change to PIN_INPUT so it can be used by the new CEC support.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 arch/arm/boot/dts/omap4-panda-a4.dts | 2 +-
 arch/arm/boot/dts/omap4-panda-es.dts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/omap4-panda-a4.dts b/arch/arm/boot/dts/omap4-panda-a4.dts
index 78d363177762..f1a6476af371 100644
--- a/arch/arm/boot/dts/omap4-panda-a4.dts
+++ b/arch/arm/boot/dts/omap4-panda-a4.dts
@@ -13,7 +13,7 @@
 /* Pandaboard Rev A4+ have external pullups on SCL & SDA */
 &dss_hdmi_pins {
 	pinctrl-single,pins = <
-		OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+		OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0)		/* hdmi_cec.hdmi_cec */
 		OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0)		/* hdmi_scl.hdmi_scl */
 		OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0)		/* hdmi_sda.hdmi_sda */
 		>;
diff --git a/arch/arm/boot/dts/omap4-panda-es.dts b/arch/arm/boot/dts/omap4-panda-es.dts
index 119f8e657edc..940fe4f7c5f6 100644
--- a/arch/arm/boot/dts/omap4-panda-es.dts
+++ b/arch/arm/boot/dts/omap4-panda-es.dts
@@ -34,7 +34,7 @@
 /* PandaboardES has external pullups on SCL & SDA */
 &dss_hdmi_pins {
 	pinctrl-single,pins = <
-		OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+		OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0)		/* hdmi_cec.hdmi_cec */
 		OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0)		/* hdmi_scl.hdmi_scl */
 		OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0)		/* hdmi_sda.hdmi_sda */
 		>;
-- 
cgit v1.2.3-59-g8ed1b


From 56322e123235370f1449c7444e311cce857d12f5 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Thu, 11 May 2017 12:21:19 -0500
Subject: ARM: dts: LogicPD Torpedo: Fix camera pin mux

Fix commit 05c4ffc3a266 ("ARM: dts: LogicPD Torpedo: Add MT9P031 Support")
In the previous commit, I indicated that the only testing was done by
showing the camera showed up when probing.  This patch fixes an incorrect
pin muxing on cam_d0, cam_d1 and cam_d2.

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 08cce17a25a0..43e9364083de 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -249,9 +249,9 @@
 			OMAP3_CORE1_IOPAD(0x2110, PIN_INPUT | MUX_MODE0)   /* cam_xclka.cam_xclka */
 			OMAP3_CORE1_IOPAD(0x2112, PIN_INPUT | MUX_MODE0)   /* cam_pclk.cam_pclk */
 
-			OMAP3_CORE1_IOPAD(0x2114, PIN_INPUT | MUX_MODE0)   /* cam_d0.cam_d0 */
-			OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0)   /* cam_d1.cam_d1 */
-			OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0)   /* cam_d2.cam_d2 */
+			OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0)   /* cam_d0.cam_d0 */
+			OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0)   /* cam_d1.cam_d1 */
+			OMAP3_CORE1_IOPAD(0x211a, PIN_INPUT | MUX_MODE0)   /* cam_d2.cam_d2 */
 			OMAP3_CORE1_IOPAD(0x211c, PIN_INPUT | MUX_MODE0)   /* cam_d3.cam_d3 */
 			OMAP3_CORE1_IOPAD(0x211e, PIN_INPUT | MUX_MODE0)   /* cam_d4.cam_d4 */
 			OMAP3_CORE1_IOPAD(0x2120, PIN_INPUT | MUX_MODE0)   /* cam_d5.cam_d5 */
-- 
cgit v1.2.3-59-g8ed1b


From aff523fb82696f59b197ea79ea3652c216dbe4c0 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 5 May 2017 15:37:06 -0700
Subject: memory: omap-gpmc: Fix debug output for access width

The width needs to be configured in bytes with 1 meaning 8-bit
access and 2 meaning 16-bit access.

Cc: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/memory/omap-gpmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index bf0fe0137dfe..6d1b4b707cc2 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -512,7 +512,7 @@ static void gpmc_cs_show_timings(int cs, const char *desc)
 	pr_info("gpmc cs%i access configuration:\n", cs);
 	GPMC_GET_RAW_BOOL(GPMC_CS_CONFIG1,  4,  4, "time-para-granularity");
 	GPMC_GET_RAW(GPMC_CS_CONFIG1,  8,  9, "mux-add-data");
-	GPMC_GET_RAW_MAX(GPMC_CS_CONFIG1, 12, 13,
+	GPMC_GET_RAW_SHIFT_MAX(GPMC_CS_CONFIG1, 12, 13, 1,
 			 GPMC_CONFIG1_DEVICESIZE_MAX, "device-width");
 	GPMC_GET_RAW(GPMC_CS_CONFIG1, 16, 17, "wait-pin");
 	GPMC_GET_RAW_BOOL(GPMC_CS_CONFIG1, 21, 21, "wait-on-write");
-- 
cgit v1.2.3-59-g8ed1b


From 0daaecacb83bc6b656a56393ab77a31c28139bc7 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Fri, 12 May 2017 10:44:08 -0700
Subject: xfs: fix indlen accounting error on partial delalloc conversion

The delalloc -> real block conversion path uses an incorrect
calculation in the case where the middle part of a delalloc extent
is being converted. This is documented as a rare situation because
XFS generally attempts to maximize contiguity by converting as much
of a delalloc extent as possible.

If this situation does occur, the indlen reservation for the two new
delalloc extents left behind by the conversion of the middle range
is calculated and compared with the original reservation. If more
blocks are required, the delta is allocated from the global block
pool. This delta value can be characterized as the difference
between the new total requirement (temp + temp2) and the currently
available reservation minus those blocks that have already been
allocated (startblockval(PREV.br_startblock) - allocated).

The problem is that the current code does not account for previously
allocated blocks correctly. It subtracts the current allocation
count from the (new - old) delta rather than the old indlen
reservation. This means that more indlen blocks than have been
allocated end up stashed in the remaining extents and free space
accounting is broken as a result.

Fix up the calculation to subtract the allocated block count from
the original extent indlen and thus correctly allocate the
reservation delta based on the difference between the new total
requirement and the unused blocks from the original reservation.
Also remove a bogus assert that contradicts the fact that the new
indlen reservation can be larger than the original indlen
reservation.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f02eb7673392..8adb91b05588 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2065,8 +2065,10 @@ xfs_bmap_add_extent_delay_real(
 		}
 		temp = xfs_bmap_worst_indlen(bma->ip, temp);
 		temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
-		diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
-			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
+		diff = (int)(temp + temp2 -
+			     (startblockval(PREV.br_startblock) -
+			      (bma->cur ?
+			       bma->cur->bc_private.b.allocated : 0)));
 		if (diff > 0) {
 			error = xfs_mod_fdblocks(bma->ip->i_mount,
 						 -((int64_t)diff), false);
@@ -2123,7 +2125,6 @@ xfs_bmap_add_extent_delay_real(
 		temp = da_new;
 		if (bma->cur)
 			temp += bma->cur->bc_private.b.allocated;
-		ASSERT(temp <= da_old);
 		if (temp < da_old)
 			xfs_mod_fdblocks(bma->ip->i_mount,
 					(int64_t)(da_old - temp), false);
-- 
cgit v1.2.3-59-g8ed1b


From 6eadbf4c8ba816c10d1c97bed9aa861d9fd17809 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 12 May 2017 10:44:08 -0700
Subject: xfs: BMAPX shouldn't barf on inline-format directories

When we're fulfilling a BMAPX request, jump out early if the data fork
is in local format.  This prevents us from hitting a debugging check in
bmapi_read and barfing errors back to userspace.  The on-disk extent
count check later isn't sufficient for IF_DELALLOC mode because da
extents are in memory and not on disk.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_bmap_util.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2b954308a1d6..2e8851ee6759 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -582,9 +582,13 @@ xfs_getbmap(
 		}
 		break;
 	default:
+		/* Local format data forks report no extents. */
+		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+			bmv->bmv_entries = 0;
+			return 0;
+		}
 		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
 			return -EINVAL;
 
 		if (xfs_get_extsz_hint(ip) ||
-- 
cgit v1.2.3-59-g8ed1b


From 6e747506dde195d3d05fe2bb8ef78aceba28a5e3 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 12 May 2017 10:44:11 -0700
Subject: xfs: fix warnings about unused stack variables

Reduce stack usage and get rid of compiler warnings by eliminating
unused variables.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8adb91b05588..a7048eafa8e6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1280,7 +1280,6 @@ xfs_bmap_read_extents(
 		xfs_bmbt_rec_t	*frp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
-		xfs_extnum_t	start;
 
 		num_recs = xfs_btree_get_numrecs(block);
 		if (unlikely(i + num_recs > room)) {
@@ -1303,7 +1302,6 @@ xfs_bmap_read_extents(
 		 * Copy records into the extent records.
 		 */
 		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
-		start = i;
 		for (j = 0; j < num_recs; j++, i++, frp++) {
 			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
 			trp->l0 = be64_to_cpu(frp->l0);
-- 
cgit v1.2.3-59-g8ed1b


From 892d2a5f705723b2cb488bfb38bcbdcf83273184 Mon Sep 17 00:00:00 2001
From: Zorro Lang <zlang@redhat.com>
Date: Mon, 15 May 2017 08:40:02 -0700
Subject: xfs: bad assertion for delalloc an extent that start at i_size

By run fsstress long enough time enough in RHEL-7, I find an
assertion failure (harder to reproduce on linux-4.11, but problem
is still there):

  XFS: Assertion failed: (iflags & BMV_IF_DELALLOC) != 0, file: fs/xfs/xfs_bmap_util.c

The assertion is in xfs_getbmap() funciton:

  if (map[i].br_startblock == DELAYSTARTBLOCK &&
-->   map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
          ASSERT((iflags & BMV_IF_DELALLOC) != 0);

When map[i].br_startoff == XFS_B_TO_FSB(mp, XFS_ISIZE(ip)), the
startoff is just at EOF. But we only need to make sure delalloc
extents that are within EOF, not include EOF.

Signed-off-by: Zorro Lang <zlang@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2e8851ee6759..9e3cc2146d5b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -716,7 +716,7 @@ xfs_getbmap(
 			 * extents.
 			 */
 			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
 				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
 
                         if (map[i].br_startblock == HOLESTARTBLOCK &&
-- 
cgit v1.2.3-59-g8ed1b


From bafbb9c73241760023d8981191ddd30bb1c6dbac Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Mon, 15 May 2017 17:05:47 -0400
Subject: tcp: eliminate negative reordering in tcp_clean_rtx_queue

tcp_ack() can call tcp_fragment() which may dededuct the
value tp->fackets_out when MSS changes. When prior_fackets
is larger than tp->fackets_out, tcp_clean_rtx_queue() can
invoke tcp_update_reordering() with negative values. This
results in absurd tp->reodering values higher than
sysctl_tcp_max_reordering.

Note that tcp_update_reordering indeeds sets tp->reordering
to min(sysctl_tcp_max_reordering, metric), but because
the comparison is signed, a negative metric always wins.

Fixes: c7caf8d3ed7a ("[TCP]: Fix reord detection due to snd_una covered holes")
Reported-by: Rebecca Isaacs <risaacs@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 06e2dbc2b4a2..174d4376baa5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3190,7 +3190,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			int delta;
 
 			/* Non-retransmitted hole got filled? That's reordering */
-			if (reord < prior_fackets)
+			if (reord < prior_fackets && reord <= tp->fackets_out)
 				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
 
 			delta = tcp_is_fack(tp) ? pkts_acked :
-- 
cgit v1.2.3-59-g8ed1b


From bcfc7d33110b0f33069d74138eeb7ca9acbb3c85 Mon Sep 17 00:00:00 2001
From: Thomas Winter <Thomas.Winter@alliedtelesis.co.nz>
Date: Tue, 16 May 2017 10:14:44 +1200
Subject: ipmr: vrf: Find VIFs using the actual device

The skb->dev that is passed into ip_mr_input is
the loX device for VRFs. When we lookup a vif
for this dev, none is found as we do not create
vifs for loopbacks. Instead lookup a vif for the
actual device that the packet was received on,
eg the vlan.

Signed-off-by: Thomas Winter <Thomas.Winter@alliedtelesis.co.nz>
cc: David Ahern <dsa@cumulusnetworks.com>
cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
cc: roopa <roopa@cumulusnetworks.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3a02d52ed50e..551de4d023a8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1980,6 +1980,20 @@ int ip_mr_input(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
 	struct mr_table *mrt;
+	struct net_device *dev;
+
+	/* skb->dev passed in is the loX master dev for vrfs.
+	 * As there are no vifs associated with loopback devices,
+	 * get the proper interface that does have a vif associated with it.
+	 */
+	dev = skb->dev;
+	if (netif_is_l3_master(skb->dev)) {
+		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+		if (!dev) {
+			kfree_skb(skb);
+			return -ENODEV;
+		}
+	}
 
 	/* Packet is looped back after forward, it should not be
 	 * forwarded second time, but still can be delivered locally.
@@ -2017,7 +2031,7 @@ int ip_mr_input(struct sk_buff *skb)
 	/* already under rcu_read_lock() */
 	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 	if (!cache) {
-		int vif = ipmr_find_vif(mrt, skb->dev);
+		int vif = ipmr_find_vif(mrt, dev);
 
 		if (vif >= 0)
 			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
@@ -2037,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb)
 		}
 
 		read_lock(&mrt_lock);
-		vif = ipmr_find_vif(mrt, skb->dev);
+		vif = ipmr_find_vif(mrt, dev);
 		if (vif >= 0) {
 			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
 			read_unlock(&mrt_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 6f61dd3aa35179043f1fcdb0965c5d56278ab724 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 12 May 2017 14:52:34 -0700
Subject: efi-pstore: Fix read iter after pstore API refactor

During the internal pstore API refactoring, the EFI vars read entry was
accidentally made to update a stack variable instead of the pstore
private data pointer. This corrects the problem (and removes the now
needless argument).

Fixes: 125cc42baf8a ("pstore: Replace arguments for read() API")
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/firmware/efi/efi-pstore.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 93d8cdbe7ef4..44148fd4c9f2 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -155,19 +155,14 @@ static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
  * efi_pstore_sysfs_entry_iter
  *
  * @record: pstore record to pass to callback
- * @pos: entry to begin iterating from
  *
  * You MUST call efivar_enter_iter_begin() before this function, and
  * efivar_entry_iter_end() afterwards.
  *
- * It is possible to begin iteration from an arbitrary entry within
- * the list by passing @pos. @pos is updated on return to point to
- * the next entry of the last one passed to efi_pstore_read_func().
- * To begin iterating from the beginning of the list @pos must be %NULL.
  */
-static int efi_pstore_sysfs_entry_iter(struct pstore_record *record,
-				       struct efivar_entry **pos)
+static int efi_pstore_sysfs_entry_iter(struct pstore_record *record)
 {
+	struct efivar_entry **pos = (struct efivar_entry **)&record->psi->data;
 	struct efivar_entry *entry, *n;
 	struct list_head *head = &efivar_sysfs_list;
 	int size = 0;
@@ -218,7 +213,6 @@ static int efi_pstore_sysfs_entry_iter(struct pstore_record *record,
  */
 static ssize_t efi_pstore_read(struct pstore_record *record)
 {
-	struct efivar_entry *entry = (struct efivar_entry *)record->psi->data;
 	ssize_t size;
 
 	record->buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
@@ -229,7 +223,7 @@ static ssize_t efi_pstore_read(struct pstore_record *record)
 		size = -EINTR;
 		goto out;
 	}
-	size = efi_pstore_sysfs_entry_iter(record, &entry);
+	size = efi_pstore_sysfs_entry_iter(record);
 	efivar_entry_iter_end();
 
 out:
-- 
cgit v1.2.3-59-g8ed1b


From 263eec9b2a82e8697d064709414914b5b10ac538 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Mon, 15 May 2017 17:33:37 +0200
Subject: smc: switch to usage of IB_PD_UNSAFE_GLOBAL_RKEY

Currently, SMC enables remote access to physical memory when a user
has successfully configured and established an SMC-connection until ten
minutes after the last SMC connection is closed. Because this is considered
a security risk, drivers are supposed to use IB_PD_UNSAFE_GLOBAL_RKEY in
such a case.

This patch changes the current SMC code to use IB_PD_UNSAFE_GLOBAL_RKEY.
This improves user awareness, but does not remove the security risk itself.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_clc.c  |  4 ++--
 net/smc/smc_core.c | 16 +++-------------
 net/smc/smc_core.h |  2 +-
 net/smc/smc_ib.c   | 21 ++-------------------
 net/smc/smc_ib.h   |  2 --
 5 files changed, 8 insertions(+), 37 deletions(-)

diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e41f594a1e1d..03ec058d18df 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(cclc.qpn, link->roce_qp->qp_num);
 	cclc.rmb_rkey =
-		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
 	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
 	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
@@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(aclc.qpn, link->roce_qp->qp_num);
 	aclc.rmb_rkey =
-		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
 	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
 	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	aclc.qp_mtu = link->path_mtu;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 65020e93ff21..3ac09a629ea1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc)
 			rmb_desc = NULL;
 			continue; /* if mapping failed, try smaller one */
 		}
-		rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd,
-					      IB_ACCESS_REMOTE_WRITE |
-					      IB_ACCESS_LOCAL_WRITE,
-					     &rmb_desc->mr_rx[SMC_SINGLE_LINK]);
-		if (rc) {
-			smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-					 tmp_bufsize, rmb_desc,
-					 DMA_FROM_DEVICE);
-			kfree(rmb_desc->cpu_addr);
-			kfree(rmb_desc);
-			rmb_desc = NULL;
-			continue;
-		}
+		rmb_desc->rkey[SMC_SINGLE_LINK] =
+			lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
 		rmb_desc->used = 1;
 		write_lock_bh(&lgr->rmbs_lock);
 		list_add(&rmb_desc->list,
@@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
 
 	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
 		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
+		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
 		    test_bit(i, lgr->rtokens_used_mask)) {
 			conn->rtoken_idx = i;
 			return 0;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 27eb38056a27..b013cb43a327 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -93,7 +93,7 @@ struct smc_buf_desc {
 	u64			dma_addr[SMC_LINKS_PER_LGR_MAX];
 						/* mapped address of buffer */
 	void			*cpu_addr;	/* virtual address of buffer */
-	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+	u32			rkey[SMC_LINKS_PER_LGR_MAX];
 						/* for rmb only:
 						 * rkey provided to peer
 						 */
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index cb69ab977cd7..b31715505a35 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET;	/* unique system
 								 * identifier
 								 */
 
-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
-			     struct ib_mr **mr)
-{
-	int rc;
-
-	if (*mr)
-		return 0; /* already done */
-
-	/* obtain unique key -
-	 * next invocation of get_dma_mr returns a different key!
-	 */
-	*mr = pd->device->get_dma_mr(pd, access_flags);
-	rc = PTR_ERR_OR_ZERO(*mr);
-	if (IS_ERR(*mr))
-		*mr = NULL;
-	return rc;
-}
-
 static int smc_ib_modify_qp_init(struct smc_link *lnk)
 {
 	struct ib_qp_attr qp_attr;
@@ -210,7 +192,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 {
 	int rc;
 
-	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
+	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
+				   IB_PD_UNSAFE_GLOBAL_RKEY);
 	rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
 	if (IS_ERR(lnk->roce_pd))
 		lnk->roce_pd = NULL;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 7e1f0e24d177..b567152a526d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -61,8 +61,6 @@ void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
 int smc_ib_create_protection_domain(struct smc_link *lnk);
 void smc_ib_destroy_queue_pair(struct smc_link *lnk);
 int smc_ib_create_queue_pair(struct smc_link *lnk);
-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
-			     struct ib_mr **mr);
 int smc_ib_ready_link(struct smc_link *lnk);
 int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
-- 
cgit v1.2.3-59-g8ed1b


From 19a0f7e37c0761a0a1cbf550705a6063c9675223 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 16 May 2017 09:51:38 +0300
Subject: net/smc: Add warning about remote memory exposure

The driver explicitly bypasses APIs to register all memory once a
connection is made, and thus allows remote access to memory.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Acked-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index c717ef0896aa..33954852f3f8 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,6 +8,10 @@ config SMC
 	  The Linux implementation of the SMC-R solution is designed as
 	  a separate socket family SMC.
 
+	  Warning: SMC will expose all memory for remote reads and writes
+	  once a connection is established.  Don't enable this option except
+	  for tightly controlled lab environment.
+
 	  Select this option if you want to run SMC socket applications
 
 config SMC_DIAG
-- 
cgit v1.2.3-59-g8ed1b


From f6c5775ff0bfa62b072face6bf1d40f659f194b2 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 May 2017 23:19:17 -0700
Subject: net: Improve handling of failures on link and route dumps

In general, rtnetlink dumps do not anticipate failure to dump a single
object (e.g., link or route) on a single pass. As both route and link
objects have grown via more attributes, that is no longer a given.

netlink dumps can handle a failure if the dump function returns an
error; specifically, netlink_dump adds the return code to the response
if it is <= 0 so userspace is notified of the failure. The missing
piece is the rtnetlink dump functions returning the error.

Fix route and link dump functions to return the errors if no object is
added to an skb (detected by skb->len != 0). IPv6 route dumps
(rt6_dump_route) already return the error; this patch updates IPv4 and
link dumps. Other dump functions may need to be ajusted as well.

Reported-by: Jan Moskyto Matejka <mq@ucw.cz>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c    | 36 ++++++++++++++++++++++++------------
 net/ipv4/fib_frontend.c | 15 +++++++++++----
 net/ipv4/fib_trie.c     | 26 ++++++++++++++------------
 3 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7f82c3450b1..49a279a7cc15 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1627,13 +1627,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 					       cb->nlh->nlmsg_seq, 0,
 					       flags,
 					       ext_filter_mask);
-			/* If we ran out of room on the first message,
-			 * we're in trouble
-			 */
-			WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
 
-			if (err < 0)
-				goto out;
+			if (err < 0) {
+				if (likely(skb->len))
+					goto out;
+
+				goto out_err;
+			}
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
@@ -1641,10 +1641,12 @@ cont:
 		}
 	}
 out:
+	err = skb->len;
+out_err:
 	cb->args[1] = idx;
 	cb->args[0] = h;
 
-	return skb->len;
+	return err;
 }
 
 int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
@@ -3453,8 +3455,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 				err = br_dev->netdev_ops->ndo_bridge_getlink(
 						skb, portid, seq, dev,
 						filter_mask, NLM_F_MULTI);
-				if (err < 0 && err != -EOPNOTSUPP)
-					break;
+				if (err < 0 && err != -EOPNOTSUPP) {
+					if (likely(skb->len))
+						break;
+
+					goto out_err;
+				}
 			}
 			idx++;
 		}
@@ -3465,16 +3471,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 							      seq, dev,
 							      filter_mask,
 							      NLM_F_MULTI);
-				if (err < 0 && err != -EOPNOTSUPP)
-					break;
+				if (err < 0 && err != -EOPNOTSUPP) {
+					if (likely(skb->len))
+						break;
+
+					goto out_err;
+				}
 			}
 			idx++;
 		}
 	}
+	err = skb->len;
+out_err:
 	rcu_read_unlock();
 	cb->args[0] = idx;
 
-	return skb->len;
+	return err;
 }
 
 static inline size_t bridge_nlmsg_size(void)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 39bd1edee676..83e3ed258467 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -763,7 +763,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int e = 0, s_e;
 	struct fib_table *tb;
 	struct hlist_head *head;
-	int dumped = 0;
+	int dumped = 0, err;
 
 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
@@ -783,20 +783,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 			if (dumped)
 				memset(&cb->args[2], 0, sizeof(cb->args) -
 						 2 * sizeof(cb->args[0]));
-			if (fib_table_dump(tb, skb, cb) < 0)
-				goto out;
+			err = fib_table_dump(tb, skb, cb);
+			if (err < 0) {
+				if (likely(skb->len))
+					goto out;
+
+				goto out_err;
+			}
 			dumped = 1;
 next:
 			e++;
 		}
 	}
 out:
+	err = skb->len;
+out_err:
 	rcu_read_unlock();
 
 	cb->args[1] = e;
 	cb->args[0] = h;
 
-	return skb->len;
+	return err;
 }
 
 /* Prepare and feed intra-kernel routing request.
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1201409ba1dc..51182ff2b441 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1983,6 +1983,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 
 	/* rcu_read_lock is hold by caller */
 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+		int err;
+
 		if (i < s_i) {
 			i++;
 			continue;
@@ -1993,17 +1995,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 			continue;
 		}
 
-		if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq,
-				  RTM_NEWROUTE,
-				  tb->tb_id,
-				  fa->fa_type,
-				  xkey,
-				  KEYLENGTH - fa->fa_slen,
-				  fa->fa_tos,
-				  fa->fa_info, NLM_F_MULTI) < 0) {
+		err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				    tb->tb_id, fa->fa_type,
+				    xkey, KEYLENGTH - fa->fa_slen,
+				    fa->fa_tos, fa->fa_info, NLM_F_MULTI);
+		if (err < 0) {
 			cb->args[4] = i;
-			return -1;
+			return err;
 		}
 		i++;
 	}
@@ -2025,10 +2024,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 	t_key key = cb->args[3];
 
 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
-		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+		int err;
+
+		err = fn_trie_dump_leaf(l, tb, skb, cb);
+		if (err < 0) {
 			cb->args[3] = key;
 			cb->args[2] = count;
-			return -1;
+			return err;
 		}
 
 		++count;
-- 
cgit v1.2.3-59-g8ed1b


From 13840d38016203f0095cd547b90352812d24b787 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 30 Apr 2017 17:32:28 -0400
Subject: dm bufio: make the parameter "retain_bytes" unsigned long

Change the type of the parameter "retain_bytes" from unsigned to
unsigned long, so that on 64-bit machines the user can set more than
4GiB of data to be retained.

Also, change the type of the variable "count" in the function
"__evict_old_buffers" to unsigned long.  The assignment
"count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];"
could result in unsigned long to unsigned overflow and that could result
in buffers not being freed when they should.

While at it, avoid division in get_retain_buffers().  Division is slow,
we can change it to shift because we have precalculated the log2 of
block size.

Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-bufio.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5db11a405129..cd8139593ccd 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock);
  * Buffers are freed after this timeout
  */
 static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
-static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
+static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
@@ -1558,10 +1558,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
 	return true;
 }
 
-static unsigned get_retain_buffers(struct dm_bufio_client *c)
+static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 {
-        unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
-        return retain_bytes / c->block_size;
+        unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+        return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
 }
 
 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1571,7 +1571,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 	struct dm_buffer *b, *tmp;
 	unsigned long freed = 0;
 	unsigned long count = nr_to_scan;
-	unsigned retain_target = get_retain_buffers(c);
+	unsigned long retain_target = get_retain_buffers(c);
 
 	for (l = 0; l < LIST_SIZE; l++) {
 		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
@@ -1794,8 +1794,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz)
 static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
 {
 	struct dm_buffer *b, *tmp;
-	unsigned retain_target = get_retain_buffers(c);
-	unsigned count;
+	unsigned long retain_target = get_retain_buffers(c);
+	unsigned long count;
 	LIST_HEAD(write_list);
 
 	dm_bufio_lock(c);
@@ -1955,7 +1955,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
 
-module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
 
 module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
-- 
cgit v1.2.3-59-g8ed1b


From b401ee0b85a53e89739ff68a5b1a0667d664afc9 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 18 Apr 2017 12:41:18 +0200
Subject: KVM: x86: lower default for halt_poll_ns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some fio benchmarks, halt_poll_ns=400000 caused CPU utilization to
increase heavily even in cases where the performance improvement was
small.  In particular, bandwidth divided by CPU usage was as much as
60% lower.

To some extent this is the expected effect of the patch, and the
additional CPU utilization is only visible when running the
benchmarks.  However, halving the threshold also halves the extra
CPU utilization (from +30-130% to +20-70%) and has no negative
effect on performance.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c761fea0c98..695605eb1dfb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,7 +43,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
-#define KVM_HALT_POLL_NS_DEFAULT 400000
+#define KVM_HALT_POLL_NS_DEFAULT 200000
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
-- 
cgit v1.2.3-59-g8ed1b


From ea9a46e1c49251331dbfda19ced7114337966178 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 12 May 2017 10:44:10 -0700
Subject: xfs: only return detailed fsmap info if the caller has CAP_SYS_ADMIN

There were a number of handwaving complaints that one could "possibly"
use inode numbers and extent maps to fingerprint a filesystem hosting
multiple containers and somehow use the information to guess at the
contents of other containers and attack them.  Despite the total lack of
any demonstration that this is actually possible, it's easier to
restrict access now and broaden it later, so use the rmapbt fsmap
backends only if the caller has CAP_SYS_ADMIN.  Unprivileged users will
just have to make do with only getting the free space and static
metadata placement information.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/xfs_fsmap.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3683819887a5..814ed729881d 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -828,6 +828,7 @@ xfs_getfsmap(
 	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
 	struct xfs_getfsmap_dev		handlers[XFS_GETFSMAP_DEVS];
 	struct xfs_getfsmap_info	info = { NULL };
+	bool				use_rmap;
 	int				i;
 	int				error = 0;
 
@@ -837,12 +838,14 @@ xfs_getfsmap(
 	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
 		return -EINVAL;
 
+	use_rmap = capable(CAP_SYS_ADMIN) &&
+		   xfs_sb_version_hasrmapbt(&mp->m_sb);
 	head->fmh_entries = 0;
 
 	/* Set up our device handlers. */
 	memset(handlers, 0, sizeof(handlers));
 	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (use_rmap)
 		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
 	else
 		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
-- 
cgit v1.2.3-59-g8ed1b


From 2432a3fb5cff026005aaad24e42226402f7fd0aa Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 May 2017 13:27:49 +0200
Subject: mlx5e: add CONFIG_INET dependency

We now reference the arp_tbl, which requires IPv4 support to be
enabled in the kernel, otherwise we get a link error:

drivers/net/built-in.o: In function `mlx5e_tc_update_neigh_used_value':
(.text+0x16afec): undefined reference to `arp_tbl'
drivers/net/built-in.o: In function `mlx5e_rep_neigh_init':
en_rep.c:(.text+0x16c16d): undefined reference to `arp_tbl'
drivers/net/built-in.o: In function `mlx5e_rep_netevent_event':
en_rep.c:(.text+0x16cbb5): undefined reference to `arp_tbl'

This adds a Kconfig dependency for it.

Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index fc52d742b7f7..27251a78075c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -13,7 +13,7 @@ config MLX5_CORE
 
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
-	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
 	depends on IPV6=y || IPV6=n || MLX5_CORE=m
 	imply PTP_1588_CLOCK
 	default n
-- 
cgit v1.2.3-59-g8ed1b


From 3e21f4af170bebf47c187c1ff8bf155583c9f3b1 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Tue, 16 May 2017 19:18:55 +0200
Subject: char: lp: fix possible integer overflow in lp_setup()

The lp_setup() code doesn't apply any bounds checking when passing
"lp=none", and only in this case, resulting in an overflow of the
parport_nr[] array. All versions in Git history are affected.

Reported-By: Roee Hay <roee.hay@hcl.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/lp.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 565e4cf04a02..8249762192d5 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -859,7 +859,11 @@ static int __init lp_setup (char *str)
 	} else if (!strcmp(str, "auto")) {
 		parport_nr[0] = LP_PARPORT_AUTO;
 	} else if (!strcmp(str, "none")) {
-		parport_nr[parport_ptr++] = LP_PARPORT_NONE;
+		if (parport_ptr < LP_NO)
+			parport_nr[parport_ptr++] = LP_PARPORT_NONE;
+		else
+			printk(KERN_INFO "lp: too many ports, %s ignored.\n",
+			       str);
 	} else if (!strcmp(str, "reset")) {
 		reset = 1;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From a20cfc1cde76047657045fc5976834f57422a8c5 Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 9 May 2017 13:52:02 +0200
Subject: misc: pci_endpoint_test: select CRC32

There is the following link error with CONFIG_PCI_ENDPOINT_TEST=y and
CONFIG_CRC32=m:

drivers/built-in.o: In function 'pci_endpoint_test_ioctl':
pci_endpoint_test.c:(.text+0xf1251): undefined reference to 'crc32_le'
pci_endpoint_test.c:(.text+0xf1322): undefined reference to 'crc32_le'
pci_endpoint_test.c:(.text+0xf13b2): undefined reference to 'crc32_le'
pci_endpoint_test.c:(.text+0xf141e): undefined reference to 'crc32_le'

Fix this by selecting CRC32 in the PCI_ENDPOINT_TEST kconfig entry.

Fixes: 2c156ac71c6b ("misc: Add host side PCI driver for PCI test function device")
Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 2cba76e6fa3c..07bbd4cc1852 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -492,6 +492,7 @@ config ASPEED_LPC_CTRL
 
 config PCI_ENDPOINT_TEST
 	depends on PCI
+	select CRC32
 	tristate "PCI Endpoint Test driver"
 	---help---
            Enable this configuration option to enable the host side test driver
-- 
cgit v1.2.3-59-g8ed1b


From 0d83539092ddb1ab79b4d65bccb866bf07ea2ccd Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Tue, 9 May 2017 18:58:24 -0500
Subject: uio: fix incorrect memory leak cleanup

Commit 75f0aef6220d ("uio: fix memory leak") has fixed up some
memory leaks during the failure paths of the addition of uio
attributes, but still is not correct entirely. A kobject_uevent()
failure still needs a kobject_put() and the kobject container
structure allocation failure before the kobject_init() doesn't
need a kobject_put(). Fix this properly.

Fixes: 75f0aef6220d ("uio: fix memory leak")
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 1c196f87e9d9..ff04b7f8549f 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -279,7 +279,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 		map = kzalloc(sizeof(*map), GFP_KERNEL);
 		if (!map) {
 			ret = -ENOMEM;
-			goto err_map_kobj;
+			goto err_map;
 		}
 		kobject_init(&map->kobj, &map_attr_type);
 		map->mem = mem;
@@ -289,7 +289,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 			goto err_map_kobj;
 		ret = kobject_uevent(&map->kobj, KOBJ_ADD);
 		if (ret)
-			goto err_map;
+			goto err_map_kobj;
 	}
 
 	for (pi = 0; pi < MAX_UIO_PORT_REGIONS; pi++) {
@@ -308,7 +308,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 		portio = kzalloc(sizeof(*portio), GFP_KERNEL);
 		if (!portio) {
 			ret = -ENOMEM;
-			goto err_portio_kobj;
+			goto err_portio;
 		}
 		kobject_init(&portio->kobj, &portio_attr_type);
 		portio->port = port;
@@ -319,7 +319,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 			goto err_portio_kobj;
 		ret = kobject_uevent(&portio->kobj, KOBJ_ADD);
 		if (ret)
-			goto err_portio;
+			goto err_portio_kobj;
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 88ad60c23a394b2f8bf1e570c756f415435d1d35 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 16 May 2017 14:07:24 +0200
Subject: i2c: mv64xxx: don't override deferred probing when getting irq

There is no reason to use platform_get_irq() for non-DT probing and
irq_of_parse_and_map() for DT probing. Indeed, platform_get_irq()
works fine for both.

In addition, using platform_get_irq() properly returns -EPROBE_DEFER
when the interrupt controller is not yet available, so instead of
inventing our own error code (-ENXIO), return the one provided by
platform_get_irq().

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-mv64xxx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index cf737ec8563b..5c4db65c5019 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -819,7 +819,6 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 		rc = -EINVAL;
 		goto out;
 	}
-	drv_data->irq = irq_of_parse_and_map(np, 0);
 
 	drv_data->rstc = devm_reset_control_get_optional(dev, NULL);
 	if (IS_ERR(drv_data->rstc)) {
@@ -902,10 +901,11 @@ mv64xxx_i2c_probe(struct platform_device *pd)
 	if (!IS_ERR(drv_data->clk))
 		clk_prepare_enable(drv_data->clk);
 
+	drv_data->irq = platform_get_irq(pd, 0);
+
 	if (pdata) {
 		drv_data->freq_m = pdata->freq_m;
 		drv_data->freq_n = pdata->freq_n;
-		drv_data->irq = platform_get_irq(pd, 0);
 		drv_data->adapter.timeout = msecs_to_jiffies(pdata->timeout);
 		drv_data->offload_enabled = false;
 		memcpy(&drv_data->reg_offsets, &mv64xxx_i2c_regs_mv64xxx, sizeof(drv_data->reg_offsets));
@@ -915,7 +915,7 @@ mv64xxx_i2c_probe(struct platform_device *pd)
 			goto exit_clk;
 	}
 	if (drv_data->irq < 0) {
-		rc = -ENXIO;
+		rc = drv_data->irq;
 		goto exit_reset;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 9a2eba337cacefc95b97c2726e3efdd435b3460e Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 15 May 2017 12:04:31 +0300
Subject: drm/nouveau: Fix drm poll_helper handling

Commit cae9ff036eea effectively disabled the drm poll_helper by checking
the wrong flag to see if the driver should enable the poll or not:
mode_config.poll_enabled is only set to true by poll_init and it is not
indicating if the poll is enabled or not.
nouveau_display_create() will initialize the poll and going to disable it
right away. After poll_init() the mode_config.poll_enabled will be true,
but the poll itself is disabled.

To avoid the race caused by calling the poll_enable() from different paths,
this patch will enable the poll from one place, in the
nouveau_display_hpd_work().

In case the pm_runtime is disabled we will enable the poll in
nouveau_drm_load() once.

Fixes: cae9ff036eea ("drm/nouveau: Don't enabling polling twice on runtime resume")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Lyude <lyude@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_display.c | 6 ++----
 drivers/gpu/drm/nouveau/nouveau_drm.c     | 6 +++---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 21b10f9840c9..549763f5e17d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -360,6 +360,8 @@ nouveau_display_hpd_work(struct work_struct *work)
 	pm_runtime_get_sync(drm->dev->dev);
 
 	drm_helper_hpd_irq_event(drm->dev);
+	/* enable polling for external displays */
+	drm_kms_helper_poll_enable(drm->dev);
 
 	pm_runtime_mark_last_busy(drm->dev->dev);
 	pm_runtime_put_sync(drm->dev->dev);
@@ -413,10 +415,6 @@ nouveau_display_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	/* enable polling for external displays */
-	if (!dev->mode_config.poll_enabled)
-		drm_kms_helper_poll_enable(dev);
-
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct nouveau_connector *conn = nouveau_connector(connector);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 2b6ac24ce690..36268e1802b5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -502,6 +502,9 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 		pm_runtime_allow(dev->dev);
 		pm_runtime_mark_last_busy(dev->dev);
 		pm_runtime_put(dev->dev);
+	} else {
+		/* enable polling for external displays */
+		drm_kms_helper_poll_enable(dev);
 	}
 	return 0;
 
@@ -774,9 +777,6 @@ nouveau_pmops_runtime_resume(struct device *dev)
 
 	ret = nouveau_do_resume(drm_dev, true);
 
-	if (!drm_dev->mode_config.poll_enabled)
-		drm_kms_helper_poll_enable(drm_dev);
-
 	/* do magic */
 	nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25));
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
-- 
cgit v1.2.3-59-g8ed1b


From 563ad2b640955a0da21aedce1aaa332c7d016dde Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 8 May 2017 14:30:43 +0200
Subject: drm/nouveau/secboot: plug memory leak in ls_ucode_img_load_gr() error
 path

The last goto looks spurious because it releases less resources than the
previous one.
Also free 'img->sig' if 'ls_ucode_img_build()' fails.

Fixes: 9d896f3e41a6 ("drm/nouveau/secboot: abstract LS firmware loading functions")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
index d1cf02d22db1..1b0c793c0192 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
@@ -116,6 +116,7 @@ ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
 	ret = nvkm_firmware_get(subdev->device, f, &sig);
 	if (ret)
 		goto free_data;
+
 	img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL);
 	if (!img->sig) {
 		ret = -ENOMEM;
@@ -126,8 +127,9 @@ ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
 	img->ucode_data = ls_ucode_img_build(bl, code, data,
 					     &img->ucode_desc);
 	if (IS_ERR(img->ucode_data)) {
+		kfree(img->sig);
 		ret = PTR_ERR(img->ucode_data);
-		goto free_data;
+		goto free_sig;
 	}
 	img->ucode_size = img->ucode_desc.image_size;
 
-- 
cgit v1.2.3-59-g8ed1b


From 2579b8b0ece53248b815042f8662a4531acf120d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 27 Apr 2017 12:12:56 +0300
Subject: drm/nouveau/fifo/gk104-: Silence a locking warning

Presumably we can never actually hit this return, but static checkers
complain that we should unlock before we return.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 3a24788c3185..a7e55c422501 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -148,7 +148,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	case NVKM_MEM_TARGET_NCOH: target = 3; break;
 	default:
 		WARN_ON(1);
-		return;
+		goto unlock;
 	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
@@ -160,6 +160,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 				       & 0x00100000),
 			       msecs_to_jiffies(2000)) == 0)
 		nvkm_error(subdev, "runlist %d update timeout\n", runl);
+unlock:
 	mutex_unlock(&subdev->mutex);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 4fa8324461b824eaea9b6695464395710fe20c44 Mon Sep 17 00:00:00 2001
From: Derek Basehore <dbasehore@chromium.org>
Date: Thu, 11 May 2017 14:34:24 +0200
Subject: scsi: sd: Ignore sync cache failures when not supported

Some external hard drives don't support the sync command even though the
hard drive has write cache enabled. In this case, upon suspend request,
sync cache failures are ignored if the error code in the sense header is
ILLEGAL_REQUEST. There's not much we can do for these drives, so we
shouldn't fail to suspend for this error case. The drive may stay
powered if that's the setup for the port it's plugged into.

Signed-off-by: Derek Basehore <dbasehore@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index de9e2f2ef662..b6bb4e0ce0e3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1582,17 +1582,21 @@ out:
 	return retval;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp)
+static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 {
 	int retries, res;
 	struct scsi_device *sdp = sdkp->device;
 	const int timeout = sdp->request_queue->rq_timeout
 		* SD_FLUSH_TIMEOUT_MULTIPLIER;
-	struct scsi_sense_hdr sshdr;
+	struct scsi_sense_hdr my_sshdr;
 
 	if (!scsi_device_online(sdp))
 		return -ENODEV;
 
+	/* caller might not be interested in sense, but we need it */
+	if (!sshdr)
+		sshdr = &my_sshdr;
+
 	for (retries = 3; retries > 0; --retries) {
 		unsigned char cmd[10] = { 0 };
 
@@ -1601,7 +1605,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		 * Leave the rest of the command zero to indicate
 		 * flush everything.
 		 */
-		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
+		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, sshdr,
 				timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL);
 		if (res == 0)
 			break;
@@ -1611,11 +1615,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		sd_print_result(sdkp, "Synchronize Cache(10) failed", res);
 
 		if (driver_byte(res) & DRIVER_SENSE)
-			sd_print_sense_hdr(sdkp, &sshdr);
+			sd_print_sense_hdr(sdkp, sshdr);
+
 		/* we need to evaluate the error return  */
-		if (scsi_sense_valid(&sshdr) &&
-			(sshdr.asc == 0x3a ||	/* medium not present */
-			 sshdr.asc == 0x20))	/* invalid command */
+		if (scsi_sense_valid(sshdr) &&
+			(sshdr->asc == 0x3a ||	/* medium not present */
+			 sshdr->asc == 0x20))	/* invalid command */
 				/* this is no error here */
 				return 0;
 
@@ -3459,7 +3464,7 @@ static void sd_shutdown(struct device *dev)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp);
+		sd_sync_cache(sdkp, NULL);
 	}
 
 	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
@@ -3471,6 +3476,7 @@ static void sd_shutdown(struct device *dev)
 static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	struct scsi_sense_hdr sshdr;
 	int ret = 0;
 
 	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
@@ -3478,12 +3484,23 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		ret = sd_sync_cache(sdkp);
+		ret = sd_sync_cache(sdkp, &sshdr);
+
 		if (ret) {
 			/* ignore OFFLINE device */
 			if (ret == -ENODEV)
-				ret = 0;
-			goto done;
+				return 0;
+
+			if (!scsi_sense_valid(&sshdr) ||
+			    sshdr.sense_key != ILLEGAL_REQUEST)
+				return ret;
+
+			/*
+			 * sshdr.sense_key == ILLEGAL_REQUEST means this drive
+			 * doesn't support sync. There's not much to do and
+			 * suspend shouldn't fail.
+			 */
+			 ret = 0;
 		}
 	}
 
@@ -3495,7 +3512,6 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 			ret = 0;
 	}
 
-done:
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From dd6e1f71b785a6ac2511e2ddb86315f292873e59 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 11 May 2017 17:24:44 -0500
Subject: scsi: libfc: fix incorrect variable assignment

Previous assignment was causing the use of the uninitialized variable
_explan_ inside fc_seq_ls_rjt() function, which in this particular case
is being called by fc_seq_els_rsp_send().

[mkp: fixed typo]

Addresses-Coverity-ID: 1398125
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libfc/fc_rport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index b44c3136eb51..520325867e2b 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -1422,7 +1422,7 @@ static void fc_rport_recv_rtv_req(struct fc_rport_priv *rdata,
 	fp = fc_frame_alloc(lport, sizeof(*rtv));
 	if (!fp) {
 		rjt_data.reason = ELS_RJT_UNAB;
-		rjt_data.reason = ELS_EXPL_INSUF_RES;
+		rjt_data.explan = ELS_EXPL_INSUF_RES;
 		fc_seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data);
 		goto drop;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 845d9e8df2fa879e6494e786f290e1fd5560ac8c Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:38 -0700
Subject: scsi: lpfc: Fix used-RPI accounting problem.

With 255 vports created a link trasition can casue a crash.

When going through discovery after a link bounce the driver is using
rpis before the cmd FCOE_POST_HDR_TEMPLATES completes. By doing that the
next rpi bumps the rpi range out of the boundary.

The fix it to increment the next_rpi only when the
FCOE_POST_HDR_TEMPLATE succeeds.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c  |  3 ++-
 drivers/scsi/lpfc/lpfc_init.c | 24 +++++-------------------
 drivers/scsi/lpfc/lpfc_sli.c  |  8 ++++++++
 drivers/scsi/lpfc/lpfc_sli4.h |  1 +
 4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 67827e397431..3f9f6d5f8c69 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -8667,7 +8667,8 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		lpfc_do_scr_ns_plogi(phba, vport);
 	goto out;
 fdisc_failed:
-	if (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS)
+	if (vport->fc_vport &&
+	    (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS))
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
 	/* Cancel discovery timer */
 	lpfc_can_disctmo(vport);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4b1eb98c228d..b1b181a756dc 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -6525,7 +6525,6 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	uint16_t rpi_limit, curr_rpi_range;
 	struct lpfc_dmabuf *dmabuf;
 	struct lpfc_rpi_hdr *rpi_hdr;
-	uint32_t rpi_count;
 
 	/*
 	 * If the SLI4 port supports extents, posting the rpi header isn't
@@ -6538,8 +6537,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 		return NULL;
 
 	/* The limit on the logical index is just the max_rpi count. */
-	rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base +
-	phba->sli4_hba.max_cfg_param.max_rpi - 1;
+	rpi_limit = phba->sli4_hba.max_cfg_param.max_rpi;
 
 	spin_lock_irq(&phba->hbalock);
 	/*
@@ -6550,18 +6548,10 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	curr_rpi_range = phba->sli4_hba.next_rpi;
 	spin_unlock_irq(&phba->hbalock);
 
-	/*
-	 * The port has a limited number of rpis. The increment here
-	 * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value
-	 * and to allow the full max_rpi range per port.
-	 */
-	if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit)
-		rpi_count = rpi_limit - curr_rpi_range;
-	else
-		rpi_count = LPFC_RPI_HDR_COUNT;
-
-	if (!rpi_count)
+	/* Reached full RPI range */
+	if (curr_rpi_range == rpi_limit)
 		return NULL;
+
 	/*
 	 * First allocate the protocol header region for the port.  The
 	 * port expects a 4KB DMA-mapped memory region that is 4K aligned.
@@ -6595,13 +6585,9 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 
 	/* The rpi_hdr stores the logical index only. */
 	rpi_hdr->start_rpi = curr_rpi_range;
+	rpi_hdr->next_rpi = phba->sli4_hba.next_rpi + LPFC_RPI_HDR_COUNT;
 	list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list);
 
-	/*
-	 * The next_rpi stores the next logical module-64 rpi value used
-	 * to post physical rpis in subsequent rpi postings.
-	 */
-	phba->sli4_hba.next_rpi += rpi_count;
 	spin_unlock_irq(&phba->hbalock);
 	return rpi_hdr;
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2a4fc00dfa9b..e2d25ae5ba45 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -17137,6 +17137,14 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page)
 				"status x%x add_status x%x, mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
 		rc = -ENXIO;
+	} else {
+		/*
+		 * The next_rpi stores the next logical module-64 rpi value used
+		 * to post physical rpis in subsequent rpi postings.
+		 */
+		spin_lock_irq(&phba->hbalock);
+		phba->sli4_hba.next_rpi = rpi_page->next_rpi;
+		spin_unlock_irq(&phba->hbalock);
 	}
 	return rc;
 }
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index da46471337c8..915e8d5581bd 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -698,6 +698,7 @@ struct lpfc_rpi_hdr {
 	struct lpfc_dmabuf *dmabuf;
 	uint32_t page_count;
 	uint32_t start_rpi;
+	uint16_t next_rpi;
 };
 
 struct lpfc_rsrc_blks {
-- 
cgit v1.2.3-59-g8ed1b


From 0c9c6a75141810acade82add4f4708959a5d3a1d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:39 -0700
Subject: scsi: lpfc: Fix system crash when port is reset.

The driver panic when using the els_wq during port reset.

Check for NULL els_wq before dereferencing.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h         | 8 ++++++--
 drivers/scsi/lpfc/lpfc_hbadisc.c | 6 +++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 6d7840b096e6..62571fa9c6ad 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1228,7 +1228,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba)
 static inline struct lpfc_sli_ring *
 lpfc_phba_elsring(struct lpfc_hba *phba)
 {
-	if (phba->sli_rev == LPFC_SLI_REV4)
-		return phba->sli4_hba.els_wq->pring;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		if (phba->sli4_hba.els_wq)
+			return phba->sli4_hba.els_wq->pring;
+		else
+			return NULL;
+	}
 	return &phba->sli.sli3_ring[LPFC_ELS_RING];
 }
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 0482c5580331..dcc9b3858778 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -693,9 +693,9 @@ lpfc_work_done(struct lpfc_hba *phba)
 	pring = lpfc_phba_elsring(phba);
 	status = (ha_copy & (HA_RXMASK  << (4*LPFC_ELS_RING)));
 	status >>= (4*LPFC_ELS_RING);
-	if ((status & HA_RXMASK) ||
-	    (pring->flag & LPFC_DEFERRED_RING_EVENT) ||
-	    (phba->hba_flag & HBA_SP_QUEUE_EVT)) {
+	if (pring && (status & HA_RXMASK ||
+		      pring->flag & LPFC_DEFERRED_RING_EVENT ||
+		      phba->hba_flag & HBA_SP_QUEUE_EVT)) {
 		if (pring->flag & LPFC_STOP_IOCB_EVENT) {
 			pring->flag |= LPFC_DEFERRED_RING_EVENT;
 			/* Set the lpfc data pending flag */
-- 
cgit v1.2.3-59-g8ed1b


From 547077a44b3b49f56c0f05c0b46c8c617dea591d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:40 -0700
Subject: scsi: lpfc: Adding additional stats counters for nvme.

More debug messages added for nvme statistics.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_attr.c    | 24 ++++++++++++---------
 drivers/scsi/lpfc/lpfc_debugfs.c | 27 +++++++++++++----------
 drivers/scsi/lpfc/lpfc_nvmet.c   | 46 ++++++++++++++++++++++++++++++----------
 drivers/scsi/lpfc/lpfc_nvmet.h   | 12 ++++++-----
 drivers/scsi/lpfc/lpfc_sli.c     | 38 ++++++++++++++++++++++++++++-----
 drivers/scsi/lpfc/lpfc_sli4.h    |  2 +-
 6 files changed, 106 insertions(+), 43 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4830370bfab1..41ec7451689b 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -205,8 +205,9 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_ls_rsp_error));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP: Rcv %08x Drop %08x\n",
+				"FCP: Rcv %08x Release %08x Drop %08x\n",
 				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->xmt_fcp_release),
 				atomic_read(&tgtp->rcv_fcp_cmd_drop));
 
 		if (atomic_read(&tgtp->rcv_fcp_cmd_in) !=
@@ -218,15 +219,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 		}
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n",
+				"FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x "
+				"drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_read),
 				atomic_read(&tgtp->xmt_fcp_read_rsp),
 				atomic_read(&tgtp->xmt_fcp_write),
-				atomic_read(&tgtp->xmt_fcp_rsp));
-
-		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP Rsp: abort %08x drop %08x\n",
-				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_rsp),
 				atomic_read(&tgtp->xmt_fcp_drop));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
@@ -236,10 +234,16 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"ABORT: Xmt %08x Err %08x Cmpl %08x",
+				"ABORT: Xmt %08x Cmpl %08x\n",
+				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_abort_cmpl));
+
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"ABORT: Sol %08x  Usol %08x Err %08x Cmpl %08x",
+				atomic_read(&tgtp->xmt_abort_sol),
+				atomic_read(&tgtp->xmt_abort_unsol),
 				atomic_read(&tgtp->xmt_abort_rsp),
-				atomic_read(&tgtp->xmt_abort_rsp_error),
-				atomic_read(&tgtp->xmt_abort_cmpl));
+				atomic_read(&tgtp->xmt_abort_rsp_error));
 
 		len +=  snprintf(buf+len, PAGE_SIZE-len, "\n");
 		return len;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index fce549a91911..a41daedeb967 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -797,11 +797,6 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				atomic_read(&tgtp->xmt_fcp_write),
 				atomic_read(&tgtp->xmt_fcp_rsp));
 
-		len += snprintf(buf + len, size - len,
-				"FCP Rsp: abort %08x drop %08x\n",
-				atomic_read(&tgtp->xmt_fcp_abort),
-				atomic_read(&tgtp->xmt_fcp_drop));
-
 		len += snprintf(buf + len, size - len,
 				"FCP Rsp Cmpl: %08x err %08x drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_rsp_cmpl),
@@ -809,10 +804,16 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
 		len += snprintf(buf + len, size - len,
-				"ABORT: Xmt %08x Err %08x Cmpl %08x",
+				"ABORT: Xmt %08x Cmpl %08x\n",
+				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_abort_cmpl));
+
+		len += snprintf(buf + len, size - len,
+				"ABORT: Sol %08x  Usol %08x Err %08x Cmpl %08x",
+				atomic_read(&tgtp->xmt_abort_sol),
+				atomic_read(&tgtp->xmt_abort_unsol),
 				atomic_read(&tgtp->xmt_abort_rsp),
-				atomic_read(&tgtp->xmt_abort_rsp_error),
-				atomic_read(&tgtp->xmt_abort_cmpl));
+				atomic_read(&tgtp->xmt_abort_rsp_error));
 
 		len +=  snprintf(buf + len, size - len, "\n");
 
@@ -1959,6 +1960,7 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 		atomic_set(&tgtp->rcv_ls_req_out, 0);
 		atomic_set(&tgtp->rcv_ls_req_drop, 0);
 		atomic_set(&tgtp->xmt_ls_abort, 0);
+		atomic_set(&tgtp->xmt_ls_abort_cmpl, 0);
 		atomic_set(&tgtp->xmt_ls_rsp, 0);
 		atomic_set(&tgtp->xmt_ls_drop, 0);
 		atomic_set(&tgtp->xmt_ls_rsp_error, 0);
@@ -1967,19 +1969,22 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 		atomic_set(&tgtp->rcv_fcp_cmd_in, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_out, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_drop, 0);
-		atomic_set(&tgtp->xmt_fcp_abort, 0);
 		atomic_set(&tgtp->xmt_fcp_drop, 0);
 		atomic_set(&tgtp->xmt_fcp_read_rsp, 0);
 		atomic_set(&tgtp->xmt_fcp_read, 0);
 		atomic_set(&tgtp->xmt_fcp_write, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp, 0);
+		atomic_set(&tgtp->xmt_fcp_release, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_error, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_drop, 0);
 
+		atomic_set(&tgtp->xmt_fcp_abort, 0);
+		atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0);
+		atomic_set(&tgtp->xmt_abort_sol, 0);
+		atomic_set(&tgtp->xmt_abort_unsol, 0);
 		atomic_set(&tgtp->xmt_abort_rsp, 0);
 		atomic_set(&tgtp->xmt_abort_rsp_error, 0);
-		atomic_set(&tgtp->xmt_abort_cmpl, 0);
 	}
 	return nbytes;
 }
@@ -3143,7 +3148,7 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			"\t\t%s RQ info: ", rqtype);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x "
-			"trunc:x%x rcv:x%llx]\n",
+			"posted:x%x rcv:x%llx]\n",
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 94434e621c33..bb12e2c9fbf4 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -502,6 +502,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 				"6150 LS Drop IO x%x: Prep\n",
 				ctxp->oxid);
 		lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+		atomic_inc(&nvmep->xmt_ls_abort);
 		lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp,
 						ctxp->sid, ctxp->oxid);
 		return -ENOMEM;
@@ -545,6 +546,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 	lpfc_nlp_put(nvmewqeq->context1);
 
 	lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+	atomic_inc(&nvmep->xmt_ls_abort);
 	lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid);
 	return -ENXIO;
 }
@@ -692,6 +694,7 @@ static void
 lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 			   struct nvmefc_tgt_fcp_req *rsp)
 {
+	struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
@@ -710,6 +713,8 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid,
 			 ctxp->state, 0);
 
+	atomic_inc(&lpfc_nvmep->xmt_fcp_release);
+
 	if (aborting)
 		return;
 
@@ -796,6 +801,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 		atomic_set(&tgtp->rcv_ls_req_out, 0);
 		atomic_set(&tgtp->rcv_ls_req_drop, 0);
 		atomic_set(&tgtp->xmt_ls_abort, 0);
+		atomic_set(&tgtp->xmt_ls_abort_cmpl, 0);
 		atomic_set(&tgtp->xmt_ls_rsp, 0);
 		atomic_set(&tgtp->xmt_ls_drop, 0);
 		atomic_set(&tgtp->xmt_ls_rsp_error, 0);
@@ -803,18 +809,21 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 		atomic_set(&tgtp->rcv_fcp_cmd_in, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_out, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_drop, 0);
-		atomic_set(&tgtp->xmt_fcp_abort, 0);
 		atomic_set(&tgtp->xmt_fcp_drop, 0);
 		atomic_set(&tgtp->xmt_fcp_read_rsp, 0);
 		atomic_set(&tgtp->xmt_fcp_read, 0);
 		atomic_set(&tgtp->xmt_fcp_write, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp, 0);
+		atomic_set(&tgtp->xmt_fcp_release, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_error, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_drop, 0);
+		atomic_set(&tgtp->xmt_fcp_abort, 0);
+		atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0);
+		atomic_set(&tgtp->xmt_abort_unsol, 0);
+		atomic_set(&tgtp->xmt_abort_sol, 0);
 		atomic_set(&tgtp->xmt_abort_rsp, 0);
 		atomic_set(&tgtp->xmt_abort_rsp_error, 0);
-		atomic_set(&tgtp->xmt_abort_cmpl, 0);
 	}
 	return error;
 }
@@ -1011,6 +1020,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		oxid = 0;
 		size = 0;
 		sid = 0;
+		ctxp = NULL;
 		goto dropit;
 	}
 
@@ -1117,6 +1127,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 		oxid = 0;
 		size = 0;
 		sid = 0;
+		ctxp = NULL;
 		goto dropit;
 	}
 
@@ -1193,8 +1204,11 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 
 	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-			"6159 FCP Drop IO x%x: err x%x\n",
-			ctxp->oxid, rc);
+			"6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+			ctxp->oxid, rc,
+			atomic_read(&tgtp->rcv_fcp_cmd_in),
+			atomic_read(&tgtp->rcv_fcp_cmd_out),
+			atomic_read(&tgtp->xmt_fcp_release));
 dropit:
 	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
 			 oxid, size, sid);
@@ -1206,7 +1220,7 @@ dropit:
 	if (nvmebuf) {
 		nvmebuf->iocbq->hba_wqidx = 0;
 		/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
+		lpfc_nvmet_rq_post(phba, ctxp, &nvmebuf->hbuf);
 	}
 #endif
 }
@@ -1812,7 +1826,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
 	ctxp->state = LPFC_NVMET_STE_DONE;
 
@@ -1827,6 +1842,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+	atomic_inc(&tgtp->xmt_abort_rsp);
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
 			"6165 ABORT cmpl: xri x%x flg x%x (%d) "
@@ -1877,7 +1893,8 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
 	if (!ctxp) {
 		/* if context is clear, related io alrady complete */
@@ -1907,6 +1924,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+	atomic_inc(&tgtp->xmt_abort_rsp);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6316 ABTS cmpl xri x%x flg x%x (%x) "
@@ -1953,7 +1971,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	atomic_inc(&tgtp->xmt_ls_abort_cmpl);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n",
@@ -2104,6 +2122,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* Issue ABTS for this WQE based on iotag */
 	ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba);
 	if (!ctxp->abort_wqeq) {
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
 				"6161 ABORT failed: No wqeqs: "
 				"xri: x%x\n", ctxp->oxid);
@@ -2128,6 +2147,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* driver queued commands are in process of being flushed */
 	if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
 				"6163 Driver in reset cleanup - flushing "
 				"NVME Req now. hba_flag x%x oxid x%x\n",
@@ -2140,6 +2160,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* Outstanding abort is in progress */
 	if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
 				"6164 Outstanding NVME I/O Abort Request "
 				"still pending on oxid x%x\n",
@@ -2190,9 +2211,12 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	abts_wqeq->context2 = ctxp;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
-	if (rc == WQE_SUCCESS)
+	if (rc == WQE_SUCCESS) {
+		atomic_inc(&tgtp->xmt_abort_sol);
 		return 0;
+	}
 
+	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	lpfc_sli_release_iocbq(phba, abts_wqeq);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
@@ -2231,11 +2255,11 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
-		atomic_inc(&tgtp->xmt_abort_rsp);
 		return 0;
 	}
 
 aerr:
+	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
@@ -2279,7 +2303,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
-		atomic_inc(&tgtp->xmt_abort_rsp);
+		atomic_inc(&tgtp->xmt_abort_unsol);
 		return 0;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 128759fe6650..837210a3e7c8 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -34,6 +34,7 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_ls_req_out;
 	atomic_t rcv_ls_req_drop;
 	atomic_t xmt_ls_abort;
+	atomic_t xmt_ls_abort_cmpl;
 
 	/* Stats counters - lpfc_nvmet_xmt_ls_rsp */
 	atomic_t xmt_ls_rsp;
@@ -47,9 +48,9 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_fcp_cmd_in;
 	atomic_t rcv_fcp_cmd_out;
 	atomic_t rcv_fcp_cmd_drop;
+	atomic_t xmt_fcp_release;
 
 	/* Stats counters - lpfc_nvmet_xmt_fcp_op */
-	atomic_t xmt_fcp_abort;
 	atomic_t xmt_fcp_drop;
 	atomic_t xmt_fcp_read_rsp;
 	atomic_t xmt_fcp_read;
@@ -62,12 +63,13 @@ struct lpfc_nvmet_tgtport {
 	atomic_t xmt_fcp_rsp_drop;
 
 
-	/* Stats counters - lpfc_nvmet_unsol_issue_abort */
+	/* Stats counters - lpfc_nvmet_xmt_fcp_abort */
+	atomic_t xmt_fcp_abort;
+	atomic_t xmt_fcp_abort_cmpl;
+	atomic_t xmt_abort_sol;
+	atomic_t xmt_abort_unsol;
 	atomic_t xmt_abort_rsp;
 	atomic_t xmt_abort_rsp_error;
-
-	/* Stats counters - lpfc_nvmet_xmt_abort_cmp */
-	atomic_t xmt_abort_cmpl;
 };
 
 struct lpfc_nvmet_rcv_ctx {
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index e2d25ae5ba45..333c5094b97d 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -512,6 +512,7 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		} else {
 			return -EINVAL;
 		}
+		hq->RQ_buf_posted += hq->entry_repost;
 		writel(doorbell.word0, hq->db_regaddr);
 	}
 	return put_index;
@@ -12788,6 +12789,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 	struct fc_frame_header *fc_hdr;
 	struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq;
 	struct lpfc_queue *drq = phba->sli4_hba.dat_rq;
+	struct lpfc_nvmet_tgtport *tgtp;
 	struct hbq_dmabuf *dma_buf;
 	uint32_t status, rq_id;
 	unsigned long iflags;
@@ -12808,7 +12810,6 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"2537 Receive Frame Truncated!!\n");
-		hrq->RQ_buf_trunc++;
 	case FC_STATUS_RQ_SUCCESS:
 		lpfc_sli4_rq_release(hrq, drq);
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -12819,6 +12820,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 			goto out;
 		}
 		hrq->RQ_rcv_buf++;
+		hrq->RQ_buf_posted--;
 		memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe));
 
 		/* If a NVME LS event (type 0x28), treat it as Fast path */
@@ -12832,8 +12834,21 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		workposted = true;
 		break;
-	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		if (phba->nvmet_support) {
+			tgtp = phba->targetport->private;
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME,
+					"6402 RQE Error x%x, posted %d err_cnt "
+					"%d: %x %x %x\n",
+					status, hrq->RQ_buf_posted,
+					hrq->RQ_no_posted_buf,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+		}
+		/* fallthrough */
+
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -13135,6 +13150,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	struct lpfc_queue *drq;
 	struct rqb_dmabuf *dma_buf;
 	struct fc_frame_header *fc_hdr;
+	struct lpfc_nvmet_tgtport *tgtp;
 	uint32_t status, rq_id;
 	unsigned long iflags;
 	uint32_t fctl, idx;
@@ -13165,8 +13181,6 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"6126 Receive Frame Truncated!!\n");
-		hrq->RQ_buf_trunc++;
-		break;
 	case FC_STATUS_RQ_SUCCESS:
 		lpfc_sli4_rq_release(hrq, drq);
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -13178,6 +13192,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 		}
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		hrq->RQ_rcv_buf++;
+		hrq->RQ_buf_posted--;
 		fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt;
 
 		/* Just some basic sanity checks on FCP Command frame */
@@ -13200,8 +13215,21 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 drop:
 		lpfc_in_buf_free(phba, &dma_buf->dbuf);
 		break;
-	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		if (phba->nvmet_support) {
+			tgtp = phba->targetport->private;
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME,
+					"6401 RQE Error x%x, posted %d err_cnt "
+					"%d: %x %x %x\n",
+					status, hrq->RQ_buf_posted,
+					hrq->RQ_no_posted_buf,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+		}
+		/* fallthrough */
+
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
 		spin_lock_irqsave(&phba->hbalock, iflags);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 915e8d5581bd..7a8cbeb6a745 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -195,7 +195,7 @@ struct lpfc_queue {
 /* defines for RQ stats */
 #define	RQ_no_posted_buf	q_cnt_1
 #define	RQ_no_buf_found		q_cnt_2
-#define	RQ_buf_trunc		q_cnt_3
+#define	RQ_buf_posted		q_cnt_3
 #define	RQ_rcv_buf		q_cnt_4
 
 	uint64_t isr_timestamp;
-- 
cgit v1.2.3-59-g8ed1b


From 61f3d4bf4f8f062cf6be143c9b7adbc3a017ea6e Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:41 -0700
Subject: scsi: lpfc: Fix nvmet RQ resource needs for large block writes.

Large block writes to the nvme target were failing because the default
number of RQs posted was insufficient.

Expand the NVMET RQs to 2048 RQEs and ensure a minimum of 512 RQEs are
posted, no matter how many MRQs are configured.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_attr.c  |  6 +++---
 drivers/scsi/lpfc/lpfc_init.c  | 23 +++++++++++++-------
 drivers/scsi/lpfc/lpfc_nvmet.c |  2 +-
 drivers/scsi/lpfc/lpfc_nvmet.h |  1 +
 drivers/scsi/lpfc/lpfc_sli.c   | 49 +++++++++---------------------------------
 drivers/scsi/lpfc/lpfc_sli4.h  |  2 +-
 6 files changed, 31 insertions(+), 52 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 41ec7451689b..129d6cd7635b 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -60,9 +60,9 @@
 #define LPFC_MIN_DEVLOSS_TMO	1
 #define LPFC_MAX_DEVLOSS_TMO	255
 
-#define LPFC_DEF_MRQ_POST	256
-#define LPFC_MIN_MRQ_POST	32
-#define LPFC_MAX_MRQ_POST	512
+#define LPFC_DEF_MRQ_POST	512
+#define LPFC_MIN_MRQ_POST	512
+#define LPFC_MAX_MRQ_POST	2048
 
 /*
  * Write key size should be multiple of 4. If write key is changed
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index b1b181a756dc..5f62e3a1dff6 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3390,6 +3390,11 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 	 */
 	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
 	nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
+
+	/* Ensure we at least meet the minimun for the system */
+	if (nvmet_xri_cnt < LPFC_NVMET_RQE_DEF_COUNT)
+		nvmet_xri_cnt = LPFC_NVMET_RQE_DEF_COUNT;
+
 	tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
 	if (nvmet_xri_cnt > tot_cnt) {
 		phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq;
@@ -8158,7 +8163,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			/* Create NVMET Receive Queue for header */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      phba->sli4_hba.rq_esize,
-						      phba->sli4_hba.rq_ecount);
+						      LPFC_NVMET_RQE_DEF_COUNT);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3146 Failed allocate "
@@ -8180,7 +8185,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			/* Create NVMET Receive Queue for data */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      phba->sli4_hba.rq_esize,
-						      phba->sli4_hba.rq_ecount);
+						      LPFC_NVMET_RQE_DEF_COUNT);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3156 Failed allocate "
@@ -8770,9 +8775,6 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		goto out_destroy;
 	}
 
-	lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ);
-	lpfc_rq_adjust_repost(phba, phba->sli4_hba.dat_rq, LPFC_ELS_HBQ);
-
 	rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
 			    phba->sli4_hba.els_cq, LPFC_USOL);
 	if (rc) {
@@ -11096,7 +11098,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	struct lpfc_hba   *phba;
 	struct lpfc_vport *vport = NULL;
 	struct Scsi_Host  *shost = NULL;
-	int error, cnt;
+	int error, cnt, num;
 	uint32_t cfg_mode, intr_mode;
 
 	/* Allocate memory for HBA structure */
@@ -11131,8 +11133,13 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	}
 
 	cnt = phba->cfg_iocb_cnt * 1024;
-	if (phba->nvmet_support)
-		cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq;
+	if (phba->nvmet_support) {
+		/* Ensure we at least meet the minimun for the system */
+		num = (phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq);
+		if (num < LPFC_NVMET_RQE_DEF_COUNT)
+			num = LPFC_NVMET_RQE_DEF_COUNT;
+		cnt += num;
+	}
 
 	/* Initialize and populate the iocb list per host */
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index bb12e2c9fbf4..dfa7296499cf 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -614,9 +614,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
 			 ctxp->oxid, rsp->op, rsp->rsplen);
 
+	ctxp->flag |= LPFC_NVMET_IO_INP;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
 	if (rc == WQE_SUCCESS) {
-		ctxp->flag |= LPFC_NVMET_IO_INP;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 		if (!phba->ktime_on)
 			return 0;
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 837210a3e7c8..55f2a859dc70 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -22,6 +22,7 @@
  ********************************************************************/
 
 #define LPFC_NVMET_DEFAULT_SEGS		(64 + 1)	/* 256K IOs */
+#define LPFC_NVMET_RQE_DEF_COUNT	512
 #define LPFC_NVMET_SUCCESS_LEN	12
 
 /* Used for NVME Target */
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 333c5094b97d..f344abce4949 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -479,22 +479,23 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	if (unlikely(!hq) || unlikely(!dq))
 		return -ENOMEM;
 	put_index = hq->host_index;
-	temp_hrqe = hq->qe[hq->host_index].rqe;
+	temp_hrqe = hq->qe[put_index].rqe;
 	temp_drqe = dq->qe[dq->host_index].rqe;
 
 	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
 		return -EINVAL;
-	if (hq->host_index != dq->host_index)
+	if (put_index != dq->host_index)
 		return -EINVAL;
 	/* If the host has not yet processed the next entry then we are done */
-	if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index)
+	if (((put_index + 1) % hq->entry_count) == hq->hba_index)
 		return -EBUSY;
 	lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size);
 	lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size);
 
 	/* Update the host index to point to the next slot */
-	hq->host_index = ((hq->host_index + 1) % hq->entry_count);
+	hq->host_index = ((put_index + 1) % hq->entry_count);
 	dq->host_index = ((dq->host_index + 1) % dq->entry_count);
+	hq->RQ_buf_posted++;
 
 	/* Ring The Header Receive Queue Doorbell */
 	if (!(hq->host_index % hq->entry_repost)) {
@@ -512,7 +513,6 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		} else {
 			return -EINVAL;
 		}
-		hq->RQ_buf_posted += hq->entry_repost;
 		writel(doorbell.word0, hq->db_regaddr);
 	}
 	return put_index;
@@ -6905,14 +6905,9 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			INIT_LIST_HEAD(&rqbp->rqb_buffer_list);
 			rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc;
 			rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free;
-			rqbp->entry_count = 256;
+			rqbp->entry_count = LPFC_NVMET_RQE_DEF_COUNT;
 			rqbp->buffer_count = 0;
 
-			/* Divide by 4 and round down to multiple of 16 */
-			rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8;
-			phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc;
-			phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc;
-
 			lpfc_post_rq_buffer(
 				phba, phba->sli4_hba.nvmet_mrq_hdr[i],
 				phba->sli4_hba.nvmet_mrq_data[i],
@@ -14892,34 +14887,6 @@ out:
 	return status;
 }
 
-/**
- * lpfc_rq_adjust_repost - Adjust entry_repost for an RQ
- * @phba: HBA structure that indicates port to create a queue on.
- * @rq:   The queue structure to use for the receive queue.
- * @qno:  The associated HBQ number
- *
- *
- * For SLI4 we need to adjust the RQ repost value based on
- * the number of buffers that are initially posted to the RQ.
- */
-void
-lpfc_rq_adjust_repost(struct lpfc_hba *phba, struct lpfc_queue *rq, int qno)
-{
-	uint32_t cnt;
-
-	/* sanity check on queue memory */
-	if (!rq)
-		return;
-	cnt = lpfc_hbq_defs[qno]->entry_count;
-
-	/* Recalc repost for RQs based on buffers initially posted */
-	cnt = (cnt >> 3);
-	if (cnt < LPFC_QUEUE_MIN_REPOST)
-		cnt = LPFC_QUEUE_MIN_REPOST;
-
-	rq->entry_repost = cnt;
-}
-
 /**
  * lpfc_rq_create - Create a Receive Queue on the HBA
  * @phba: HBA structure that indicates port to create a queue on.
@@ -15105,6 +15072,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	hrq->subtype = subtype;
 	hrq->host_index = 0;
 	hrq->hba_index = 0;
+	hrq->entry_repost = LPFC_RQ_REPOST;
 
 	/* now create the data queue */
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -15186,6 +15154,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	drq->subtype = subtype;
 	drq->host_index = 0;
 	drq->hba_index = 0;
+	drq->entry_repost = LPFC_RQ_REPOST;
 
 	/* link the header and data RQs onto the parent cq child list */
 	list_add_tail(&hrq->list, &cq->child_list);
@@ -15343,6 +15312,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		hrq->subtype = subtype;
 		hrq->host_index = 0;
 		hrq->hba_index = 0;
+		hrq->entry_repost = LPFC_RQ_REPOST;
 
 		drq->db_format = LPFC_DB_RING_FORMAT;
 		drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -15351,6 +15321,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq->subtype = subtype;
 		drq->host_index = 0;
 		drq->hba_index = 0;
+		drq->entry_repost = LPFC_RQ_REPOST;
 
 		list_add_tail(&hrq->list, &cq->child_list);
 		list_add_tail(&drq->list, &cq->child_list);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 7a8cbeb6a745..422bde85c9f1 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -156,6 +156,7 @@ struct lpfc_queue {
 	uint32_t entry_size;	/* Size of each queue entry. */
 	uint32_t entry_repost;	/* Count of entries before doorbell is rung */
 #define LPFC_QUEUE_MIN_REPOST	8
+#define LPFC_RQ_REPOST		64
 	uint32_t queue_id;	/* Queue ID assigned by the hardware */
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t page_count;	/* Number of pages allocated for this queue */
@@ -763,7 +764,6 @@ int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *,
 int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 			struct lpfc_queue **drqp, struct lpfc_queue **cqp,
 			uint32_t subtype);
-void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int);
 int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *);
 int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *);
 int lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *);
-- 
cgit v1.2.3-59-g8ed1b


From 3120046a970aee08a0787fb6792590f1e0047f62 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:42 -0700
Subject: scsi: lpfc: Fix NVMEI driver not decrementing counter causing bad
 rport state.

During driver boot, a latency in the NVMET driver side causes the
incoming NVMEI PRLI to get rejected by the NVMET driver.  When this
happens, the NVMEI driver runs out of PRLI retries.  Bouncing the link
does not fix the situation.

If the NVMEI driver decides, on PRLI completion failures, to retry the
PRLI, always decrement the fc4_prli_sent counter.  This allows the PRLI
completion to resolve to UNMAPPED when NVMET rejects the PRLI.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 3f9f6d5f8c69..3085895464d9 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2077,16 +2077,19 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 	if (irsp->ulpStatus) {
 		/* Check for retry */
+		ndlp->fc4_prli_sent--;
 		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
 			/* ELS command is being retried */
-			ndlp->fc4_prli_sent--;
 			goto out;
 		}
+
 		/* PRLI failed */
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
-				 "2754 PRLI failure DID:%06X Status:x%x/x%x\n",
+				 "2754 PRLI failure DID:%06X Status:x%x/x%x, "
+				 "data: x%x\n",
 				 ndlp->nlp_DID, irsp->ulpStatus,
-				 irsp->un.ulpWord[4]);
+				 irsp->un.ulpWord[4], ndlp->fc4_prli_sent);
+
 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
 		if (lpfc_error_lost_link(irsp))
 			goto out;
-- 
cgit v1.2.3-59-g8ed1b


From 7869da183a7cfc8a2189f6eddd3bc558be40d5e3 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:43 -0700
Subject: scsi: lpfc: Fix NMI watchdog assertions when running nvmet IOPS tests

After running IOPS test for 30 second we get kernel:NMI watchdog:
Watchdog detected hard LOCKUP on cpu 0

The driver is speend too much time in its ISR.

In ISR EQ and CQ processing routines, if we hit the entry_repost numbers
of EQE/CQEs just break out of the routine as opposed to hitting the
doorbell with NOARM and continue processing.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_debugfs.c | 30 +++++++++++++++---------------
 drivers/scsi/lpfc/lpfc_sli.c     | 16 ++++++----------
 2 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index a41daedeb967..7284533f4df2 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -3075,11 +3075,11 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype,
 			qp->assoc_qid, qp->q_cnt_1,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index);
+			qp->hba_index, qp->entry_repost);
 	len +=  snprintf(pbuffer + len,
 			LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 	return len;
@@ -3126,11 +3126,11 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype,
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index);
+			qp->hba_index, qp->entry_repost);
 
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
@@ -3152,16 +3152,16 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tHQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]\n",
+			"\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index);
+			qp->host_index, qp->hba_index, qp->entry_repost);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]\n",
+			"\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
 			datqp->queue_id, datqp->entry_count,
 			datqp->entry_size, datqp->host_index,
-			datqp->hba_index);
+			datqp->hba_index, datqp->entry_repost);
 	return len;
 }
 
@@ -3247,10 +3247,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
 			eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index);
+			qp->host_index, qp->hba_index, qp->entry_repost);
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
 	return len;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index f344abce4949..cc45e9191062 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -12961,7 +12961,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 		while ((cqe = lpfc_sli4_cq_get(cq))) {
 			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
 			if (!(++ecount % cq->entry_repost))
-				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+				break;
 			cq->CQ_mbox++;
 		}
 		break;
@@ -12975,7 +12975,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 				workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
 								      cqe);
 			if (!(++ecount % cq->entry_repost))
-				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+				break;
 		}
 
 		/* Track the max number of CQEs processed in 1 EQ */
@@ -13227,10 +13227,6 @@ drop:
 	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
-		spin_lock_irqsave(&phba->hbalock, iflags);
-		phba->hba_flag |= HBA_POST_RECEIVE_BUFFER;
-		spin_unlock_irqrestore(&phba->hbalock, iflags);
-		workposted = true;
 		break;
 	}
 out:
@@ -13384,7 +13380,7 @@ process_cq:
 	while ((cqe = lpfc_sli4_cq_get(cq))) {
 		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
 		if (!(++ecount % cq->entry_repost))
-			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+			break;
 	}
 
 	/* Track the max number of CQEs processed in 1 EQ */
@@ -13475,7 +13471,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
 	while ((cqe = lpfc_sli4_cq_get(cq))) {
 		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
 		if (!(++ecount % cq->entry_repost))
-			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+			break;
 	}
 
 	/* Track the max number of CQEs processed in 1 EQ */
@@ -13557,7 +13553,7 @@ lpfc_sli4_fof_intr_handler(int irq, void *dev_id)
 	while ((eqe = lpfc_sli4_eq_get(eq))) {
 		lpfc_sli4_fof_handle_eqe(phba, eqe);
 		if (!(++ecount % eq->entry_repost))
-			lpfc_sli4_eq_release(eq, LPFC_QUEUE_NOARM);
+			break;
 		eq->EQ_processed++;
 	}
 
@@ -13674,7 +13670,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 
 		lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
 		if (!(++ecount % fpeq->entry_repost))
-			lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM);
+			break;
 		fpeq->EQ_processed++;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 3c603be9798758dde794daa622e0f7017dbff3a7 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:44 -0700
Subject: scsi: lpfc: Separate NVMET data buffer pool fir ELS/CT.

Using 2048 byte buffer and onle 128 bytes is needed.

Create nee LFPC_NVMET_DATA_BUF_SIZE define to use for NVMET RQ/MRQs.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h      |  1 +
 drivers/scsi/lpfc/lpfc_crtn.h |  1 +
 drivers/scsi/lpfc/lpfc_hw4.h  |  1 +
 drivers/scsi/lpfc/lpfc_init.c |  7 ++++++-
 drivers/scsi/lpfc/lpfc_mem.c  | 33 ++++++++++++++++++++++++++-------
 drivers/scsi/lpfc/lpfc_sli.c  | 19 +++++++++++++++----
 6 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 62571fa9c6ad..c4b38491da8e 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -943,6 +943,7 @@ struct lpfc_hba {
 	struct pci_pool *lpfc_mbuf_pool;
 	struct pci_pool *lpfc_hrb_pool;	/* header receive buffer pool */
 	struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */
+	struct pci_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */
 	struct pci_pool *lpfc_hbq_pool;	/* SLI3 hbq buffer pool */
 	struct pci_pool *txrdy_payload_pool;
 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 1c55408ac718..fb7fc48a1324 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -271,6 +271,7 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *, uint16_t);
 void lpfc_sli4_clear_fcf_rr_bmask(struct lpfc_hba *);
 
 int lpfc_mem_alloc(struct lpfc_hba *, int align);
+int lpfc_nvmet_mem_alloc(struct lpfc_hba *phba);
 int lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *);
 void lpfc_mem_free(struct lpfc_hba *);
 void lpfc_mem_free_all(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 1d12f2be36bc..df97c6b7433b 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1356,6 +1356,7 @@ struct lpfc_mbx_wq_destroy {
 
 #define LPFC_HDR_BUF_SIZE 128
 #define LPFC_DATA_BUF_SIZE 2048
+#define LPFC_NVMET_DATA_BUF_SIZE 128
 struct rq_context {
 	uint32_t word0;
 #define lpfc_rq_context_rqe_count_SHIFT	16	/* Version 0 Only */
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 5f62e3a1dff6..26b6a843d32d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -5956,16 +5956,21 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
 			if (wwn == lpfc_enable_nvmet[i]) {
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+				if (lpfc_nvmet_mem_alloc(phba))
+					break;
+
+				phba->nvmet_support = 1; /* a match */
+
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6017 NVME Target %016llx\n",
 						wwn);
-				phba->nvmet_support = 1; /* a match */
 #else
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6021 Can't enable NVME Target."
 						" NVME_TARGET_FC infrastructure"
 						" is not in kernel\n");
 #endif
+				break;
 			}
 		}
 	}
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 5986c7957199..91060afc9721 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -214,6 +214,21 @@ fail_free_drb_pool:
 	return -ENOMEM;
 }
 
+int
+lpfc_nvmet_mem_alloc(struct lpfc_hba *phba)
+{
+	phba->lpfc_nvmet_drb_pool =
+		pci_pool_create("lpfc_nvmet_drb_pool",
+				phba->pcidev, LPFC_NVMET_DATA_BUF_SIZE,
+				SGL_ALIGN_SZ, 0);
+	if (!phba->lpfc_nvmet_drb_pool) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"6024 Can't enable NVME Target - no memory\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 /**
  * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc
  * @phba: HBA to free memory for
@@ -232,6 +247,9 @@ lpfc_mem_free(struct lpfc_hba *phba)
 
 	/* Free HBQ pools */
 	lpfc_sli_hbqbuf_free_all(phba);
+	if (phba->lpfc_nvmet_drb_pool)
+		pci_pool_destroy(phba->lpfc_nvmet_drb_pool);
+	phba->lpfc_nvmet_drb_pool = NULL;
 	if (phba->lpfc_drb_pool)
 		pci_pool_destroy(phba->lpfc_drb_pool);
 	phba->lpfc_drb_pool = NULL;
@@ -624,20 +642,20 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 		kfree(dma_buf);
 		return NULL;
 	}
-	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
-					    &dma_buf->dbuf.phys);
+	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_nvmet_drb_pool,
+					    GFP_KERNEL, &dma_buf->dbuf.phys);
 	if (!dma_buf->dbuf.virt) {
 		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
 			      dma_buf->hbuf.phys);
 		kfree(dma_buf);
 		return NULL;
 	}
-	dma_buf->total_size = LPFC_DATA_BUF_SIZE;
+	dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE;
 
 	dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx),
 				   GFP_KERNEL);
 	if (!dma_buf->context) {
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
+		pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt,
 			      dma_buf->dbuf.phys);
 		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
 			      dma_buf->hbuf.phys);
@@ -648,7 +666,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 	dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
 	if (!dma_buf->iocbq) {
 		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
+		pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt,
 			      dma_buf->dbuf.phys);
 		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
 			      dma_buf->hbuf.phys);
@@ -678,7 +696,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 	if (!dma_buf->sglq) {
 		lpfc_sli_release_iocbq(phba, dma_buf->iocbq);
 		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
+		pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt,
 			      dma_buf->dbuf.phys);
 		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
 			      dma_buf->hbuf.phys);
@@ -718,7 +736,8 @@ lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab)
 	lpfc_sli_release_iocbq(phba, dmab->iocbq);
 	kfree(dmab->context);
 	pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
-	pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys);
+	pci_pool_free(phba->lpfc_nvmet_drb_pool,
+		      dmab->dbuf.virt, dmab->dbuf.phys);
 	kfree(dmab);
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index cc45e9191062..49d5c4700054 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -15079,7 +15079,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) {
 		bf_set(lpfc_rq_context_rqe_count_1,
 		       &rq_create->u.request.context, hrq->entry_count);
-		rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE;
+		if (subtype == LPFC_NVMET)
+			rq_create->u.request.context.buffer_size =
+				LPFC_NVMET_DATA_BUF_SIZE;
+		else
+			rq_create->u.request.context.buffer_size =
+				LPFC_DATA_BUF_SIZE;
 		bf_set(lpfc_rq_context_rqe_size, &rq_create->u.request.context,
 		       LPFC_RQE_SIZE_8);
 		bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context,
@@ -15116,8 +15121,14 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 			       LPFC_RQ_RING_SIZE_4096);
 			break;
 		}
-		bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
-		       LPFC_DATA_BUF_SIZE);
+		if (subtype == LPFC_NVMET)
+			bf_set(lpfc_rq_context_buf_size,
+			       &rq_create->u.request.context,
+			       LPFC_NVMET_DATA_BUF_SIZE);
+		else
+			bf_set(lpfc_rq_context_buf_size,
+			       &rq_create->u.request.context,
+			       LPFC_DATA_BUF_SIZE);
 	}
 	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
 	       cq->queue_id);
@@ -15263,7 +15274,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 			       cq->queue_id);
 			bf_set(lpfc_rq_context_data_size,
 			       &rq_create->u.request.context,
-			       LPFC_DATA_BUF_SIZE);
+			       LPFC_NVMET_DATA_BUF_SIZE);
 			bf_set(lpfc_rq_context_hdr_size,
 			       &rq_create->u.request.context,
 			       LPFC_HDR_BUF_SIZE);
-- 
cgit v1.2.3-59-g8ed1b


From 6c621a2229b084da0d926967f84b059a10c26ede Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:45 -0700
Subject: scsi: lpfc: Separate NVMET RQ buffer posting from IO resources
 SGL/iocbq/context

Currently IO resources are mapped 1 to 1 with RQ buffers posted

Added logic to separate RQE buffers from IO op resources
(sgl/iocbq/context). During initialization, the driver will determine
how many SGLs it will allocate for NVMET (based on what the firmware
reports) and associate a NVMET IOCBq and NVMET context structure with
each one.

Now that hdr/data buffers are immediately reposted back to the RQ, 512
RQEs for each MRQ is sufficient. Also, since NVMET data buffers are now
128 bytes, lpfc_nvmet_mrq_post is not necessary anymore as we will
always post the max (512) buffers per NVMET MRQ.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h       |  11 +-
 drivers/scsi/lpfc/lpfc_attr.c  |  11 --
 drivers/scsi/lpfc/lpfc_crtn.h  |   8 +-
 drivers/scsi/lpfc/lpfc_init.c  |  92 ++-------------
 drivers/scsi/lpfc/lpfc_mem.c   |  73 +-----------
 drivers/scsi/lpfc/lpfc_nvmet.c | 246 +++++++++++++++++++++++++++++++----------
 drivers/scsi/lpfc/lpfc_nvmet.h |   1 +
 drivers/scsi/lpfc/lpfc_sli.c   |  78 ++++++++++++-
 drivers/scsi/lpfc/lpfc_sli4.h  |   4 +-
 9 files changed, 291 insertions(+), 233 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index c4b38491da8e..72641b1d3ab8 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -141,6 +141,13 @@ struct lpfc_dmabuf {
 	uint32_t   buffer_tag;	/* used for tagged queue ring */
 };
 
+struct lpfc_nvmet_ctxbuf {
+	struct list_head list;
+	struct lpfc_nvmet_rcv_ctx *context;
+	struct lpfc_iocbq *iocbq;
+	struct lpfc_sglq *sglq;
+};
+
 struct lpfc_dma_pool {
 	struct lpfc_dmabuf   *elements;
 	uint32_t    max_count;
@@ -163,9 +170,6 @@ struct rqb_dmabuf {
 	struct lpfc_dmabuf dbuf;
 	uint16_t total_size;
 	uint16_t bytes_recv;
-	void *context;
-	struct lpfc_iocbq *iocbq;
-	struct lpfc_sglq *sglq;
 	struct lpfc_queue *hrq;	  /* ptr to associated Header RQ */
 	struct lpfc_queue *drq;	  /* ptr to associated Data RQ */
 };
@@ -777,7 +781,6 @@ struct lpfc_hba {
 	uint32_t cfg_nvme_oas;
 	uint32_t cfg_nvme_io_channel;
 	uint32_t cfg_nvmet_mrq;
-	uint32_t cfg_nvmet_mrq_post;
 	uint32_t cfg_enable_nvmet;
 	uint32_t cfg_nvme_enable_fb;
 	uint32_t cfg_nvmet_fb_size;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 129d6cd7635b..65264582915a 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3315,14 +3315,6 @@ LPFC_ATTR_R(nvmet_mrq,
 	    1, 1, 16,
 	    "Specify number of RQ pairs for processing NVMET cmds");
 
-/*
- * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ
- *
- */
-LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
-	    LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST,
-	    "Specify number of buffers to post on every MRQ");
-
 /*
  * lpfc_enable_fc4_type: Defines what FC4 types are supported.
  * Supported Values:  1 - register just FCP
@@ -5158,7 +5150,6 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_suppress_rsp,
 	&dev_attr_lpfc_nvme_io_channel,
 	&dev_attr_lpfc_nvmet_mrq,
-	&dev_attr_lpfc_nvmet_mrq_post,
 	&dev_attr_lpfc_nvme_enable_fb,
 	&dev_attr_lpfc_nvmet_fb_size,
 	&dev_attr_lpfc_enable_bg,
@@ -6198,7 +6189,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 
 	lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type);
 	lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq);
-	lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post);
 
 	/* Initialize first burst. Target vs Initiator are different. */
 	lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
@@ -6295,7 +6285,6 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
 		/* Not NVME Target mode.  Turn off Target parameters. */
 		phba->nvmet_support = 0;
 		phba->cfg_nvmet_mrq = 0;
-		phba->cfg_nvmet_mrq_post = 0;
 		phba->cfg_nvmet_fb_size = 0;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index fb7fc48a1324..cc95abd130b4 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -75,6 +75,8 @@ void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *);
 void lpfc_retry_pport_discovery(struct lpfc_hba *);
 void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t);
+int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt);
+void lpfc_free_iocb_list(struct lpfc_hba *phba);
 
 void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
@@ -246,16 +248,14 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
 void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
 struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba);
 void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab);
-void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
-			struct lpfc_dmabuf *mp);
+void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
+			    struct lpfc_nvmet_ctxbuf *ctxp);
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			       struct fc_frame_header *fc_hdr);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
 			uint16_t);
 int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		     struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe);
-int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq,
-			struct lpfc_queue *dq, int count);
 int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq);
 void lpfc_unregister_fcf(struct lpfc_hba *);
 void lpfc_unregister_fcf_rescan(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 26b6a843d32d..86b0b26dfeea 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1099,7 +1099,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 
 		list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
 			ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
-			lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 		}
 	}
 
@@ -3381,7 +3381,7 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 {
 	struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL;
 	uint16_t i, lxri, xri_cnt, els_xri_cnt;
-	uint16_t nvmet_xri_cnt, tot_cnt;
+	uint16_t nvmet_xri_cnt;
 	LIST_HEAD(nvmet_sgl_list);
 	int rc;
 
@@ -3389,20 +3389,9 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 	 * update on pci function's nvmet xri-sgl list
 	 */
 	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
-	nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
 
-	/* Ensure we at least meet the minimun for the system */
-	if (nvmet_xri_cnt < LPFC_NVMET_RQE_DEF_COUNT)
-		nvmet_xri_cnt = LPFC_NVMET_RQE_DEF_COUNT;
-
-	tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
-	if (nvmet_xri_cnt > tot_cnt) {
-		phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq;
-		nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
-		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-				"6301 NVMET post-sgl count changed to %d\n",
-				phba->cfg_nvmet_mrq_post);
-	}
+	/* For NVMET, ALL remaining XRIs are dedicated for IO processing */
+	nvmet_xri_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
 
 	if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) {
 		/* els xri-sgl expanded */
@@ -5835,6 +5824,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list);
+
 		/* Fast-path XRI aborted CQ Event work queue list */
 		INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
 	}
@@ -6279,7 +6270,7 @@ lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba)
  *
  * This routine is invoked to free the driver's IOCB list and memory.
  **/
-static void
+void
 lpfc_free_iocb_list(struct lpfc_hba *phba)
 {
 	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
@@ -6307,7 +6298,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba)
  *	0 - successful
  *	other values - error
  **/
-static int
+int
 lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count)
 {
 	struct lpfc_iocbq *iocbq_entry = NULL;
@@ -8321,46 +8312,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list);
 }
 
-int
-lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
-		    struct lpfc_queue *drq, int count)
-{
-	int rc, i;
-	struct lpfc_rqe hrqe;
-	struct lpfc_rqe drqe;
-	struct lpfc_rqb *rqbp;
-	struct rqb_dmabuf *rqb_buffer;
-	LIST_HEAD(rqb_buf_list);
-
-	rqbp = hrq->rqbp;
-	for (i = 0; i < count; i++) {
-		rqb_buffer = (rqbp->rqb_alloc_buffer)(phba);
-		if (!rqb_buffer)
-			break;
-		rqb_buffer->hrq = hrq;
-		rqb_buffer->drq = drq;
-		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
-	}
-	while (!list_empty(&rqb_buf_list)) {
-		list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf,
-				 hbuf.list);
-
-		hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys);
-		hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys);
-		drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys);
-		drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys);
-		rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe);
-		if (rc < 0) {
-			(rqbp->rqb_free_buffer)(phba, rqb_buffer);
-		} else {
-			list_add_tail(&rqb_buffer->hbuf.list,
-				      &rqbp->rqb_buffer_list);
-			rqbp->buffer_count++;
-		}
-	}
-	return 1;
-}
-
 int
 lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq)
 {
@@ -11103,7 +11054,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	struct lpfc_hba   *phba;
 	struct lpfc_vport *vport = NULL;
 	struct Scsi_Host  *shost = NULL;
-	int error, cnt, num;
+	int error;
 	uint32_t cfg_mode, intr_mode;
 
 	/* Allocate memory for HBA structure */
@@ -11137,27 +11088,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_unset_pci_mem_s4;
 	}
 
-	cnt = phba->cfg_iocb_cnt * 1024;
-	if (phba->nvmet_support) {
-		/* Ensure we at least meet the minimun for the system */
-		num = (phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq);
-		if (num < LPFC_NVMET_RQE_DEF_COUNT)
-			num = LPFC_NVMET_RQE_DEF_COUNT;
-		cnt += num;
-	}
-
-	/* Initialize and populate the iocb list per host */
-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"2821 initialize iocb list %d total %d\n",
-			phba->cfg_iocb_cnt, cnt);
-	error = lpfc_init_iocb_list(phba, cnt);
-
-	if (error) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1413 Failed to initialize iocb list.\n");
-		goto out_unset_driver_resource_s4;
-	}
-
 	INIT_LIST_HEAD(&phba->active_rrq_list);
 	INIT_LIST_HEAD(&phba->fcf.fcf_pri_list);
 
@@ -11166,7 +11096,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1414 Failed to set up driver resource.\n");
-		goto out_free_iocb_list;
+		goto out_unset_driver_resource_s4;
 	}
 
 	/* Get the default values for Model Name and Description */
@@ -11266,8 +11196,6 @@ out_destroy_shost:
 	lpfc_destroy_shost(phba);
 out_unset_driver_resource:
 	lpfc_unset_driver_resource_phase2(phba);
-out_free_iocb_list:
-	lpfc_free_iocb_list(phba);
 out_unset_driver_resource_s4:
 	lpfc_sli4_driver_resource_unset(phba);
 out_unset_pci_mem_s4:
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 91060afc9721..fcc05a1517c2 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -629,8 +629,6 @@ struct rqb_dmabuf *
 lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 {
 	struct rqb_dmabuf *dma_buf;
-	struct lpfc_iocbq *nvmewqe;
-	union lpfc_wqe128 *wqe;
 
 	dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL);
 	if (!dma_buf)
@@ -651,60 +649,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 		return NULL;
 	}
 	dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE;
-
-	dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx),
-				   GFP_KERNEL);
-	if (!dma_buf->context) {
-		pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		return NULL;
-	}
-
-	dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
-	if (!dma_buf->iocbq) {
-		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-				"2621 Ran out of nvmet iocb/WQEs\n");
-		return NULL;
-	}
-	dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
-	nvmewqe = dma_buf->iocbq;
-	wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
-	/* Initialize WQE */
-	memset(wqe, 0, sizeof(union lpfc_wqe));
-	/* Word 7 */
-	bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
-	bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
-	bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
-	/* Word 10 */
-	bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
-	bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
-	bf_set(wqe_qosd, &wqe->generic.wqe_com, 0);
-
-	dma_buf->iocbq->context1 = NULL;
-	spin_lock(&phba->sli4_hba.sgl_list_lock);
-	dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq);
-	spin_unlock(&phba->sli4_hba.sgl_list_lock);
-	if (!dma_buf->sglq) {
-		lpfc_sli_release_iocbq(phba, dma_buf->iocbq);
-		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-				"6132 Ran out of nvmet XRIs\n");
-		return NULL;
-	}
 	return dma_buf;
 }
 
@@ -723,18 +667,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 void
 lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab)
 {
-	unsigned long flags;
-
-	__lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag);
-	dmab->sglq->state = SGL_FREED;
-	dmab->sglq->ndlp = NULL;
-
-	spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags);
-	list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list);
-	spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags);
-
-	lpfc_sli_release_iocbq(phba, dmab->iocbq);
-	kfree(dmab->context);
 	pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
 	pci_pool_free(phba->lpfc_nvmet_drb_pool,
 		      dmab->dbuf.virt, dmab->dbuf.phys);
@@ -822,6 +754,11 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
 	rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
 	if (rc < 0) {
 		(rqbp->rqb_free_buffer)(phba, rqb_entry);
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"6409 Cannot post to RQ %d: %x %x\n",
+				rqb_entry->hrq->queue_id,
+				rqb_entry->hrq->host_index,
+				rqb_entry->hrq->hba_index);
 	} else {
 		list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
 		rqbp->buffer_count++;
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index dfa7296499cf..fcc77ae0c71c 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -142,7 +142,7 @@ out:
 }
 
 /**
- * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context
+ * lpfc_nvmet_ctxbuf_post - Repost a NVMET RQ DMA buffer and clean up context
  * @phba: HBA buffer is associated with
  * @ctxp: context to clean up
  * @mp: Buffer to free
@@ -155,24 +155,24 @@ out:
  * Returns: None
  **/
 void
-lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
-		   struct lpfc_dmabuf *mp)
+lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 {
-	if (ctxp) {
-		if (ctxp->flag)
-			lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-				"6314 rq_post ctx xri x%x flag x%x\n",
-				ctxp->oxid, ctxp->flag);
-
-		if (ctxp->txrdy) {
-			pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
-				      ctxp->txrdy_phys);
-			ctxp->txrdy = NULL;
-			ctxp->txrdy_phys = 0;
-		}
-		ctxp->state = LPFC_NVMET_STE_FREE;
+	struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+	unsigned long iflag;
+
+	if (ctxp->txrdy) {
+		pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
+			      ctxp->txrdy_phys);
+		ctxp->txrdy = NULL;
+		ctxp->txrdy_phys = 0;
 	}
-	lpfc_rq_buf_free(phba, mp);
+	ctxp->state = LPFC_NVMET_STE_FREE;
+
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
+	list_add_tail(&ctx_buf->list,
+		      &phba->sli4_hba.lpfc_nvmet_ctx_list);
+	phba->sli4_hba.nvmet_ctx_cnt++;
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
 }
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -718,7 +718,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 	if (aborting)
 		return;
 
-	lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+	lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 }
 
 static struct nvmet_fc_target_template lpfc_tgttemplate = {
@@ -739,17 +739,128 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
 	.target_priv_sz = sizeof(struct lpfc_nvmet_tgtport),
 };
 
+void
+lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+{
+	struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf;
+	unsigned long flags;
+
+	list_for_each_entry_safe(
+		ctx_buf, next_ctx_buf,
+		&phba->sli4_hba.lpfc_nvmet_ctx_list, list) {
+		spin_lock_irqsave(
+			&phba->sli4_hba.abts_nvme_buf_list_lock, flags);
+		list_del_init(&ctx_buf->list);
+		spin_unlock_irqrestore(
+			&phba->sli4_hba.abts_nvme_buf_list_lock, flags);
+		__lpfc_clear_active_sglq(phba,
+					 ctx_buf->sglq->sli4_lxritag);
+		ctx_buf->sglq->state = SGL_FREED;
+		ctx_buf->sglq->ndlp = NULL;
+
+		spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags);
+		list_add_tail(&ctx_buf->sglq->list,
+			      &phba->sli4_hba.lpfc_nvmet_sgl_list);
+		spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock,
+				       flags);
+
+		lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
+		kfree(ctx_buf->context);
+	}
+}
+
+int
+lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
+{
+	struct lpfc_nvmet_ctxbuf *ctx_buf;
+	struct lpfc_iocbq *nvmewqe;
+	union lpfc_wqe128 *wqe;
+	int i;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
+			"6403 Allocate NVMET resources for %d XRIs\n",
+			phba->sli4_hba.nvmet_xri_cnt);
+
+	/* For all nvmet xris, allocate resources needed to process a
+	 * received command on a per xri basis.
+	 */
+	for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) {
+		ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL);
+		if (!ctx_buf) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6404 Ran out of memory for NVMET\n");
+			return -ENOMEM;
+		}
+
+		ctx_buf->context = kzalloc(sizeof(*ctx_buf->context),
+					   GFP_KERNEL);
+		if (!ctx_buf->context) {
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6405 Ran out of NVMET "
+					"context memory\n");
+			return -ENOMEM;
+		}
+		ctx_buf->context->ctxbuf = ctx_buf;
+
+		ctx_buf->iocbq = lpfc_sli_get_iocbq(phba);
+		if (!ctx_buf->iocbq) {
+			kfree(ctx_buf->context);
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6406 Ran out of NVMET iocb/WQEs\n");
+			return -ENOMEM;
+		}
+		ctx_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
+		nvmewqe = ctx_buf->iocbq;
+		wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
+		/* Initialize WQE */
+		memset(wqe, 0, sizeof(union lpfc_wqe));
+		/* Word 7 */
+		bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
+		bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
+		bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
+		/* Word 10 */
+		bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
+		bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
+		bf_set(wqe_qosd, &wqe->generic.wqe_com, 0);
+
+		ctx_buf->iocbq->context1 = NULL;
+		spin_lock(&phba->sli4_hba.sgl_list_lock);
+		ctx_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, ctx_buf->iocbq);
+		spin_unlock(&phba->sli4_hba.sgl_list_lock);
+		if (!ctx_buf->sglq) {
+			lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
+			kfree(ctx_buf->context);
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6407 Ran out of NVMET XRIs\n");
+			return -ENOMEM;
+		}
+		spin_lock(&phba->sli4_hba.nvmet_io_lock);
+		list_add_tail(&ctx_buf->list,
+			      &phba->sli4_hba.lpfc_nvmet_ctx_list);
+		spin_unlock(&phba->sli4_hba.nvmet_io_lock);
+	}
+	phba->sli4_hba.nvmet_ctx_cnt = phba->sli4_hba.nvmet_xri_cnt;
+	return 0;
+}
+
 int
 lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 {
 	struct lpfc_vport  *vport = phba->pport;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct nvmet_fc_port_info pinfo;
-	int error = 0;
+	int error;
 
 	if (phba->targetport)
 		return 0;
 
+	error = lpfc_nvmet_setup_io_context(phba);
+	if (error)
+		return error;
+
 	memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info));
 	pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
 	pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
@@ -778,13 +889,16 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 					     &phba->pcidev->dev,
 					     &phba->targetport);
 #else
-	error = -ENOMEM;
+	error = -ENOENT;
 #endif
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
 				"6025 Cannot register NVME targetport "
 				"x%x\n", error);
 		phba->targetport = NULL;
+
+		lpfc_nvmet_cleanup_io_context(phba);
+
 	} else {
 		tgtp = (struct lpfc_nvmet_tgtport *)
 			phba->targetport->private;
@@ -874,7 +988,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
-		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
 		/* Check if we already received a free context call
@@ -895,7 +1009,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 		    (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE ||
 		     ndlp->nlp_state == NLP_STE_MAPPED_NODE)) {
 			lpfc_set_rrq_active(phba, ndlp,
-				ctxp->rqb_buffer->sglq->sli4_lxritag,
+				ctxp->ctxbuf->sglq->sli4_lxritag,
 				rxid, 1);
 			lpfc_sli4_abts_err_handler(phba, ndlp, axri);
 		}
@@ -904,8 +1018,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 				"6318 XB aborted %x flg x%x (%x)\n",
 				ctxp->oxid, ctxp->flag, released);
 		if (released)
-			lpfc_nvmet_rq_post(phba, ctxp,
-					   &ctxp->rqb_buffer->hbuf);
+			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
+
 		if (rrq_empty)
 			lpfc_worker_wake_up(phba);
 		return;
@@ -933,7 +1047,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
-		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
 		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
@@ -985,6 +1099,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 		init_completion(&tgtp->tport_unreg_done);
 		nvmet_fc_unregister_targetport(phba->targetport);
 		wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
+		lpfc_nvmet_cleanup_io_context(phba);
 	}
 	phba->targetport = NULL;
 #endif
@@ -1115,15 +1230,18 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
+	struct lpfc_nvmet_ctxbuf *ctx_buf;
 	uint32_t *payload;
 	uint32_t size, oxid, sid, rc;
+	unsigned long iflag;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint32_t id;
 #endif
 
+	ctx_buf = NULL;
 	if (!nvmebuf || !phba->targetport) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6157 FCP Drop IO\n");
+				"6157 NVMET FCP Drop IO\n");
 		oxid = 0;
 		size = 0;
 		sid = 0;
@@ -1131,6 +1249,23 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 		goto dropit;
 	}
 
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
+	if (phba->sli4_hba.nvmet_ctx_cnt) {
+		list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_list,
+				 ctx_buf, struct lpfc_nvmet_ctxbuf, list);
+		phba->sli4_hba.nvmet_ctx_cnt--;
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
+
+	if (!ctx_buf) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+				"6408 No NVMET ctx Drop IO\n");
+		oxid = 0;
+		size = 0;
+		sid = 0;
+		ctxp = NULL;
+		goto dropit;
+	}
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	payload = (uint32_t *)(nvmebuf->dbuf.virt);
@@ -1139,16 +1274,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
 	sid = sli4_sid_from_fc_hdr(fc_hdr);
 
-	ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context;
-	if (ctxp == NULL) {
-		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6158 FCP Drop IO x%x: Alloc\n",
-				oxid);
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
-		/* Cannot send ABTS without context */
-		return;
-	}
+	ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
 	memset(ctxp, 0, sizeof(ctxp->ctx));
 	ctxp->wqeq = NULL;
 	ctxp->txrdy = NULL;
@@ -1158,9 +1284,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	ctxp->oxid = oxid;
 	ctxp->sid = sid;
 	ctxp->state = LPFC_NVMET_STE_RCV;
-	ctxp->rqb_buffer = nvmebuf;
 	ctxp->entry_cnt = 1;
 	ctxp->flag = 0;
+	ctxp->ctxbuf = ctx_buf;
 	spin_lock_init(&ctxp->ctxlock);
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1192,6 +1318,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	 * The calling sequence should be:
 	 * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done
 	 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+	 * When we return from nvmet_fc_rcv_fcp_req, all relevant info in
+	 * the NVME command / FC header is stored, so we are free to repost
+	 * the buffer.
 	 */
 	rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
 				  payload, size);
@@ -1199,6 +1328,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	/* Process FCP command */
 	if (rc == 0) {
 		atomic_inc(&tgtp->rcv_fcp_cmd_out);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 		return;
 	}
 
@@ -1213,15 +1343,17 @@ dropit:
 	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
 			 oxid, size, sid);
 	if (oxid) {
+		lpfc_nvmet_defer_release(phba, ctxp);
 		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 		return;
 	}
 
-	if (nvmebuf) {
-		nvmebuf->iocbq->hba_wqidx = 0;
-		/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
-		lpfc_nvmet_rq_post(phba, ctxp, &nvmebuf->hbuf);
-	}
+	if (ctx_buf)
+		lpfc_nvmet_ctxbuf_post(phba, ctx_buf);
+
+	if (nvmebuf)
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 #endif
 }
 
@@ -1273,7 +1405,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
 			   uint64_t isr_timestamp)
 {
 	if (phba->nvmet_support == 0) {
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf);
 		return;
 	}
 	lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf,
@@ -1474,7 +1606,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 	nvmewqe = ctxp->wqeq;
 	if (nvmewqe == NULL) {
 		/* Allocate buffer for  command wqe */
-		nvmewqe = ctxp->rqb_buffer->iocbq;
+		nvmewqe = ctxp->ctxbuf->iocbq;
 		if (nvmewqe == NULL) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 					"6110 lpfc_nvmet_prep_fcp_wqe: No "
@@ -1501,7 +1633,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 		return NULL;
 	}
 
-	sgl  = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl;
+	sgl  = (struct sli4_sge *)ctxp->ctxbuf->sglq->sgl;
 	switch (rsp->op) {
 	case NVMET_FCOP_READDATA:
 	case NVMET_FCOP_READDATA_RSP:
@@ -1851,15 +1983,16 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
 
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
 	/*
 	 * if transport has released ctx, then can reuse it. Otherwise,
 	 * will be recycled by transport release call.
 	 */
 	if (released)
-		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 
-	cmdwqe->context2 = NULL;
-	cmdwqe->context3 = NULL;
+	/* This is the iocbq for the abort, not the command */
 	lpfc_sli_release_iocbq(phba, cmdwqe);
 
 	/* Since iaab/iaar are NOT set, there is no work left.
@@ -1932,15 +2065,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			ctxp->oxid, ctxp->flag, released,
 			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
+
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
 	/*
 	 * if transport has released ctx, then can reuse it. Otherwise,
 	 * will be recycled by transport release call.
 	 */
 	if (released)
-		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
-
-	cmdwqe->context2 = NULL;
-	cmdwqe->context3 = NULL;
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 
 	/* Since iaab/iaar are NOT set, there is no work left.
 	 * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
@@ -2002,10 +2135,6 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
 			sid, xri, ctxp->wqeq->sli4_xritag);
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
-		ctxp->wqeq->hba_wqidx = 0;
-	}
 
 	ndlp = lpfc_findnode_did(phba->pport, sid);
 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
@@ -2101,7 +2230,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq = ctxp->ctxbuf->iocbq;
 		ctxp->wqeq->hba_wqidx = 0;
 	}
 
@@ -2239,7 +2368,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq = ctxp->ctxbuf->iocbq;
 		ctxp->wqeq->hba_wqidx = 0;
 	}
 
@@ -2294,6 +2423,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	}
 	abts_wqeq = ctxp->wqeq;
 	wqe_abts = &abts_wqeq->wqe;
+
 	lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri);
 
 	spin_lock_irqsave(&phba->hbalock, flags);
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 55f2a859dc70..6eb2f5d8d4ed 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -106,6 +106,7 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
 #define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
 	struct rqb_dmabuf *rqb_buffer;
+	struct lpfc_nvmet_ctxbuf *ctxbuf;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t ts_isr_cmd;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 49d5c4700054..d68ee3ee299a 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -6513,6 +6513,49 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 		 (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC");
 }
 
+static int
+lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+		    struct lpfc_queue *drq, int count)
+{
+	int rc, i;
+	struct lpfc_rqe hrqe;
+	struct lpfc_rqe drqe;
+	struct lpfc_rqb *rqbp;
+	struct rqb_dmabuf *rqb_buffer;
+	LIST_HEAD(rqb_buf_list);
+
+	rqbp = hrq->rqbp;
+	for (i = 0; i < count; i++) {
+		/* IF RQ is already full, don't bother */
+		if (rqbp->buffer_count + i >= rqbp->entry_count - 1)
+			break;
+		rqb_buffer = rqbp->rqb_alloc_buffer(phba);
+		if (!rqb_buffer)
+			break;
+		rqb_buffer->hrq = hrq;
+		rqb_buffer->drq = drq;
+		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
+	}
+	while (!list_empty(&rqb_buf_list)) {
+		list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf,
+				 hbuf.list);
+
+		hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys);
+		hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys);
+		drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys);
+		drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys);
+		rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe);
+		if (rc < 0) {
+			rqbp->rqb_free_buffer(phba, rqb_buffer);
+		} else {
+			list_add_tail(&rqb_buffer->hbuf.list,
+				      &rqbp->rqb_buffer_list);
+			rqbp->buffer_count++;
+		}
+	}
+	return 1;
+}
+
 /**
  * lpfc_sli4_hba_setup - SLI4 device initialization PCI function
  * @phba: Pointer to HBA context object.
@@ -6525,7 +6568,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 int
 lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 {
-	int rc, i;
+	int rc, i, cnt;
 	LPFC_MBOXQ_t *mboxq;
 	struct lpfc_mqe *mqe;
 	uint8_t *vpd;
@@ -6876,6 +6919,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			goto out_destroy_queue;
 		}
 		phba->sli4_hba.nvmet_xri_cnt = rc;
+
+		cnt = phba->cfg_iocb_cnt * 1024;
+		/* We need 1 iocbq for every SGL, for IO processing */
+		cnt += phba->sli4_hba.nvmet_xri_cnt;
+		/* Initialize and populate the iocb list per host */
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2821 initialize iocb list %d total %d\n",
+				phba->cfg_iocb_cnt, cnt);
+		rc = lpfc_init_iocb_list(phba, cnt);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1413 Failed to init iocb list.\n");
+			goto out_destroy_queue;
+		}
+
 		lpfc_nvmet_create_targetport(phba);
 	} else {
 		/* update host scsi xri-sgl sizes and mappings */
@@ -6895,10 +6953,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 					"and mapping: %d\n", rc);
 			goto out_destroy_queue;
 		}
+
+		cnt = phba->cfg_iocb_cnt * 1024;
+		/* Initialize and populate the iocb list per host */
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2820 initialize iocb list %d total %d\n",
+				phba->cfg_iocb_cnt, cnt);
+		rc = lpfc_init_iocb_list(phba, cnt);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"6301 Failed to init iocb list.\n");
+			goto out_destroy_queue;
+		}
 	}
 
 	if (phba->nvmet_support && phba->cfg_nvmet_mrq) {
-
 		/* Post initial buffers to all RQs created */
 		for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
 			rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp;
@@ -6911,7 +6980,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			lpfc_post_rq_buffer(
 				phba, phba->sli4_hba.nvmet_mrq_hdr[i],
 				phba->sli4_hba.nvmet_mrq_data[i],
-				phba->cfg_nvmet_mrq_post);
+				LPFC_NVMET_RQE_DEF_COUNT);
 		}
 	}
 
@@ -7078,6 +7147,7 @@ out_unset_queue:
 	/* Unset all the queues set up in this routine when error out */
 	lpfc_sli4_queue_unset(phba);
 out_destroy_queue:
+	lpfc_free_iocb_list(phba);
 	lpfc_sli4_queue_destroy(phba);
 out_stop_timers:
 	lpfc_stop_hba_timers(phba);
@@ -18731,7 +18801,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		ctxp = pwqe->context2;
-		sglq = ctxp->rqb_buffer->sglq;
+		sglq = ctxp->ctxbuf->sglq;
 		if (pwqe->sli4_xritag ==  NO_XRI) {
 			pwqe->sli4_lxritag = sglq->sli4_lxritag;
 			pwqe->sli4_xritag = sglq->sli4_xritag;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 422bde85c9f1..19e2f190ea2e 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -618,10 +618,12 @@ struct lpfc_sli4_hba {
 	uint16_t scsi_xri_start;
 	uint16_t els_xri_cnt;
 	uint16_t nvmet_xri_cnt;
+	uint16_t nvmet_ctx_cnt;
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
 	struct list_head lpfc_nvmet_sgl_list;
 	struct list_head lpfc_abts_nvmet_ctx_list;
+	struct list_head lpfc_nvmet_ctx_list;
 	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_abts_nvme_buf_list;
 	struct lpfc_sglq **lpfc_sglq_active_list;
@@ -662,8 +664,6 @@ struct lpfc_sli4_hba {
 	uint16_t num_online_cpu;
 	uint16_t num_present_cpu;
 	uint16_t curr_disp_cpu;
-
-	uint16_t nvmet_mrq_post_idx;
 };
 
 enum lpfc_sge_type {
-- 
cgit v1.2.3-59-g8ed1b


From a8cf5dfeb4d84248c0ad12386ae0cb36ee21589a Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:46 -0700
Subject: scsi: lpfc: Added recovery logic for running out of NVMET IO context
 resources

Previous logic would just drop the IO.

Added logic to queue the IO to wait for an IO context resource from an
IO thats already in progress.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h         |   1 +
 drivers/scsi/lpfc/lpfc_attr.c    |   6 ++
 drivers/scsi/lpfc/lpfc_crtn.h    |   2 +
 drivers/scsi/lpfc/lpfc_debugfs.c |   6 ++
 drivers/scsi/lpfc/lpfc_init.c    |   2 +
 drivers/scsi/lpfc/lpfc_nvmet.c   | 138 +++++++++++++++++++++++++++++++++------
 drivers/scsi/lpfc/lpfc_sli.c     |   7 +-
 drivers/scsi/lpfc/lpfc_sli4.h    |   6 +-
 8 files changed, 144 insertions(+), 24 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 72641b1d3ab8..c47bde6205c9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -170,6 +170,7 @@ struct rqb_dmabuf {
 	struct lpfc_dmabuf dbuf;
 	uint16_t total_size;
 	uint16_t bytes_recv;
+	uint16_t idx;
 	struct lpfc_queue *hrq;	  /* ptr to associated Header RQ */
 	struct lpfc_queue *drq;	  /* ptr to associated Data RQ */
 };
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 65264582915a..bb2d9e238225 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -245,6 +245,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_abort_rsp),
 				atomic_read(&tgtp->xmt_abort_rsp_error));
 
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"IO_CTX: %08x outstanding %08x total %x",
+				phba->sli4_hba.nvmet_ctx_cnt,
+				phba->sli4_hba.nvmet_io_wait_cnt,
+				phba->sli4_hba.nvmet_io_wait_total);
+
 		len +=  snprintf(buf+len, PAGE_SIZE-len, "\n");
 		return len;
 	}
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index cc95abd130b4..8912767e7bc8 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -77,6 +77,8 @@ void lpfc_retry_pport_discovery(struct lpfc_hba *);
 void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t);
 int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt);
 void lpfc_free_iocb_list(struct lpfc_hba *phba);
+int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+			struct lpfc_queue *drq, int count, int idx);
 
 void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 7284533f4df2..c7d1c9d37a64 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -842,6 +842,12 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			}
 			spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		}
+
+		len += snprintf(buf + len, size - len,
+				"IO_CTX: %08x  outstanding %08x total %08x\n",
+				phba->sli4_hba.nvmet_ctx_cnt,
+				phba->sli4_hba.nvmet_io_wait_cnt,
+				phba->sli4_hba.nvmet_io_wait_total);
 	} else {
 		if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 			return len;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 86b0b26dfeea..9f6c7e71814b 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -5825,6 +5825,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list);
 
 		/* Fast-path XRI aborted CQ Event work queue list */
 		INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
@@ -5833,6 +5834,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	/* This abort list used by worker thread */
 	spin_lock_init(&phba->sli4_hba.sgl_list_lock);
 	spin_lock_init(&phba->sli4_hba.nvmet_io_lock);
+	spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock);
 
 	/*
 	 * Initialize driver internal slow-path work queues
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index fcc77ae0c71c..312f54278bd4 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -158,6 +158,12 @@ void
 lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 {
 	struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+	struct lpfc_nvmet_tgtport *tgtp;
+	struct fc_frame_header *fc_hdr;
+	struct rqb_dmabuf *nvmebuf;
+	struct lpfc_dmabuf *hbufp;
+	uint32_t *payload;
+	uint32_t size, oxid, sid, rc;
 	unsigned long iflag;
 
 	if (ctxp->txrdy) {
@@ -168,6 +174,87 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 	}
 	ctxp->state = LPFC_NVMET_STE_FREE;
 
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+	if (phba->sli4_hba.nvmet_io_wait_cnt) {
+		hbufp = &nvmebuf->hbuf;
+		list_remove_head(&phba->sli4_hba.lpfc_nvmet_io_wait_list,
+				 nvmebuf, struct rqb_dmabuf,
+				 hbuf.list);
+		phba->sli4_hba.nvmet_io_wait_cnt--;
+		spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock,
+				       iflag);
+
+		fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
+		oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+		payload = (uint32_t *)(nvmebuf->dbuf.virt);
+		size = nvmebuf->bytes_recv;
+		sid = sli4_sid_from_fc_hdr(fc_hdr);
+
+		ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+		memset(ctxp, 0, sizeof(ctxp->ctx));
+		ctxp->wqeq = NULL;
+		ctxp->txrdy = NULL;
+		ctxp->offset = 0;
+		ctxp->phba = phba;
+		ctxp->size = size;
+		ctxp->oxid = oxid;
+		ctxp->sid = sid;
+		ctxp->state = LPFC_NVMET_STE_RCV;
+		ctxp->entry_cnt = 1;
+		ctxp->flag = 0;
+		ctxp->ctxbuf = ctx_buf;
+		spin_lock_init(&ctxp->ctxlock);
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+		if (phba->ktime_on) {
+			ctxp->ts_cmd_nvme = ktime_get_ns();
+			ctxp->ts_isr_cmd = ctxp->ts_cmd_nvme;
+			ctxp->ts_nvme_data = 0;
+			ctxp->ts_data_wqput = 0;
+			ctxp->ts_isr_data = 0;
+			ctxp->ts_data_nvme = 0;
+			ctxp->ts_nvme_status = 0;
+			ctxp->ts_status_wqput = 0;
+			ctxp->ts_isr_status = 0;
+			ctxp->ts_status_nvme = 0;
+		}
+#endif
+		atomic_inc(&tgtp->rcv_fcp_cmd_in);
+		/*
+		 * The calling sequence should be:
+		 * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done
+		 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+		 * When we return from nvmet_fc_rcv_fcp_req, all relevant info
+		 * the NVME command / FC header is stored.
+		 * A buffer has already been reposted for this IO, so just free
+		 * the nvmebuf.
+		 */
+		rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
+					  payload, size);
+
+		/* Process FCP command */
+		if (rc == 0) {
+			atomic_inc(&tgtp->rcv_fcp_cmd_out);
+			nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+			return;
+		}
+
+		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
+		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+				"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+				ctxp->oxid, rc,
+				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->rcv_fcp_cmd_out),
+				atomic_read(&tgtp->xmt_fcp_release));
+
+		lpfc_nvmet_defer_release(phba, ctxp);
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
+		nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+		return;
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+
 	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
 	list_add_tail(&ctx_buf->list,
 		      &phba->sli4_hba.lpfc_nvmet_ctx_list);
@@ -1232,7 +1319,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	struct fc_frame_header *fc_hdr;
 	struct lpfc_nvmet_ctxbuf *ctx_buf;
 	uint32_t *payload;
-	uint32_t size, oxid, sid, rc;
+	uint32_t size, oxid, sid, rc, qno;
 	unsigned long iflag;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint32_t id;
@@ -1257,21 +1344,41 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	}
 	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
 
+	fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
+	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+	size = nvmebuf->bytes_recv;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
+		id = smp_processor_id();
+		if (id < LPFC_CHECK_CPU_CNT)
+			phba->cpucheck_rcv_io[id]++;
+	}
+#endif
+
+	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
+			 oxid, size, smp_processor_id());
+
 	if (!ctx_buf) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6408 No NVMET ctx Drop IO\n");
-		oxid = 0;
-		size = 0;
-		sid = 0;
-		ctxp = NULL;
-		goto dropit;
+		/* Queue this NVME IO to process later */
+		spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+		list_add_tail(&nvmebuf->hbuf.list,
+			      &phba->sli4_hba.lpfc_nvmet_io_wait_list);
+		phba->sli4_hba.nvmet_io_wait_cnt++;
+		phba->sli4_hba.nvmet_io_wait_total++;
+		spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock,
+				       iflag);
+
+		/* Post a brand new DMA buffer to RQ */
+		qno = nvmebuf->idx;
+		lpfc_post_rq_buffer(
+			phba, phba->sli4_hba.nvmet_mrq_hdr[qno],
+			phba->sli4_hba.nvmet_mrq_data[qno], 1, qno);
+		return;
 	}
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	payload = (uint32_t *)(nvmebuf->dbuf.virt);
-	fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
-	size = nvmebuf->bytes_recv;
-	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
 	sid = sli4_sid_from_fc_hdr(fc_hdr);
 
 	ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
@@ -1302,17 +1409,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 		ctxp->ts_isr_status = 0;
 		ctxp->ts_status_nvme = 0;
 	}
-
-	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
-		id = smp_processor_id();
-		if (id < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_rcv_io[id]++;
-	}
 #endif
 
-	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
-			 oxid, size, smp_processor_id());
-
 	atomic_inc(&tgtp->rcv_fcp_cmd_in);
 	/*
 	 * The calling sequence should be:
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d68ee3ee299a..3fb4e715bfa2 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -6513,9 +6513,9 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 		 (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC");
 }
 
-static int
+int
 lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
-		    struct lpfc_queue *drq, int count)
+		    struct lpfc_queue *drq, int count, int idx)
 {
 	int rc, i;
 	struct lpfc_rqe hrqe;
@@ -6534,6 +6534,7 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 			break;
 		rqb_buffer->hrq = hrq;
 		rqb_buffer->drq = drq;
+		rqb_buffer->idx = idx;
 		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
 	}
 	while (!list_empty(&rqb_buf_list)) {
@@ -6980,7 +6981,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			lpfc_post_rq_buffer(
 				phba, phba->sli4_hba.nvmet_mrq_hdr[i],
 				phba->sli4_hba.nvmet_mrq_data[i],
-				LPFC_NVMET_RQE_DEF_COUNT);
+				LPFC_NVMET_RQE_DEF_COUNT, i);
 		}
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 19e2f190ea2e..c1c9a9125266 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -619,13 +619,16 @@ struct lpfc_sli4_hba {
 	uint16_t els_xri_cnt;
 	uint16_t nvmet_xri_cnt;
 	uint16_t nvmet_ctx_cnt;
+	uint16_t nvmet_io_wait_cnt;
+	uint16_t nvmet_io_wait_total;
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
 	struct list_head lpfc_nvmet_sgl_list;
 	struct list_head lpfc_abts_nvmet_ctx_list;
-	struct list_head lpfc_nvmet_ctx_list;
 	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_abts_nvme_buf_list;
+	struct list_head lpfc_nvmet_ctx_list;
+	struct list_head lpfc_nvmet_io_wait_list;
 	struct lpfc_sglq **lpfc_sglq_active_list;
 	struct list_head lpfc_rpi_hdr_list;
 	unsigned long *rpi_bmask;
@@ -657,6 +660,7 @@ struct lpfc_sli4_hba {
 	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
 	spinlock_t sgl_list_lock; /* list of aborted els IOs */
 	spinlock_t nvmet_io_lock;
+	spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */
 	uint32_t physical_port;
 
 	/* CPU to vector mapping information */
-- 
cgit v1.2.3-59-g8ed1b


From 82820f0cf19aa62e2608c2909bd44e7a68268ff5 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:47 -0700
Subject: scsi: lpfc: Fix NVME I+T not registering NVME as a supported FC4 type

When the driver send the RPA command, it does not send supported FC4
Type NVME to the management server.

Encode NVME (type x28) in the AttribEntry in the RPA command.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_ct.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index c7962dae4dab..f2cd19c6c2df 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -2092,6 +2092,7 @@ lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport,
 
 	ae->un.AttrTypes[3] = 0x02; /* Type 1 - ELS */
 	ae->un.AttrTypes[2] = 0x01; /* Type 8 - FCP */
+	ae->un.AttrTypes[6] = 0x01; /* Type 40 - NVME */
 	ae->un.AttrTypes[7] = 0x01; /* Type 32 - CT */
 	size = FOURBYTES + 32;
 	ad->AttrLen = cpu_to_be16(size);
-- 
cgit v1.2.3-59-g8ed1b


From 667a7662529bf0afb1d84a32ceb0da0a875a3b6c Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:48 -0700
Subject: scsi: lpfc: Fix debugfs root inode "lpfc" not getting deleted on
 driver unload.

When unloading and reloading the driver, the driver fails to recreate
the lpfc root inode in the debugfs tree.

The driver is incorrectly removing the lpfc root inode in
lpfc_debugfs_terminate in the first driver instance that unloads and
then sets the lpfc_debugfs_root global parameter to NULL.  When the
final driver instance unloads, the debugfs calls quietly ignore the
remove on a NULL pointer.  The bug is that the debugfs_remove call
returns void so the driver doesn't know to correctly set the global
parameter to NULL.

Base the debugfs_remove of the lpfc_debugfs_root parameter on
lpfc_debugfs_hba_count because this parameter tracks the fnX instance
tracked per driver instance.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_debugfs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index c7d1c9d37a64..4bcb92c844ca 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -5866,8 +5866,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 			atomic_dec(&lpfc_debugfs_hba_count);
 		}
 
-		debugfs_remove(lpfc_debugfs_root); /* lpfc */
-		lpfc_debugfs_root = NULL;
+		if (atomic_read(&lpfc_debugfs_hba_count) == 0) {
+			debugfs_remove(lpfc_debugfs_root); /* lpfc */
+			lpfc_debugfs_root = NULL;
+		}
 	}
 #endif
 	return;
-- 
cgit v1.2.3-59-g8ed1b


From 64eb4dcb140a7c5547f6e965fb471b1b75c01108 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:49 -0700
Subject: scsi: lpfc: Cleanup entry_repost settings on SLI4 queues

Too many work items being processed in IRQ context take a lot of CPU
time and cause problems.

With a recent change, we get out of the ISR after hitting entry_repost
work items on a queue. However, the actual values for entry repost are
still high. EQ is 128 and CQ is 128, this could translate into
processing 128 * 128 (16384) work items under IRQ context.

Set entry_repost in the actual queue creation routine now.  Limit EQ
repost to 8 and CQ repost to 64 to further limit the amount of time
spent in the IRQ.

Fix fof IRQ routines as well.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c  | 19 ++++++++-----------
 drivers/scsi/lpfc/lpfc_sli4.h |  6 ++++--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 3fb4e715bfa2..903c06ff828a 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -13922,17 +13922,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size,
 	}
 	queue->entry_size = entry_size;
 	queue->entry_count = entry_count;
-
-	/*
-	 * entry_repost is calculated based on the number of entries in the
-	 * queue. This works out except for RQs. If buffers are NOT initially
-	 * posted for every RQE, entry_repost should be adjusted accordingly.
-	 */
-	queue->entry_repost = (entry_count >> 3);
-	if (queue->entry_repost < LPFC_QUEUE_MIN_REPOST)
-		queue->entry_repost = LPFC_QUEUE_MIN_REPOST;
 	queue->phba = phba;
 
+	/* entry_repost will be set during q creation */
+
 	return queue;
 out_fail:
 	lpfc_sli4_queue_free(queue);
@@ -14163,6 +14156,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
 		status = -ENXIO;
 	eq->host_index = 0;
 	eq->hba_index = 0;
+	eq->entry_repost = LPFC_EQ_REPOST;
 
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -14236,9 +14230,9 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0361 Unsupported CQ count: "
-				"entry cnt %d sz %d pg cnt %d repost %d\n",
+				"entry cnt %d sz %d pg cnt %d\n",
 				cq->entry_count, cq->entry_size,
-				cq->page_count, cq->entry_repost);
+				cq->page_count);
 		if (cq->entry_count < 256) {
 			status = -EINVAL;
 			goto out;
@@ -14291,6 +14285,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->assoc_qid = eq->queue_id;
 	cq->host_index = 0;
 	cq->hba_index = 0;
+	cq->entry_repost = LPFC_CQ_REPOST;
 
 out:
 	mempool_free(mbox, phba->mbox_mem_pool);
@@ -14482,6 +14477,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->assoc_qid = eq->queue_id;
 		cq->host_index = 0;
 		cq->hba_index = 0;
+		cq->entry_repost = LPFC_CQ_REPOST;
 
 		rc = 0;
 		list_for_each_entry(dmabuf, &cq->page_list, list) {
@@ -14730,6 +14726,7 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
 	mq->subtype = subtype;
 	mq->host_index = 0;
 	mq->hba_index = 0;
+	mq->entry_repost = LPFC_MQ_REPOST;
 
 	/* link the mq onto the parent cq child list */
 	list_add_tail(&mq->list, &cq->child_list);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index c1c9a9125266..cf863db27700 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -24,7 +24,6 @@
 #define LPFC_XRI_EXCH_BUSY_WAIT_TMO		10000
 #define LPFC_XRI_EXCH_BUSY_WAIT_T1   		10
 #define LPFC_XRI_EXCH_BUSY_WAIT_T2              30000
-#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32
 #define LPFC_RPI_LOW_WATER_MARK			10
 
 #define LPFC_UNREG_FCF                          1
@@ -155,8 +154,11 @@ struct lpfc_queue {
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
 	uint32_t entry_repost;	/* Count of entries before doorbell is rung */
-#define LPFC_QUEUE_MIN_REPOST	8
+#define LPFC_EQ_REPOST		8
+#define LPFC_MQ_REPOST		8
+#define LPFC_CQ_REPOST		64
 #define LPFC_RQ_REPOST		64
+#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32  /* For WQs */
 	uint32_t queue_id;	/* Queue ID assigned by the hardware */
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t page_count;	/* Number of pages allocated for this queue */
-- 
cgit v1.2.3-59-g8ed1b


From dc53a61852279f25909d99dad4638b4aee0b2d82 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:50 -0700
Subject: scsi: lpfc: Fix NVMEI's handling of NVMET's PRLI response attributes

Code review of NVMEI's FC_PORT_ROLE_NVME_DISCOVERY looked wrong.

Discussions with storage architecture team clarified NVMEI's audit of
the PRLI response port roles.  Following up discussion with code review
showed a few minor corrections were required - especially in
anticipation of NVME auto discovery.

During PRLI, NVMEI should sent prli_init - which it it does.  NVMET
should send prli_tgt and prli_disc - which it does.  When NVMEI receives
a PRLI Response now, it audits the incoming target bits and stores the
attributes in the corresponding NDLP.  Later, when NVMEI registers the
NVME rport, it uses the stored ndlp attributes to set the rport
port_roles correctly.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_disc.h      | 1 +
 drivers/scsi/lpfc/lpfc_nportdisc.c | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 9d5a379f4b15..094c97b9e5f7 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -90,6 +90,7 @@ struct lpfc_nodelist {
 #define NLP_FCP_INITIATOR  0x10			/* entry is an FCP Initiator */
 #define NLP_NVME_TARGET    0x20			/* entry is a NVME Target */
 #define NLP_NVME_INITIATOR 0x40			/* entry is a NVME Initiator */
+#define NLP_NVME_DISCOVERY 0x80                 /* entry has NVME disc srvc */
 
 	uint16_t	nlp_fc4_type;		/* FC types node supports. */
 						/* Assigned from GID_FF, only
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 8777c2d5f50d..bff3de053df4 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1944,7 +1944,13 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
 		/* Target driver cannot solicit NVME FB. */
 		if (bf_get_be32(prli_tgt, nvpr)) {
+			/* Complete the nvme target roles.  The transport
+			 * needs to know if the rport is capable of
+			 * discovery in addition to its role.
+			 */
 			ndlp->nlp_type |= NLP_NVME_TARGET;
+			if (bf_get_be32(prli_disc, nvpr))
+				ndlp->nlp_type |= NLP_NVME_DISCOVERY;
 			if ((bf_get_be32(prli_fba, nvpr) == 1) &&
 			    (bf_get_be32(prli_fb_sz, nvpr) > 0) &&
 			    (phba->cfg_nvme_enable_fb) &&
-- 
cgit v1.2.3-59-g8ed1b


From ae9e28f36a6cca4e5760f4927b70b6c9e588db1a Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:51 -0700
Subject: scsi: lpfc: Add MDS Diagnostic support.

Added code to support Cisco MDS loopback diagnostic. The diagnostics run
various loopbacks including one which loops-back frame through the
driver.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h         |   2 +
 drivers/scsi/lpfc/lpfc_els.c     |   7 +++
 drivers/scsi/lpfc/lpfc_hbadisc.c |   3 +-
 drivers/scsi/lpfc/lpfc_hw4.h     |  15 ++++-
 drivers/scsi/lpfc/lpfc_init.c    |  13 ++++
 drivers/scsi/lpfc/lpfc_sli.c     | 131 ++++++++++++++++++++++++++++++++++++---
 6 files changed, 161 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index c47bde6205c9..f2c0ba6ced78 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -675,6 +675,8 @@ struct lpfc_hba {
 					/* INIT_LINK mailbox command */
 #define LS_NPIV_FAB_SUPPORTED 0x2	/* Fabric supports NPIV */
 #define LS_IGNORE_ERATT       0x4	/* intr handler should ignore ERATT */
+#define LS_MDS_LINK_DOWN      0x8	/* MDS Diagnostics Link Down */
+#define LS_MDS_LOOPBACK      0x16	/* MDS Diagnostics Link Up (Loopback) */
 
 	uint32_t hba_flag;	/* hba generic flags */
 #define HBA_ERATT_HANDLED	0x1 /* This flag is set when eratt handled */
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 3085895464d9..1d36f82fa369 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1047,6 +1047,13 @@ stop_rr_fcf_flogi:
 				 irsp->ulpStatus, irsp->un.ulpWord[4],
 				 irsp->ulpTimeout);
 
+
+		/* If this is not a loop open failure, bail out */
+		if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
+		      ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) ==
+					IOERR_LOOP_OPEN_FAILURE)))
+			goto flogifail;
+
 		/* FLOGI failed, so there is no fabric */
 		spin_lock_irq(shost->host_lock);
 		vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index dcc9b3858778..3ffcd9215ca8 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -701,7 +701,8 @@ lpfc_work_done(struct lpfc_hba *phba)
 			/* Set the lpfc data pending flag */
 			set_bit(LPFC_DATA_READY, &phba->data_flags);
 		} else {
-			if (phba->link_state >= LPFC_LINK_UP) {
+			if (phba->link_state >= LPFC_LINK_UP ||
+			    phba->link_flag & LS_MDS_LOOPBACK) {
 				pring->flag &= ~LPFC_DEFERRED_RING_EVENT;
 				lpfc_sli_handle_slow_ring_event(phba, pring,
 								(status &
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index df97c6b7433b..e0a5fce416ae 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -4421,6 +4421,19 @@ struct fcp_treceive64_wqe {
 };
 #define TXRDY_PAYLOAD_LEN      12
 
+#define CMD_SEND_FRAME	0xE1
+
+struct send_frame_wqe {
+	struct ulp_bde64 bde;          /* words 0-2 */
+	uint32_t frame_len;            /* word 3 */
+	uint32_t fc_hdr_wd0;           /* word 4 */
+	uint32_t fc_hdr_wd1;           /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t fc_hdr_wd2;           /* word 12 */
+	uint32_t fc_hdr_wd3;           /* word 13 */
+	uint32_t fc_hdr_wd4;           /* word 14 */
+	uint32_t fc_hdr_wd5;           /* word 15 */
+};
 
 union lpfc_wqe {
 	uint32_t words[16];
@@ -4439,7 +4452,7 @@ union lpfc_wqe {
 	struct fcp_trsp64_wqe fcp_trsp;
 	struct fcp_tsend64_wqe fcp_tsend;
 	struct fcp_treceive64_wqe fcp_treceive;
-
+	struct send_frame_wqe send_frame;
 };
 
 union lpfc_wqe128 {
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 9f6c7e71814b..9add9473cae5 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -4540,6 +4540,19 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
 	pmb->vport = phba->pport;
 
 	if (phba->sli4_hba.link_state.status != LPFC_FC_LA_TYPE_LINK_UP) {
+		phba->link_flag &= ~(LS_MDS_LINK_DOWN | LS_MDS_LOOPBACK);
+
+		switch (phba->sli4_hba.link_state.status) {
+		case LPFC_FC_LA_TYPE_MDS_LINK_DOWN:
+			phba->link_flag |= LS_MDS_LINK_DOWN;
+			break;
+		case LPFC_FC_LA_TYPE_MDS_LOOPBACK:
+			phba->link_flag |= LS_MDS_LOOPBACK;
+			break;
+		default:
+			break;
+		}
+
 		/* Parse and translate status field */
 		mb = &pmb->u.mb;
 		mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 903c06ff828a..d6b184839bc2 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -74,6 +74,8 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *,
 							 struct lpfc_iocbq *);
 static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
 				      struct hbq_dmabuf *);
+static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
+					  struct hbq_dmabuf *dmabuf);
 static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
 				    struct lpfc_cqe *);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
@@ -5907,7 +5909,7 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
 		bf_set(lpfc_mbx_set_feature_mds,
 		       &mbox->u.mqe.un.set_feature, 1);
 		bf_set(lpfc_mbx_set_feature_mds_deep_loopbk,
-		       &mbox->u.mqe.un.set_feature, 0);
+		       &mbox->u.mqe.un.set_feature, 1);
 		mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS;
 		mbox->u.mqe.un.set_feature.param_len = 8;
 		break;
@@ -8688,8 +8690,11 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		memset(wqe, 0, sizeof(union lpfc_wqe128));
 	/* Some of the fields are in the right position already */
 	memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
-	wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */
-	wqe->generic.wqe_com.word10 = 0;
+	if (iocbq->iocb.ulpCommand != CMD_SEND_FRAME) {
+		/* The ct field has moved so reset */
+		wqe->generic.wqe_com.word7 = 0;
+		wqe->generic.wqe_com.word10 = 0;
+	}
 
 	abort_tag = (uint32_t) iocbq->iotag;
 	xritag = iocbq->sli4_xritag;
@@ -9183,6 +9188,10 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		}
 
 		break;
+	case CMD_SEND_FRAME:
+		bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag);
+		bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag);
+		return 0;
 	case CMD_XRI_ABORTED_CX:
 	case CMD_CREATE_XRI_CR: /* Do we expect to use this? */
 	case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */
@@ -16137,6 +16146,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	struct fc_vft_header *fc_vft_hdr;
 	uint32_t *header = (uint32_t *) fc_hdr;
 
+#define FC_RCTL_MDS_DIAGS	0xF4
+
 	switch (fc_hdr->fh_r_ctl) {
 	case FC_RCTL_DD_UNCAT:		/* uncategorized information */
 	case FC_RCTL_DD_SOL_DATA:	/* solicited data */
@@ -16164,6 +16175,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	case FC_RCTL_F_BSY:	/* fabric busy to data frame */
 	case FC_RCTL_F_BSYL:	/* fabric busy to link control frame */
 	case FC_RCTL_LCR:	/* link credit reset */
+	case FC_RCTL_MDS_DIAGS: /* MDS Diagnostics */
 	case FC_RCTL_END:	/* end */
 		break;
 	case FC_RCTL_VFTH:	/* Virtual Fabric tagging Header */
@@ -16173,12 +16185,16 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	default:
 		goto drop;
 	}
+
+#define FC_TYPE_VENDOR_UNIQUE	0xFF
+
 	switch (fc_hdr->fh_type) {
 	case FC_TYPE_BLS:
 	case FC_TYPE_ELS:
 	case FC_TYPE_FCP:
 	case FC_TYPE_CT:
 	case FC_TYPE_NVME:
+	case FC_TYPE_VENDOR_UNIQUE:
 		break;
 	case FC_TYPE_IP:
 	case FC_TYPE_ILS:
@@ -16189,12 +16205,14 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
 			"2538 Received frame rctl:%s (x%x), type:%s (x%x), "
 			"frame Data:%08x %08x %08x %08x %08x %08x %08x\n",
+			(fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS) ? "MDS Diags" :
 			lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl,
-			lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type,
-			be32_to_cpu(header[0]), be32_to_cpu(header[1]),
-			be32_to_cpu(header[2]), be32_to_cpu(header[3]),
-			be32_to_cpu(header[4]), be32_to_cpu(header[5]),
-			be32_to_cpu(header[6]));
+			(fc_hdr->fh_type == FC_TYPE_VENDOR_UNIQUE) ?
+			"Vendor Unique" : lpfc_type_names[fc_hdr->fh_type],
+			fc_hdr->fh_type, be32_to_cpu(header[0]),
+			be32_to_cpu(header[1]), be32_to_cpu(header[2]),
+			be32_to_cpu(header[3]), be32_to_cpu(header[4]),
+			be32_to_cpu(header[5]), be32_to_cpu(header[6]));
 	return 0;
 drop:
 	lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
@@ -17000,6 +17018,96 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport,
 	lpfc_sli_release_iocbq(phba, iocbq);
 }
 
+static void
+lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+			    struct lpfc_iocbq *rspiocb)
+{
+	struct lpfc_dmabuf *pcmd = cmdiocb->context2;
+
+	if (pcmd && pcmd->virt)
+		pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	lpfc_sli_release_iocbq(phba, cmdiocb);
+}
+
+static void
+lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
+			      struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *fc_hdr;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_iocbq *iocbq = NULL;
+	union  lpfc_wqe *wqe;
+	struct lpfc_dmabuf *pcmd = NULL;
+	uint32_t frame_len;
+	int rc;
+
+	fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl);
+
+	/* Send the received frame back */
+	iocbq = lpfc_sli_get_iocbq(phba);
+	if (!iocbq)
+		goto exit;
+
+	/* Allocate buffer for command payload */
+	pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (pcmd)
+		pcmd->virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
+					    &pcmd->phys);
+	if (!pcmd || !pcmd->virt)
+		goto exit;
+
+	INIT_LIST_HEAD(&pcmd->list);
+
+	/* copyin the payload */
+	memcpy(pcmd->virt, dmabuf->dbuf.virt, frame_len);
+
+	/* fill in BDE's for command */
+	iocbq->iocb.un.xseq64.bdl.addrHigh = putPaddrHigh(pcmd->phys);
+	iocbq->iocb.un.xseq64.bdl.addrLow = putPaddrLow(pcmd->phys);
+	iocbq->iocb.un.xseq64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
+	iocbq->iocb.un.xseq64.bdl.bdeSize = frame_len;
+
+	iocbq->context2 = pcmd;
+	iocbq->vport = vport;
+	iocbq->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK;
+	iocbq->iocb_flag |= LPFC_USE_FCPWQIDX;
+
+	/*
+	 * Setup rest of the iocb as though it were a WQE
+	 * Build the SEND_FRAME WQE
+	 */
+	wqe = (union lpfc_wqe *)&iocbq->iocb;
+
+	wqe->send_frame.frame_len = frame_len;
+	wqe->send_frame.fc_hdr_wd0 = be32_to_cpu(*((uint32_t *)fc_hdr));
+	wqe->send_frame.fc_hdr_wd1 = be32_to_cpu(*((uint32_t *)fc_hdr + 1));
+	wqe->send_frame.fc_hdr_wd2 = be32_to_cpu(*((uint32_t *)fc_hdr + 2));
+	wqe->send_frame.fc_hdr_wd3 = be32_to_cpu(*((uint32_t *)fc_hdr + 3));
+	wqe->send_frame.fc_hdr_wd4 = be32_to_cpu(*((uint32_t *)fc_hdr + 4));
+	wqe->send_frame.fc_hdr_wd5 = be32_to_cpu(*((uint32_t *)fc_hdr + 5));
+
+	iocbq->iocb.ulpCommand = CMD_SEND_FRAME;
+	iocbq->iocb.ulpLe = 1;
+	iocbq->iocb_cmpl = lpfc_sli4_mds_loopback_cmpl;
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocbq, 0);
+	if (rc == IOCB_ERROR)
+		goto exit;
+
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+	return;
+
+exit:
+	lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+			"2023 Unable to process MDS loopback frame\n");
+	if (pcmd && pcmd->virt)
+		pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	lpfc_sli_release_iocbq(phba, iocbq);
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+}
+
 /**
  * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware
  * @phba: Pointer to HBA context object.
@@ -17038,6 +17146,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
 		fcfi = bf_get(lpfc_rcqe_fcf_id,
 			      &dmabuf->cq_event.cqe.rcqe_cmpl);
 
+	if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) {
+		vport = phba->pport;
+		/* Handle MDS Loopback frames */
+		lpfc_sli4_handle_mds_loopback(vport, dmabuf);
+		return;
+	}
+
 	/* d_id this frame is directed to */
 	did = sli4_did_from_fc_hdr(fc_hdr);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2848e1d503d60955ff51ae9ec8d5eada6bd9ba6d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 15:20:52 -0700
Subject: scsi: lpfc: update version to 11.2.0.14

Change driver version to 11.2.0.14.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 1c26dc67151b..c2653244221c 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "11.2.0.12"
+#define LPFC_DRIVER_VERSION "11.2.0.14"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
-- 
cgit v1.2.3-59-g8ed1b


From bfb9956ab4d8242f4594b5f4bee534b935384fd9 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 16 May 2017 20:42:53 +1000
Subject: powerpc/mm: Fix crash in page table dump with huge pages

The page table dump code doesn't know about huge pages, so currently
it crashes (or walks random memory, usually leading to a crash), if it
finds a huge page. On Book3S we only see huge pages in the Linux page
tables when we're using the P9 Radix MMU.

Teaching the code to properly handle huge pages is a bit more involved,
so for now just prevent the crash.

Cc: stable@vger.kernel.org # v4.10+
Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/dump_linuxpagetables.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index d659345a98d6..44fe4833910f 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -16,6 +16,7 @@
  */
 #include <linux/debugfs.h>
 #include <linux/fs.h>
+#include <linux/hugetlb.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
@@ -391,7 +392,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 		addr = start + i * PMD_SIZE;
-		if (!pmd_none(*pmd))
+		if (!pmd_none(*pmd) && !pmd_huge(*pmd))
 			/* pmd exists */
 			walk_pte(st, pmd, addr);
 		else
@@ -407,7 +408,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 		addr = start + i * PUD_SIZE;
-		if (!pud_none(*pud))
+		if (!pud_none(*pud) && !pud_huge(*pud))
 			/* pud exists */
 			walk_pmd(st, pud, addr);
 		else
@@ -427,7 +428,7 @@ static void walk_pagetables(struct pg_state *st)
 	 */
 	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 		addr = KERN_VIRT_START + i * PGDIR_SIZE;
-		if (!pgd_none(*pgd))
+		if (!pgd_none(*pgd) && !pgd_huge(*pgd))
 			/* pgd exists */
 			walk_pud(st, pgd, addr);
 		else
-- 
cgit v1.2.3-59-g8ed1b


From 83345d51a49a4b3f3b4a08a5db644dae438b0189 Mon Sep 17 00:00:00 2001
From: Tin Huynh <tnhuynh@apm.com>
Date: Wed, 17 May 2017 11:25:34 +0700
Subject: i2c: xgene: Set ACPI_COMPANION_I2C

With ACPI, i2c-core requires ACPI companion to be set in order for it
to create slave device.
This patch sets the ACPI companion accordingly.

Signed-off-by: Tin Huynh <tnhuynh@apm.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-xgene-slimpro.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index dbe7e44c9321..6ba6c83ca8f1 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -416,6 +416,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev)
 	adapter->class = I2C_CLASS_HWMON;
 	adapter->dev.parent = &pdev->dev;
 	adapter->dev.of_node = pdev->dev.of_node;
+	ACPI_COMPANION_SET(&adapter->dev, ACPI_COMPANION(&pdev->dev));
 	i2c_set_adapdata(adapter, ctx);
 	rc = i2c_add_adapter(adapter);
 	if (rc) {
-- 
cgit v1.2.3-59-g8ed1b


From 5f63424ab7daac840df2b12dd5bcc5b38d50f779 Mon Sep 17 00:00:00 2001
From: Andrey Korolyov <andrey@xdel.ru>
Date: Tue, 16 May 2017 23:54:41 +0300
Subject: USB: serial: ftdi_sio: add Olimex ARM-USB-TINY(H) PIDs

This patch adds support for recognition of ARM-USB-TINY(H) devices which
are almost identical to ARM-USB-OCD(H) but lacking separate barrel jack
and serial console.

By suggestion from Johan Hovold it is possible to replace
ftdi_jtag_quirk with a bit more generic construction. Since all
Olimex-ARM debuggers has exactly two ports, we could safely always use
only second port within the debugger family.

Signed-off-by: Andrey Korolyov <andrey@xdel.ru>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ftdi_sio.c     | 8 ++++----
 drivers/usb/serial/ftdi_sio_ids.h | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 0e634c11abbf..aba74f817dc6 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -809,10 +809,10 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) },
 	{ USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
-	{ USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID),
-		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
-	{ USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID),
-		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) },
+	{ USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) },
 	{ USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID),
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 71fb9e59db71..4fcf1cecb6d7 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -882,6 +882,8 @@
 /* Olimex */
 #define OLIMEX_VID			0x15BA
 #define OLIMEX_ARM_USB_OCD_PID		0x0003
+#define OLIMEX_ARM_USB_TINY_PID	0x0004
+#define OLIMEX_ARM_USB_TINY_H_PID	0x002a
 #define OLIMEX_ARM_USB_OCD_H_PID	0x002b
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 628c2893d44876ddd11602400c70606ade62e129 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 16 May 2017 11:47:29 -0400
Subject: USB: ene_usb6250: fix DMA to the stack

The ene_usb6250 sub-driver in usb-storage does USB I/O to buffers on
the stack, which doesn't work with vmapped stacks.  This patch fixes
the problem by allocating a separate 512-byte buffer at probe time and
using it for all of the offending I/O operations.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-and-tested-by: Andreas Hartmann <andihartmann@01019freenet.de>
CC: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/ene_ub6250.c | 90 ++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 35 deletions(-)

diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c
index 369f3c24815a..44af719194b2 100644
--- a/drivers/usb/storage/ene_ub6250.c
+++ b/drivers/usb/storage/ene_ub6250.c
@@ -446,6 +446,10 @@ struct ms_lib_ctrl {
 #define SD_BLOCK_LEN  9
 
 struct ene_ub6250_info {
+
+	/* I/O bounce buffer */
+	u8		*bbuf;
+
 	/* for 6250 code */
 	struct SD_STATUS	SD_Status;
 	struct MS_STATUS	MS_Status;
@@ -493,8 +497,11 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag);
 
 static void ene_ub6250_info_destructor(void *extra)
 {
+	struct ene_ub6250_info *info = (struct ene_ub6250_info *) extra;
+
 	if (!extra)
 		return;
+	kfree(info->bbuf);
 }
 
 static int ene_send_scsi_cmd(struct us_data *us, u8 fDir, void *buf, int use_sg)
@@ -860,8 +867,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
 		u8 PageNum, u32 *PageBuf, struct ms_lib_type_extdat *ExtraDat)
 {
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
+	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 	int result;
-	u8 ExtBuf[4];
 	u32 bn = PhyBlockAddr * 0x20 + PageNum;
 
 	result = ene_load_bincode(us, MS_RW_PATTERN);
@@ -901,7 +909,7 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
 	bcb->CDB[2]     = (unsigned char)(PhyBlockAddr>>16);
 	bcb->CDB[6]     = 0x01;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
@@ -910,9 +918,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
 	ExtraDat->status0  = 0x10;  /* Not yet,fireware support */
 
 	ExtraDat->status1  = 0x00;  /* Not yet,fireware support */
-	ExtraDat->ovrflg   = ExtBuf[0];
-	ExtraDat->mngflg   = ExtBuf[1];
-	ExtraDat->logadr   = memstick_logaddr(ExtBuf[2], ExtBuf[3]);
+	ExtraDat->ovrflg   = bbuf[0];
+	ExtraDat->mngflg   = bbuf[1];
+	ExtraDat->logadr   = memstick_logaddr(bbuf[2], bbuf[3]);
 
 	return USB_STOR_TRANSPORT_GOOD;
 }
@@ -1332,8 +1340,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
 				u8 PageNum, struct ms_lib_type_extdat *ExtraDat)
 {
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
+	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 	int result;
-	u8 ExtBuf[4];
 
 	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
 	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
@@ -1347,7 +1356,7 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
 	bcb->CDB[2]     = (unsigned char)(PhyBlock>>16);
 	bcb->CDB[6]     = 0x01;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
@@ -1355,9 +1364,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
 	ExtraDat->intr     = 0x80;  /* Not yet, waiting for fireware support */
 	ExtraDat->status0  = 0x10;  /* Not yet, waiting for fireware support */
 	ExtraDat->status1  = 0x00;  /* Not yet, waiting for fireware support */
-	ExtraDat->ovrflg   = ExtBuf[0];
-	ExtraDat->mngflg   = ExtBuf[1];
-	ExtraDat->logadr   = memstick_logaddr(ExtBuf[2], ExtBuf[3]);
+	ExtraDat->ovrflg   = bbuf[0];
+	ExtraDat->mngflg   = bbuf[1];
+	ExtraDat->logadr   = memstick_logaddr(bbuf[2], bbuf[3]);
 
 	return USB_STOR_TRANSPORT_GOOD;
 }
@@ -1556,9 +1565,9 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st)
 	u16 PhyBlock, newblk, i;
 	u16 LogStart, LogEnde;
 	struct ms_lib_type_extdat extdat;
-	u8 buf[0x200];
 	u32 count = 0, index = 0;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 
 	for (PhyBlock = 0; PhyBlock < info->MS_Lib.NumberOfPhyBlock;) {
 		ms_lib_phy_to_log_range(PhyBlock, &LogStart, &LogEnde);
@@ -1572,14 +1581,16 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st)
 			}
 
 			if (count == PhyBlock) {
-				ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, &buf);
+				ms_lib_read_extrablock(us, PhyBlock, 0, 0x80,
+						bbuf);
 				count += 0x80;
 			}
 			index = (PhyBlock % 0x80) * 4;
 
-			extdat.ovrflg = buf[index];
-			extdat.mngflg = buf[index+1];
-			extdat.logadr = memstick_logaddr(buf[index+2], buf[index+3]);
+			extdat.ovrflg = bbuf[index];
+			extdat.mngflg = bbuf[index+1];
+			extdat.logadr = memstick_logaddr(bbuf[index+2],
+					bbuf[index+3]);
 
 			if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) {
 				ms_lib_setacquired_errorblock(us, PhyBlock);
@@ -2062,9 +2073,9 @@ static int ene_ms_init(struct us_data *us)
 {
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
 	int result;
-	u8 buf[0x200];
 	u16 MSP_BlockSize, MSP_UserAreaBlocks;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 
 	printk(KERN_INFO "transport --- ENE_MSInit\n");
 
@@ -2083,13 +2094,13 @@ static int ene_ms_init(struct us_data *us)
 	bcb->CDB[0]     = 0xF1;
 	bcb->CDB[1]     = 0x01;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD) {
 		printk(KERN_ERR "Execution MS Init Code Fail !!\n");
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 	/* the same part to test ENE */
-	info->MS_Status = *(struct MS_STATUS *)&buf[0];
+	info->MS_Status = *(struct MS_STATUS *) bbuf;
 
 	if (info->MS_Status.Insert && info->MS_Status.Ready) {
 		printk(KERN_INFO "Insert     = %x\n", info->MS_Status.Insert);
@@ -2098,15 +2109,15 @@ static int ene_ms_init(struct us_data *us)
 		printk(KERN_INFO "IsMSPHG    = %x\n", info->MS_Status.IsMSPHG);
 		printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP);
 		if (info->MS_Status.IsMSPro) {
-			MSP_BlockSize      = (buf[6] << 8) | buf[7];
-			MSP_UserAreaBlocks = (buf[10] << 8) | buf[11];
+			MSP_BlockSize      = (bbuf[6] << 8) | bbuf[7];
+			MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11];
 			info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks;
 		} else {
 			ms_card_init(us); /* Card is MS (to ms.c)*/
 		}
 		usb_stor_dbg(us, "MS Init Code OK !!\n");
 	} else {
-		usb_stor_dbg(us, "MS Card Not Ready --- %x\n", buf[0]);
+		usb_stor_dbg(us, "MS Card Not Ready --- %x\n", bbuf[0]);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 
@@ -2116,9 +2127,9 @@ static int ene_ms_init(struct us_data *us)
 static int ene_sd_init(struct us_data *us)
 {
 	int result;
-	u8  buf[0x200];
 	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+	u8 *bbuf = info->bbuf;
 
 	usb_stor_dbg(us, "transport --- ENE_SDInit\n");
 	/* SD Init Part-1 */
@@ -2152,17 +2163,17 @@ static int ene_sd_init(struct us_data *us)
 	bcb->Flags              = US_BULK_FLAG_IN;
 	bcb->CDB[0]             = 0xF1;
 
-	result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0);
+	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
 	if (result != USB_STOR_XFER_GOOD) {
 		usb_stor_dbg(us, "Execution SD Init Code Fail !!\n");
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 
-	info->SD_Status =  *(struct SD_STATUS *)&buf[0];
+	info->SD_Status =  *(struct SD_STATUS *) bbuf;
 	if (info->SD_Status.Insert && info->SD_Status.Ready) {
 		struct SD_STATUS *s = &info->SD_Status;
 
-		ene_get_card_status(us, (unsigned char *)&buf);
+		ene_get_card_status(us, bbuf);
 		usb_stor_dbg(us, "Insert     = %x\n", s->Insert);
 		usb_stor_dbg(us, "Ready      = %x\n", s->Ready);
 		usb_stor_dbg(us, "IsMMC      = %x\n", s->IsMMC);
@@ -2170,7 +2181,7 @@ static int ene_sd_init(struct us_data *us)
 		usb_stor_dbg(us, "HiSpeed    = %x\n", s->HiSpeed);
 		usb_stor_dbg(us, "WtP        = %x\n", s->WtP);
 	} else {
-		usb_stor_dbg(us, "SD Card Not Ready --- %x\n", buf[0]);
+		usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 	return USB_STOR_TRANSPORT_GOOD;
@@ -2180,13 +2191,15 @@ static int ene_sd_init(struct us_data *us)
 static int ene_init(struct us_data *us)
 {
 	int result;
-	u8  misc_reg03 = 0;
+	u8  misc_reg03;
 	struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);
+	u8 *bbuf = info->bbuf;
 
-	result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03);
+	result = ene_get_card_type(us, REG_CARD_STATUS, bbuf);
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
+	misc_reg03 = bbuf[0];
 	if (misc_reg03 & 0x01) {
 		if (!info->SD_Status.Ready) {
 			result = ene_sd_init(us);
@@ -2303,8 +2316,9 @@ static int ene_ub6250_probe(struct usb_interface *intf,
 			 const struct usb_device_id *id)
 {
 	int result;
-	u8  misc_reg03 = 0;
+	u8  misc_reg03;
 	struct us_data *us;
+	struct ene_ub6250_info *info;
 
 	result = usb_stor_probe1(&us, intf, id,
 		   (id - ene_ub6250_usb_ids) + ene_ub6250_unusual_dev_list,
@@ -2313,11 +2327,16 @@ static int ene_ub6250_probe(struct usb_interface *intf,
 		return result;
 
 	/* FIXME: where should the code alloc extra buf ? */
-	if (!us->extra) {
-		us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL);
-		if (!us->extra)
-			return -ENOMEM;
-		us->extra_destructor = ene_ub6250_info_destructor;
+	us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL);
+	if (!us->extra)
+		return -ENOMEM;
+	us->extra_destructor = ene_ub6250_info_destructor;
+
+	info = (struct ene_ub6250_info *)(us->extra);
+	info->bbuf = kmalloc(512, GFP_KERNEL);
+	if (!info->bbuf) {
+		kfree(us->extra);
+		return -ENOMEM;
 	}
 
 	us->transport_name = "ene_ub6250";
@@ -2329,12 +2348,13 @@ static int ene_ub6250_probe(struct usb_interface *intf,
 		return result;
 
 	/* probe card type */
-	result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03);
+	result = ene_get_card_type(us, REG_CARD_STATUS, info->bbuf);
 	if (result != USB_STOR_XFER_GOOD) {
 		usb_stor_disconnect(intf);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
 
+	misc_reg03 = info->bbuf[0];
 	if (!(misc_reg03 & 0x01)) {
 		pr_info("ums_eneub6250: This driver only supports SD/MS cards. "
 			"It does not support SM cards.\n");
-- 
cgit v1.2.3-59-g8ed1b


From 2f964780c03b73de269b08d12aff96a9618d13f3 Mon Sep 17 00:00:00 2001
From: Vamsi Krishna Samavedam <vskrishn@codeaurora.org>
Date: Tue, 16 May 2017 14:38:08 +0200
Subject: USB: core: replace %p with %pK

Format specifier %p can leak kernel addresses while not valuing the
kptr_restrict system settings. When kptr_restrict is set to (1), kernel
pointers printed using the %pK format specifier will be replaced with
Zeros. Debugging Note : &pK prints only Zeros as address. If you need
actual address information, write 0 to kptr_restrict.

echo 0 > /proc/sys/kernel/kptr_restrict

[Found by poking around in a random vendor kernel tree, it would be nice
if someone would actually send these types of patches upstream - gkh]

Signed-off-by: Vamsi Krishna Samavedam <vskrishn@codeaurora.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/devio.c | 14 +++++++-------
 drivers/usb/core/hcd.c   |  4 ++--
 drivers/usb/core/urb.c   |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index cfc3cff6e8d5..8e6ef671be9b 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -475,11 +475,11 @@ static void snoop_urb(struct usb_device *udev,
 
 	if (userurb) {		/* Async */
 		if (when == SUBMIT)
-			dev_info(&udev->dev, "userurb %p, ep%d %s-%s, "
+			dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, "
 					"length %u\n",
 					userurb, ep, t, d, length);
 		else
-			dev_info(&udev->dev, "userurb %p, ep%d %s-%s, "
+			dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, "
 					"actual_length %u status %d\n",
 					userurb, ep, t, d, length,
 					timeout_or_status);
@@ -1895,7 +1895,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg)
 	if (as) {
 		int retval;
 
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl(as, (void __user * __user *)arg);
 		free_async(as);
 		return retval;
@@ -1912,7 +1912,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg)
 
 	as = async_getcompleted(ps);
 	if (as) {
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl(as, (void __user * __user *)arg);
 		free_async(as);
 	} else {
@@ -2043,7 +2043,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg)
 	if (as) {
 		int retval;
 
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl_compat(as, (void __user * __user *)arg);
 		free_async(as);
 		return retval;
@@ -2060,7 +2060,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar
 
 	as = async_getcompleted(ps);
 	if (as) {
-		snoop(&ps->dev->dev, "reap %p\n", as->userurb);
+		snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
 		retval = processcompl_compat(as, (void __user * __user *)arg);
 		free_async(as);
 	} else {
@@ -2489,7 +2489,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 #endif
 
 	case USBDEVFS_DISCARDURB:
-		snoop(&dev->dev, "%s: DISCARDURB %p\n", __func__, p);
+		snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p);
 		ret = proc_unlinkurb(ps, p);
 		break;
 
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 49550790a3cb..5235d6be1bdf 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1723,7 +1723,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status)
 		if (retval == 0)
 			retval = -EINPROGRESS;
 		else if (retval != -EIDRM && retval != -EBUSY)
-			dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n",
+			dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n",
 					urb, retval);
 		usb_put_dev(udev);
 	}
@@ -1890,7 +1890,7 @@ rescan:
 		/* kick hcd */
 		unlink1(hcd, urb, -ESHUTDOWN);
 		dev_dbg (hcd->self.controller,
-			"shutdown urb %p ep%d%s%s\n",
+			"shutdown urb %pK ep%d%s%s\n",
 			urb, usb_endpoint_num(&ep->desc),
 			is_in ? "in" : "out",
 			({	char *s;
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index d75cb8c0f7df..47903d510955 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -338,7 +338,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	if (!urb || !urb->complete)
 		return -EINVAL;
 	if (urb->hcpriv) {
-		WARN_ONCE(1, "URB %p submitted while active\n", urb);
+		WARN_ONCE(1, "URB %pK submitted while active\n", urb);
 		return -EBUSY;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 0bd193d62b4270a2a7a09da43ad1034c7ca5b3d3 Mon Sep 17 00:00:00 2001
From: Maksim Salau <maksim.salau@gmail.com>
Date: Sat, 13 May 2017 23:49:26 +0300
Subject: usb: misc: legousbtower: Fix memory leak

get_version_reply is not freed if function returns with success.

Fixes: 942a48730faf ("usb: misc: legousbtower: Fix buffers on stack")
Reported-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Maksim Salau <maksim.salau@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/legousbtower.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index aa3c280fdf8d..0782ac6f5edf 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -926,6 +926,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
 		 USB_MAJOR, dev->minor);
 
 exit:
+	kfree(get_version_reply);
 	return retval;
 
 error:
-- 
cgit v1.2.3-59-g8ed1b


From 41318a2b82f5d5fe1fb408f6d6e0b22aa557111d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 12 May 2017 12:06:32 +0200
Subject: uwb: fix device quirk on big-endian hosts

Add missing endianness conversion when using the USB device-descriptor
idProduct field to apply a hardware quirk.

Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.28
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uwb/i1480/dfu/usb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index 6345e85822a4..a50cf45e530f 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -341,6 +341,7 @@ error_submit_ep1:
 static
 int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
 {
+	struct usb_device *udev = interface_to_usbdev(iface);
 	struct i1480_usb *i1480_usb;
 	struct i1480 *i1480;
 	struct device *dev = &iface->dev;
@@ -352,8 +353,8 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
 			iface->cur_altsetting->desc.bInterfaceNumber);
 		goto error;
 	}
-	if (iface->num_altsetting > 1
-	    && interface_to_usbdev(iface)->descriptor.idProduct == 0xbabe) {
+	if (iface->num_altsetting > 1 &&
+			le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) {
 		/* Need altsetting #1 [HW QUIRK] or EP1 won't work */
 		result = usb_set_interface(interface_to_usbdev(iface), 0, 1);
 		if (result < 0)
-- 
cgit v1.2.3-59-g8ed1b


From 7cdfe4ddea47dbc66306edf802fd2ef86a6c8867 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 12 May 2017 10:06:29 +0200
Subject: sisusb_con: fix coccinelle warning

After commit d705ff3818 (tty: vt, cleanup and document con_scroll), in
the coccinelle output, we can see:
drivers/usb/misc/sisusbvga/sisusb_con.c:852:8-9: WARNING: return of 0/1 in function 'sisusbcon_scroll_area' with return type bool

Return true instead of 1 in the function returning bool which was
intended to do in d705ff3818 but omitted.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Fixes: d705ff3818 (tty: vt, cleanup and document con_scroll)
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-usb@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/sisusbvga/sisusb_con.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index 3c6948af726a..f019d80ca9e4 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -973,7 +973,7 @@ sisusbcon_set_origin(struct vc_data *c)
 
 	mutex_unlock(&sisusb->lock);
 
-	return 1;
+	return true;
 }
 
 /* Interface routine */
-- 
cgit v1.2.3-59-g8ed1b


From 63afd5cc78775018ea2dec4004428dafa5283e93 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 11 May 2017 11:36:01 +0200
Subject: USB: chaoskey: fix Alea quirk on big-endian hosts

Add missing endianness conversion when applying the Alea timeout quirk.

Found using sparse:

	warning: restricted __le16 degrades to integer

Fixes: e4a886e811cd ("hwrng: chaoskey - Fix URB warning due to timeout on Alea")
Cc: stable <stable@vger.kernel.org>     # 4.8
Cc: Bob Ham <bob.ham@collabora.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Keith Packard <keithp@keithp.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/chaoskey.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c
index e9cae4d82af2..15d4e64d3b65 100644
--- a/drivers/usb/misc/chaoskey.c
+++ b/drivers/usb/misc/chaoskey.c
@@ -192,7 +192,7 @@ static int chaoskey_probe(struct usb_interface *interface,
 
 	dev->in_ep = in_ep;
 
-	if (udev->descriptor.idVendor != ALEA_VENDOR_ID)
+	if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID)
 		dev->reads_started = 1;
 
 	dev->size = size;
-- 
cgit v1.2.3-59-g8ed1b


From dd5ca753fa92fb736b1395db892bd29f78e6d408 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 11 May 2017 11:36:02 +0200
Subject: USB: iowarrior: fix info ioctl on big-endian hosts

Drop erroneous le16_to_cpu when returning the USB device speed which is
already in host byte order.

Found using sparse:

	warning: cast to restricted __le16

Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.")
Cc: stable <stable@vger.kernel.org>     # 2.6.21
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/iowarrior.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 77569531b78a..83b05a287b0c 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -554,7 +554,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
 			info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice);
 
 			/* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */
-			info.speed = le16_to_cpu(dev->udev->speed);
+			info.speed = dev->udev->speed;
 			info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber;
 			info.report_size = dev->report_size;
 
-- 
cgit v1.2.3-59-g8ed1b


From 1a744d2eb76aaafb997fda004ae3ae62a1538f85 Mon Sep 17 00:00:00 2001
From: Anton Bondarenko <anton.bondarenko.sama@gmail.com>
Date: Sun, 7 May 2017 01:53:46 +0200
Subject: usb: core: fix potential memory leak in error path during hcd
 creation

Free memory allocated for address0_mutex if allocation of bandwidth_mutex
failed.

Fixes: feb26ac31a2a ("usb: core: hub: hub_port_init lock controller instead of bus")

Signed-off-by: Anton Bondarenko <anton.bondarenko.sama@gmail.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 5235d6be1bdf..5dea98358c05 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2520,6 +2520,7 @@ struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver,
 		hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex),
 				GFP_KERNEL);
 		if (!hcd->bandwidth_mutex) {
+			kfree(hcd->address0_mutex);
 			kfree(hcd);
 			dev_dbg(dev, "hcd bandwidth mutex alloc failed\n");
 			return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From dd14a3e9b92ac6f0918054f9e3477438760a4fa6 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Thu, 27 Apr 2017 12:12:49 -0700
Subject: usb: r8a66597-hcd: decrease timeout

The timeout for BULK packets was 300ms which is a long time if other
endpoints or devices are waiting for their turn. Changing it to 50ms
greatly increased the overall performance for multi-endpoint devices.

Fixes: 5d3043586db4 ("usb: r8a66597-hcd: host controller driver for R8A6659")
Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/r8a66597-hcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index bfa7fa3d2eea..698ec8721403 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -1269,7 +1269,7 @@ static void set_td_timer(struct r8a66597 *r8a66597, struct r8a66597_td *td)
 			time = 30;
 			break;
 		default:
-			time = 300;
+			time = 50;
 			break;
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 1f873d857b6c2fefb4dada952674aa01bcfb92bd Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Thu, 27 Apr 2017 12:12:02 -0700
Subject: usb: r8a66597-hcd: select a different endpoint on timeout

If multiple endpoints on a single device have pending IN URBs and one
endpoint times out due to NAKs (perfectly legal), select a different
endpoint URB to try.
The existing code only checked to see another device address has pending
URBs and ignores other IN endpoints on the current device address. This
leads to endpoints never getting serviced if one endpoint is using NAK as
a flow control method.

Fixes: 5d3043586db4 ("usb: r8a66597-hcd: host controller driver for R8A6659")
Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/r8a66597-hcd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 698ec8721403..7bf78be1fd32 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -1785,6 +1785,7 @@ static void r8a66597_td_timer(unsigned long _r8a66597)
 		pipe = td->pipe;
 		pipe_stop(r8a66597, pipe);
 
+		/* Select a different address or endpoint */
 		new_td = td;
 		do {
 			list_move_tail(&new_td->queue,
@@ -1794,7 +1795,8 @@ static void r8a66597_td_timer(unsigned long _r8a66597)
 				new_td = td;
 				break;
 			}
-		} while (td != new_td && td->address == new_td->address);
+		} while (td != new_td && td->address == new_td->address &&
+			td->pipe->info.epnum == new_td->pipe->info.epnum);
 
 		start_transfer(r8a66597, new_td);
 
-- 
cgit v1.2.3-59-g8ed1b


From a7415477a20448bbb7d13765784c0b29249a176f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 16 May 2017 16:26:13 +0200
Subject: USB: ehci-platform: fix companion-device leak

Make sure do drop the reference taken to the companion device during
resume.

Fixes: d4d75128b8fd ("usb: host: ehci-platform: fix usb 1.1 device is not connected in system resume")
Cc: stable <stable@vger.kernel.org>     # 4.11
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-platform.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index bc7b9be12f54..f1908ea9fbd8 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -384,8 +384,10 @@ static int ehci_platform_resume(struct device *dev)
 	}
 
 	companion_dev = usb_of_get_companion_dev(hcd->self.controller);
-	if (companion_dev)
+	if (companion_dev) {
 		device_pm_wait_for_dev(hcd->self.controller, companion_dev);
+		put_device(companion_dev);
+	}
 
 	ehci_resume(hcd, priv->reset_on_resume);
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From ef53b92ece675ed9778b50f4432e004683696d01 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 16 May 2017 16:26:14 +0200
Subject: USB: core: of: document reference taken by companion helper

Document that the new companion-device lookup helper takes a reference
to the companion device which needs to be dropped after use.

Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/of.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c
index d787f195a9a6..d563cbcf76cf 100644
--- a/drivers/usb/core/of.c
+++ b/drivers/usb/core/of.c
@@ -53,6 +53,9 @@ EXPORT_SYMBOL_GPL(usb_of_get_child_node);
  *
  * Find the companion device from platform bus.
  *
+ * Takes a reference to the returned struct device which needs to be dropped
+ * after use.
+ *
  * Return: On success, a pointer to the companion device, %NULL on failure.
  */
 struct device *usb_of_get_companion_dev(struct device *dev)
-- 
cgit v1.2.3-59-g8ed1b


From 5383fae76b8224a8f0465be6ab9c7a645042951a Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sat, 13 May 2017 15:49:17 +0200
Subject: doc-rst: fixed kernel-doc directives in usb/typec.rst

Even if this file is not yet included in any toctree, it is parsed by
Sphinx since it is named '.rst'. This patch fixes the following two
ERRORs from Sphinx build:

Documentation/usb/typec.rst:116: ERROR: Error in "kernel-doc" directive:
invalid option block.

.. kernel-doc:: drivers/usb/typec/typec.c
   :functions: typec_register_cable typec_unregister_cable typec_register_plug
   typec_unregister_plug

Documentation/usb/typec.rst:139: ERROR: Error in "kernel-doc" directive:
invalid option block.

.. kernel-doc:: drivers/usb/typec/typec.c
   :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role
   typec_set_pwr_opmode

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/usb/typec.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/usb/typec.rst b/Documentation/usb/typec.rst
index b67a46779de9..8a7249f2ff04 100644
--- a/Documentation/usb/typec.rst
+++ b/Documentation/usb/typec.rst
@@ -114,8 +114,7 @@ the details during registration. The class offers the following API for
 registering/unregistering cables and their plugs:
 
 .. kernel-doc:: drivers/usb/typec/typec.c
-   :functions: typec_register_cable typec_unregister_cable typec_register_plug
-   typec_unregister_plug
+   :functions: typec_register_cable typec_unregister_cable typec_register_plug typec_unregister_plug
 
 The class will provide a handle to struct typec_cable and struct typec_plug if
 the registration is successful, or NULL if it isn't.
@@ -137,8 +136,7 @@ during connection of a partner or cable, the port driver must use the following
 APIs to report it to the class:
 
 .. kernel-doc:: drivers/usb/typec/typec.c
-   :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role
-   typec_set_pwr_opmode
+   :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role typec_set_pwr_opmode
 
 Alternate Modes
 ~~~~~~~~~~~~~~~
-- 
cgit v1.2.3-59-g8ed1b


From d81182ce30dbd497a1e7047d7fda2af040347790 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 10 May 2017 18:18:25 +0200
Subject: USB: gadget: dummy_hcd: fix hub-descriptor removable fields

Flag the first and only port as removable while also leaving the
remaining bits (including the reserved bit zero) unset in accordance
with the specifications:

	"Within a byte, if no port exists for a given location, the bit
	field representing the port characteristics shall be 0."

Also add a comment marking the legacy PortPwrCtrlMask field.

Fixes: 1cd8fd2887e1 ("usb: gadget: dummy_hcd: add SuperSpeed support")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: Tatyana Brokhman <tlinder@codeaurora.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/dummy_hcd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index c79081952ea0..ccabb51cb98d 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -2008,7 +2008,7 @@ ss_hub_descriptor(struct usb_hub_descriptor *desc)
 			HUB_CHAR_COMMON_OCPM);
 	desc->bNbrPorts = 1;
 	desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/
-	desc->u.ss.DeviceRemovable = 0xffff;
+	desc->u.ss.DeviceRemovable = 0;
 }
 
 static inline void hub_descriptor(struct usb_hub_descriptor *desc)
@@ -2020,8 +2020,8 @@ static inline void hub_descriptor(struct usb_hub_descriptor *desc)
 			HUB_CHAR_INDV_PORT_LPSM |
 			HUB_CHAR_COMMON_OCPM);
 	desc->bNbrPorts = 1;
-	desc->u.hs.DeviceRemovable[0] = 0xff;
-	desc->u.hs.DeviceRemovable[1] = 0xff;
+	desc->u.hs.DeviceRemovable[0] = 0;
+	desc->u.hs.DeviceRemovable[1] = 0xff;	/* PortPwrCtrlMask */
 }
 
 static int dummy_hub_control(
-- 
cgit v1.2.3-59-g8ed1b


From ec963b412a54aac8e527708ecad06a6988a86fb4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 10 May 2017 18:18:26 +0200
Subject: USB: usbip: fix nonconforming hub descriptor

Fix up the root-hub descriptor to accommodate the variable-length
DeviceRemovable and PortPwrCtrlMask fields, while marking all ports as
removable (and leaving the reserved bit zero unset).

Also add a build-time constraint on VHCI_HC_PORTS which must never be
greater than USB_MAXCHILDREN (but this was only enforced through a
KConfig constant).

This specifically fixes the descriptor layout whenever VHCI_HC_PORTS is
greater than seven (default is 8).

Fixes: 04679b3489e0 ("Staging: USB/IP: add client driver")
Cc: Takahiro Hirofuchi <hirofuchi@users.sourceforge.net>
Cc: Valentina Manea <valentina.manea.m@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/vhci_hcd.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 5d8b2c261940..0585078638db 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -235,14 +235,19 @@ done:
 
 static inline void hub_descriptor(struct usb_hub_descriptor *desc)
 {
+	int width;
+
 	memset(desc, 0, sizeof(*desc));
 	desc->bDescriptorType = USB_DT_HUB;
-	desc->bDescLength = 9;
 	desc->wHubCharacteristics = cpu_to_le16(
 		HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM);
+
 	desc->bNbrPorts = VHCI_HC_PORTS;
-	desc->u.hs.DeviceRemovable[0] = 0xff;
-	desc->u.hs.DeviceRemovable[1] = 0xff;
+	BUILD_BUG_ON(VHCI_HC_PORTS > USB_MAXCHILDREN);
+	width = desc->bNbrPorts / 8 + 1;
+	desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * width;
+	memset(&desc->u.hs.DeviceRemovable[0], 0, width);
+	memset(&desc->u.hs.DeviceRemovable[width], 0xff, width);
 }
 
 static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
-- 
cgit v1.2.3-59-g8ed1b


From 2c25a2c818023df64463aac3288a9f969491e507 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 10 May 2017 18:18:27 +0200
Subject: USB: hub: fix SS hub-descriptor handling

A SuperSpeed hub descriptor does not have any variable-length fields so
bail out when reading a short descriptor.

This avoids parsing and leaking two bytes of uninitialised slab data
through sysfs removable-attributes.

Fixes: dbe79bbe9dcb ("USB 3.0 Hub Changes")
Cc: stable <stable@vger.kernel.org>     # 2.6.39
Cc: John Youn <John.Youn@synopsys.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 9dca59ef18b3..3ff1e9f89f2d 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -380,8 +380,12 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data)
 			USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB,
 			dtype << 8, 0, data, size,
 			USB_CTRL_GET_TIMEOUT);
-		if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2))
+		if (hub_is_superspeed(hdev)) {
+			if (ret == size)
+				return ret;
+		} else if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) {
 			return ret;
+		}
 	}
 	return -EINVAL;
 }
@@ -1321,7 +1325,7 @@ static int hub_configure(struct usb_hub *hub,
 
 	/* Request the entire hub descriptor.
 	 * hub->descriptor can handle USB_MAXCHILDREN ports,
-	 * but the hub can/will return fewer bytes here.
+	 * but a (non-SS) hub can/will return fewer bytes here.
 	 */
 	ret = get_hub_descriptor(hdev, hub->descriptor);
 	if (ret < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From bec444cd1c94c48df409a35ad4e5b143c245c3f7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 10 May 2017 18:18:28 +0200
Subject: USB: hub: fix non-SS hub-descriptor handling

Add missing sanity check on the non-SuperSpeed hub-descriptor length in
order to avoid parsing and leaking two bytes of uninitialised slab data
through sysfs removable-attributes (or a compound-device debug
statement).

Note that we only make sure that the DeviceRemovable field is always
present (and specifically ignore the unused PortPwrCtrlMask field) in
order to continue support any hubs with non-compliant descriptors. As a
further safeguard, the descriptor buffer is also cleared.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>     # 2.6.12
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 3ff1e9f89f2d..f77a4ebde7d5 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -362,7 +362,8 @@ static void usb_set_lpm_parameters(struct usb_device *udev)
 }
 
 /* USB 2.0 spec Section 11.24.4.5 */
-static int get_hub_descriptor(struct usb_device *hdev, void *data)
+static int get_hub_descriptor(struct usb_device *hdev,
+		struct usb_hub_descriptor *desc)
 {
 	int i, ret, size;
 	unsigned dtype;
@@ -378,12 +379,16 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data)
 	for (i = 0; i < 3; i++) {
 		ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0),
 			USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB,
-			dtype << 8, 0, data, size,
+			dtype << 8, 0, desc, size,
 			USB_CTRL_GET_TIMEOUT);
 		if (hub_is_superspeed(hdev)) {
 			if (ret == size)
 				return ret;
-		} else if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) {
+		} else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) {
+			/* Make sure we have the DeviceRemovable field. */
+			size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1;
+			if (ret < size)
+				return -EMSGSIZE;
 			return ret;
 		}
 	}
@@ -1317,7 +1322,7 @@ static int hub_configure(struct usb_hub *hub,
 	}
 	mutex_init(&hub->status_mutex);
 
-	hub->descriptor = kmalloc(sizeof(*hub->descriptor), GFP_KERNEL);
+	hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL);
 	if (!hub->descriptor) {
 		ret = -ENOMEM;
 		goto fail;
-- 
cgit v1.2.3-59-g8ed1b


From 93491ced3c87c94b12220dbac0527e1356702179 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 10 May 2017 18:18:29 +0200
Subject: USB: hub: fix SS max number of ports

Add define for the maximum number of ports on a SuperSpeed hub as per
USB 3.1 spec Table 10-5, and use it when verifying the retrieved hub
descriptor.

This specifically avoids benign attempts to update the DeviceRemovable
mask for non-existing ports (should we get that far).

Fixes: dbe79bbe9dcb ("USB 3.0 Hub Changes")
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c        | 8 +++++++-
 include/uapi/linux/usb/ch11.h | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index f77a4ebde7d5..b8bb20d7acdb 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1336,7 +1336,13 @@ static int hub_configure(struct usb_hub *hub,
 	if (ret < 0) {
 		message = "can't read hub descriptor";
 		goto fail;
-	} else if (hub->descriptor->bNbrPorts > USB_MAXCHILDREN) {
+	}
+
+	maxchild = USB_MAXCHILDREN;
+	if (hub_is_superspeed(hdev))
+		maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS);
+
+	if (hub->descriptor->bNbrPorts > maxchild) {
 		message = "hub has too many ports!";
 		ret = -ENODEV;
 		goto fail;
diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h
index 361297e96f58..576c704e3fb8 100644
--- a/include/uapi/linux/usb/ch11.h
+++ b/include/uapi/linux/usb/ch11.h
@@ -22,6 +22,9 @@
  */
 #define USB_MAXCHILDREN		31
 
+/* See USB 3.1 spec Table 10-5 */
+#define USB_SS_MAXPORTS		15
+
 /*
  * Hub request types
  */
-- 
cgit v1.2.3-59-g8ed1b


From 5120a266928a07231d198bb518f6fe73148786a3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 10 May 2017 18:18:30 +0200
Subject: USB: host: xhci: use max-port define

Use the new define for the maximum number of SuperSpeed ports instead of
a constant when allocating xHCI root hubs.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index bbe22bcc550a..69428e970925 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -2307,10 +2307,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags)
 	/* Place limits on the number of roothub ports so that the hub
 	 * descriptors aren't longer than the USB core will allocate.
 	 */
-	if (xhci->num_usb3_ports > 15) {
+	if (xhci->num_usb3_ports > USB_SS_MAXPORTS) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_init,
-				"Limiting USB 3.0 roothub ports to 15.");
-		xhci->num_usb3_ports = 15;
+				"Limiting USB 3.0 roothub ports to %u.",
+				USB_SS_MAXPORTS);
+		xhci->num_usb3_ports = USB_SS_MAXPORTS;
 	}
 	if (xhci->num_usb2_ports > USB_MAXCHILDREN) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_init,
-- 
cgit v1.2.3-59-g8ed1b


From 5667c86acf021e6dcf02584408b4484a273ac68f Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Date: Sun, 14 May 2017 21:41:55 -0700
Subject: mac80211: strictly check mesh address extension mode

Mesh forwarding path checks for address extension mode to fetch
appropriate proxied address and MPP address. Existing condition
that looks for 6 address format is not strict enough so that
frames with improper values are processed and invalid entries
are added into MPP table. Fix that by adding a stricter check before
processing the packet.

Per IEEE Std 802.11s-2011 spec. Table 7-6g1 lists address extension
mode 0x3 as reserved one. And also Table Table 9-13 does not specify
0x3 as valid address field.

Fixes: 9b395bc3be1c ("mac80211: verify that skb data is present")
Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c   |  3 ++-
 net/wireless/util.c | 10 ++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 35f4c7d7a500..1f75280ba26c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2492,7 +2492,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 		if (is_multicast_ether_addr(hdr->addr1)) {
 			mpp_addr = hdr->addr3;
 			proxied_addr = mesh_hdr->eaddr1;
-		} else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) {
+		} else if ((mesh_hdr->flags & MESH_FLAGS_AE) ==
+			    MESH_FLAGS_AE_A5_A6) {
 			/* has_a4 already checked in ieee80211_rx_mesh_check */
 			mpp_addr = hdr->addr4;
 			proxied_addr = mesh_hdr->eaddr2;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7198373e2920..4992f1025c9d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	if (iftype == NL80211_IFTYPE_MESH_POINT)
 		skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
 
+	mesh_flags &= MESH_FLAGS_AE;
+
 	switch (hdr->frame_control &
 		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
 	case cpu_to_le16(IEEE80211_FCTL_TODS):
@@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 			     iftype != NL80211_IFTYPE_STATION))
 			return -1;
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
-			if (mesh_flags & MESH_FLAGS_AE_A4)
+			if (mesh_flags == MESH_FLAGS_AE_A4)
 				return -1;
-			if (mesh_flags & MESH_FLAGS_AE_A5_A6) {
+			if (mesh_flags == MESH_FLAGS_AE_A5_A6) {
 				skb_copy_bits(skb, hdrlen +
 					offsetof(struct ieee80211s_hdr, eaddr1),
 					tmp.h_dest, 2 * ETH_ALEN);
@@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 		     ether_addr_equal(tmp.h_source, addr)))
 			return -1;
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
-			if (mesh_flags & MESH_FLAGS_AE_A5_A6)
+			if (mesh_flags == MESH_FLAGS_AE_A5_A6)
 				return -1;
-			if (mesh_flags & MESH_FLAGS_AE_A4)
+			if (mesh_flags == MESH_FLAGS_AE_A4)
 				skb_copy_bits(skb, hdrlen +
 					offsetof(struct ieee80211s_hdr, eaddr1),
 					tmp.h_source, ETH_ALEN);
-- 
cgit v1.2.3-59-g8ed1b


From 1cc896ed61fa0441dffef726ff678fd82a9e6265 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 15 May 2017 16:01:30 +0100
Subject: iommu/dma: Don't touch invalid iova_domain members

When __iommu_dma_map() and iommu_dma_free_iova() are called from
iommu_dma_get_msi_page(), various iova_*() helpers are still invoked in
the process, whcih is unwise since they access a different member of the
union (the iova_domain) from that which was last written, and there's no
guarantee that sensible values will result anyway.

CLean up the code paths that are valid for an MSI cookie to ensure we
only do iova_domain-specific things when we're actually dealing with one.

Fixes: a44e6657585b ("iommu/dma: Clean up MSI IOVA allocation")
Reported-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8348f366ddd1..62618e77bedc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -396,13 +396,13 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
 	else
-		free_iova_fast(iovad, iova >> shift, size >> shift);
+		free_iova_fast(iovad, iova_pfn(iovad, iova),
+				size >> iova_shift(iovad));
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
@@ -617,11 +617,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	size_t iova_off = iova_offset(iovad, phys);
+	size_t iova_off = 0;
 	dma_addr_t iova;
 
-	size = iova_align(iovad, size + iova_off);
+	if (cookie->type == IOMMU_DMA_IOVA_COOKIE) {
+		iova_off = iova_offset(&cookie->iovad, phys);
+		size = iova_align(&cookie->iovad, size + iova_off);
+	}
+
 	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 	if (!iova)
 		return DMA_ERROR_CODE;
-- 
cgit v1.2.3-59-g8ed1b


From f73a7eee900e95404b61408a23a1df5c5811704c Mon Sep 17 00:00:00 2001
From: KarimAllah Ahmed <karahmed@amazon.de>
Date: Fri, 5 May 2017 11:39:59 -0700
Subject: iommu/vt-d: Flush the IOTLB to get rid of the initial kdump mappings

Ever since commit 091d42e43d ("iommu/vt-d: Copy translation tables from
old kernel") the kdump kernel copies the IOMMU context tables from the
previous kernel. Each device mappings will be destroyed once the driver
for the respective device takes over.

This unfortunately breaks the workflow of mapping and unmapping a new
context to the IOMMU. The mapping function assumes that either:

1) Unmapping did the proper IOMMU flushing and it only ever flush if the
   IOMMU unit supports caching invalid entries.
2) The system just booted and the initialization code took care of
   flushing all IOMMU caches.

This assumption is not true for the kdump kernel since the context
tables have been copied from the previous kernel and translations could
have been cached ever since. So make sure to flush the IOTLB as well
when we destroy these old copied mappings.

Cc: Joerg Roedel <joro@8bytes.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Anthony Liguori <aliguori@amazon.com>
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Acked-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org  v4.2+
Fixes: 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 90ab0115d78e..fc2765ccdb57 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2055,11 +2055,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	if (context_copied(context)) {
 		u16 did_old = context_domain_id(context);
 
-		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
+		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
 			iommu->flush.flush_context(iommu, did_old,
 						   (((u16)bus) << 8) | devfn,
 						   DMA_CCMD_MASK_NOBIT,
 						   DMA_CCMD_DEVICE_INVL);
+			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+						 DMA_TLB_DSI_FLUSH);
+		}
 	}
 
 	pgd = domain->pgd;
-- 
cgit v1.2.3-59-g8ed1b


From 745b6e74704782488dd875292bc49e24d23e81fd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 May 2017 13:35:51 +0200
Subject: iommu/mediatek: Include linux/dma-mapping.h

The mediatek iommu driver relied on an implicit include of dma-mapping.h,
but for some reason that is no longer there in 4.12-rc1:

drivers/iommu/mtk_iommu_v1.c: In function 'mtk_iommu_domain_finalise':
drivers/iommu/mtk_iommu_v1.c:233:16: error: implicit declaration of function 'dma_zalloc_coherent'; did you mean 'debug_dma_alloc_coherent'? [-Werror=implicit-function-declaration]
drivers/iommu/mtk_iommu_v1.c: In function 'mtk_iommu_domain_free':
drivers/iommu/mtk_iommu_v1.c:265:2: error: implicit declaration of function 'dma_free_coherent'; did you mean 'debug_dma_free_coherent'? [-Werror=implicit-function-declaration]

This adds an explicit #include to make it build again.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 208480bb27 ('iommu: Remove trace-events include from iommu.h')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu_v1.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a27ef570c328..bc1efbfb9ddf 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
 #include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
-- 
cgit v1.2.3-59-g8ed1b


From 7e1b9521f5a8356553f5e58b07952bf346632ea4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 11 Mar 2017 19:09:45 +0000
Subject: dm cache: handle kmalloc failure allocating background_tracker struct

Currently there is no kmalloc failure check on the allocation of
the background_tracker struct in btracker_create(), and so a NULL return
will lead to a NULL pointer dereference.  Add a NULL check.

Detected by CoverityScan, CID#1416587 ("Dereference null return value")

Fixes: b29d4986d ("dm cache: significant rework to leverage dm-bio-prison-v2")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-background-tracker.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
index 9b1afdfb13f0..707233891291 100644
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work)
 {
 	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
 
+	if (!b) {
+		DMERR("couldn't create background_tracker");
+		return NULL;
+	}
+
 	b->max_work = max_work;
 	atomic_set(&b->pending_promotes, 0);
 	atomic_set(&b->pending_writebacks, 0);
-- 
cgit v1.2.3-59-g8ed1b


From bca5238816939436d72ae6bab124c4b0641a3a99 Mon Sep 17 00:00:00 2001
From: Ravikumar Kattekola <rk@ti.com>
Date: Wed, 17 May 2017 06:51:38 -0700
Subject: ARM: dts: dra7: Reduce cpu thermal shutdown temperature
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On dra7, as per TRM, the HW shutdown (TSHUT) temperature is hardcoded
to 123C and cannot be modified by SW. This means when the temperature
reaches 123C HW asserts TSHUT output which signals a warm reset.
This reset is held until the temperature goes below the TSHUT low (105C).

While in SW, the thermal driver continuously monitors current temperature
and takes decisions based on whether it reached an alert or a critical point.
The intention of setting a SW critical point is to prevent force reset by HW
and instead do an orderly_poweroff(). But if the SW critical temperature is
greater than or equal to that of HW then it defeats the purpose. To address
this and let SW take action before HW does keep the SW critical temperature
less than HW TSHUT value.

The value for SW critical temperature was chosen as 120C just to ensure
we give SW sometime before HW catches up.

Document reference
SPRUI30C – DRA75x, DRA74x Technical Reference Manual - November 2016
SPRUHZ6H - AM572x Technical Reference Manual - November 2016

Tested on:
DRA75x PG 2.0 Rev H EVM

Signed-off-by: Ravikumar Kattekola <rk@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/dra7.dtsi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 57892f264cea..e7144662af45 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -2017,4 +2017,8 @@
 	coefficients = <0 2000>;
 };
 
+&cpu_crit {
+	temperature = <120000>; /* milli Celsius */
+};
+
 /include/ "dra7xx-clocks.dtsi"
-- 
cgit v1.2.3-59-g8ed1b


From 69c8ebf83213e6165b13d94ec599b861467ee2dc Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 16 May 2017 12:22:22 +0200
Subject: fuseblk: Fix warning in super_setup_bdi_name()

Commit 5f7f7543f52e "fuse: Convert to separately allocated bdi" didn't
properly handle fuseblk filesystem. When fuse_bdi_init() is called for
that filesystem type, sb->s_bdi is already initialized (by
set_bdev_super()) to point to block device's bdi and consequently
super_setup_bdi_name() complains about this fact when reseting bdi to
the private one.

Fix the problem by properly dropping bdi reference in fuse_bdi_init()
before creating a private bdi in super_setup_bdi_name().

Fixes: 5f7f7543f52e ("fuse: Convert to separately allocated bdi")
Reported-by: Rakesh Pandit <rakesh@tuxera.com>
Tested-by: Rakesh Pandit <rakesh@tuxera.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/fuse/inode.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73cf05135252..9da1a61276d1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -972,8 +972,15 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 	char *suffix = "";
 
-	if (sb->s_bdev)
+	if (sb->s_bdev) {
 		suffix = "-fuseblk";
+		/*
+		 * sb->s_bdi points to blkdev's bdi however we want to redirect
+		 * it to our private bdi...
+		 */
+		bdi_put(sb->s_bdi);
+		sb->s_bdi = &noop_backing_dev_info;
+	}
 	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
 				   MINOR(fc->dev), suffix);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From 8d7a10dd323993cc40bd37bce8bc570133b0c396 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Wed, 17 May 2017 16:30:50 +0200
Subject: USB: serial: qcserial: add more Lenovo EM74xx device IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In their infinite wisdom, and never ending quest for end user frustration,
Lenovo has decided to use new USB device IDs for the wwan modules in
their 2017 laptops.  The actual hardware is still the Sierra Wireless
EM7455 or EM7430, depending on region.

Cc: <stable@vger.kernel.org>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/qcserial.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 38b3f0d8cd58..fd509ed6cf70 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
 	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx */
 	{DEVICE_SWI(0x1199, 0x9078)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x1199, 0x9079)},	/* Sierra Wireless EM74xx */
+	{DEVICE_SWI(0x1199, 0x907a)},	/* Sierra Wireless EM74xx QDL */
+	{DEVICE_SWI(0x1199, 0x907b)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
-- 
cgit v1.2.3-59-g8ed1b


From 23d268eb240954e6e78f7cfab04f2b1e79f84489 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Tue, 16 May 2017 07:53:43 -0700
Subject: arp: honour gratuitous ARP _replies_

When arp_accept is 1, gratuitous ARPs are supposed to override matching
entries irrespective of whether they arrive during locktime. This was
implemented in commit 56022a8fdd87 ("ipv4: arp: update neighbour address
when a gratuitous arp is received and arp_accept is set")

There is a glitch in the patch though. RFC 2002, section 4.6, "ARP,
Proxy ARP, and Gratuitous ARP", defines gratuitous ARPs so that they can
be either of Request or Reply type. Those Reply gratuitous ARPs can be
triggered with standard tooling, for example, arping -A option does just
that.

This patch fixes the glitch, making both Request and Reply flavours of
gratuitous ARPs to behave identically.

As per RFC, if gratuitous ARPs are of Reply type, their Target Hardware
Address field should also be set to the link-layer address to which this
cache entry should be updated. The field is present in ARP over Ethernet
but not in IEEE 1394. In this patch, I don't consider any broadcasted
ARP replies as gratuitous if the field is not present, to conform the
standard. It's not clear whether there is such a thing for IEEE 1394 as
a gratuitous ARP reply; until it's cleared up, we will ignore such
broadcasts. Note that they will still update existing ARP cache entries,
assuming they arrive out of locktime time interval.

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0937b34c27ca..d54345a06f72 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -653,6 +653,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 	unsigned char *arp_ptr;
 	struct rtable *rt;
 	unsigned char *sha;
+	unsigned char *tha = NULL;
 	__be32 sip, tip;
 	u16 dev_type = dev->type;
 	int addr_type;
@@ -724,6 +725,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 		break;
 #endif
 	default:
+		tha = arp_ptr;
 		arp_ptr += dev->addr_len;
 	}
 	memcpy(&tip, arp_ptr, 4);
@@ -842,8 +844,18 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
-		is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
-			  addr_type == RTN_UNICAST;
+		is_garp = tip == sip && addr_type == RTN_UNICAST;
+
+		/* Unsolicited ARP _replies_ also require target hwaddr to be
+		 * the same as source.
+		 */
+		if (is_garp && arp->ar_op == htons(ARPOP_REPLY))
+			is_garp =
+				/* IPv4 over IEEE 1394 doesn't provide target
+				 * hardware address field in its ARP payload.
+				 */
+				tha &&
+				!memcmp(tha, sha, dev->addr_len);
 
 		if (!n &&
 		    ((arp->ar_op == htons(ARPOP_REPLY)  &&
-- 
cgit v1.2.3-59-g8ed1b


From 77d7123342dcf6442341b67816321d71da8b2b16 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Tue, 16 May 2017 08:44:24 -0700
Subject: neighbour: update neigh timestamps iff update is effective

It's a common practice to send gratuitous ARPs after moving an
IP address to another device to speed up healing of a service. To
fulfill service availability constraints, the timing of network peers
updating their caches to point to a new location of an IP address can be
particularly important.

Sometimes neigh_update calls won't touch neither lladdr nor state, for
example if an update arrives in locktime interval. The neigh->updated
value is tested by the protocol specific neigh code, which in turn
will influence whether NEIGH_UPDATE_F_OVERRIDE gets set in the
call to neigh_update() or not. As a result, we may effectively ignore
the update request, bailing out of touching the neigh entry, except that
we still bump its timestamps inside neigh_update.

This may be a problem for updates arriving in quick succession. For
example, consider the following scenario:

A service is moved to another device with its IP address. The new device
sends three gratuitous ARP requests into the network with ~1 seconds
interval between them. Just before the first request arrives to one of
network peer nodes, its neigh entry for the IP address transitions from
STALE to DELAY.  This transition, among other things, updates
neigh->updated. Once the kernel receives the first gratuitous ARP, it
ignores it because its arrival time is inside the locktime interval. The
kernel still bumps neigh->updated. Then the second gratuitous ARP
request arrives, and it's also ignored because it's still in the (new)
locktime interval. Same happens for the third request. The node
eventually heals itself (after delay_first_probe_time seconds since the
initial transition to DELAY state), but it just wasted some time and
require a new ARP request/reply round trip. This unfortunate behaviour
both puts more load on the network, as well as reduces service
availability.

This patch changes neigh_update so that it bumps neigh->updated (as well
as neigh->confirmed) only once we are sure that either lladdr or entry
state will change). In the scenario described above, it means that the
second gratuitous ARP request will actually update the entry lladdr.

Ideally, we would update the neigh entry on the very first gratuitous
ARP request. The locktime mechanism is designed to ignore ARP updates in
a short timeframe after a previous ARP update was honoured by the kernel
layer. This would require tracking timestamps for state transitions
separately from timestamps when actual updates are received. This would
probably involve changes in neighbour struct. Therefore, the patch
doesn't tackle the issue of the first gratuitous APR ignored, leaving
it for a follow-up.

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 58b0bcc125b5..d274f81fcc2c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		lladdr = neigh->ha;
 	}
 
-	if (new & NUD_CONNECTED)
-		neigh->confirmed = jiffies;
-	neigh->updated = jiffies;
-
 	/* If entry was valid and address is not changed,
 	   do not change entry state, if new one is STALE.
 	 */
@@ -1157,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		}
 	}
 
+	/* Update timestamps only once we know we will make a change to the
+	 * neighbour entry. Otherwise we risk to move the locktime window with
+	 * noop updates and ignore relevant ARP updates.
+	 */
+	if (new != old || lladdr != neigh->ha) {
+		if (new & NUD_CONNECTED)
+			neigh->confirmed = jiffies;
+		neigh->updated = jiffies;
+	}
+
 	if (new != old) {
 		neigh_del_timer(neigh);
 		if (new & NUD_PROBE)
-- 
cgit v1.2.3-59-g8ed1b


From 63a1e1c95e60e798fa09ab3c536fb555aa5bbf2b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 16 May 2017 15:18:05 +0100
Subject: arm64/cpufeature: don't use mutex in bringup path

Currently, cpus_set_cap() calls static_branch_enable_cpuslocked(), which
must take the jump_label mutex.

We call cpus_set_cap() in the secondary bringup path, from the idle
thread where interrupts are disabled. Taking a mutex in this path "is a
NONO" regardless of whether it's contended, and something we must avoid.
We didn't spot this until recently, as ___might_sleep() won't warn for
this case until all CPUs have been brought up.

This patch avoids taking the mutex in the secondary bringup path. The
poking of static keys is deferred until enable_cpu_capabilities(), which
runs in a suitable context on the boot CPU. To account for the static
keys being set later, cpus_have_const_cap() is updated to use another
static key to check whether the const cap keys have been initialised,
falling back to the caps bitmap until this is the case.

This means that users of cpus_have_const_cap() gain should only gain a
single additional NOP in the fast path once the const caps are
initialised, but should always see the current cap value.

The hyp code should never dereference the caps array, since the caps are
initialized before we run the module initcall to initialise hyp. A check
is added to the hyp init code to document this requirement.

This change will sidestep a number of issues when the upcoming hotplug
locking rework is merged.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyniger <marc.zyngier@arm.com>
Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Sewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 12 ++++++++++--
 arch/arm64/include/asm/kvm_host.h   |  8 ++++++--
 arch/arm64/kernel/cpufeature.c      | 23 +++++++++++++++++++++--
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index e7f84a7b4465..428ee1f2468c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -115,6 +115,7 @@ struct arm64_cpu_capabilities {
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
 
 bool this_cpu_has_cap(unsigned int cap);
 
@@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsigned int num)
 }
 
 /* System capability check for constant caps */
-static inline bool cpus_have_const_cap(int num)
+static inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
@@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigned int num)
 	return test_bit(num, cpu_hwcaps);
 }
 
+static inline bool cpus_have_const_cap(int num)
+{
+	if (static_branch_likely(&arm64_const_caps_ready))
+		return __cpus_have_const_cap(num);
+	else
+		return cpus_have_cap(num);
+}
+
 static inline void cpus_set_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS) {
@@ -145,7 +154,6 @@ static inline void cpus_set_cap(unsigned int num)
 			num, ARM64_NCAPS);
 	} else {
 		__set_bit(num, cpu_hwcaps);
-		static_branch_enable(&cpu_hwcap_keys[num]);
 	}
 }
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5e19165c5fa8..1f252a95bc02 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -355,9 +356,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long vector_ptr)
 {
 	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code.
+	 * Call initialization code, and switch to the full blown HYP code.
+	 * If the cpucaps haven't been finalized yet, something has gone very
+	 * wrong, and hyp will crash and burn when it uses any
+	 * cpus_have_const_cap() wrapper.
 	 */
+	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 94b8f7fc3310..817ce3365e20 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -985,8 +985,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  */
 void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
-	for (; caps->matches; caps++)
-		if (caps->enable && cpus_have_cap(caps->capability))
+	for (; caps->matches; caps++) {
+		unsigned int num = caps->capability;
+
+		if (!cpus_have_cap(num))
+			continue;
+
+		/* Ensure cpus_have_const_cap(num) works */
+		static_branch_enable(&cpu_hwcap_keys[num]);
+
+		if (caps->enable) {
 			/*
 			 * Use stop_machine() as it schedules the work allowing
 			 * us to modify PSTATE, instead of on_each_cpu() which
@@ -994,6 +1002,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 			 * we return.
 			 */
 			stop_machine(caps->enable, NULL, cpu_online_mask);
+		}
+	}
 }
 
 /*
@@ -1096,6 +1106,14 @@ static void __init setup_feature_capabilities(void)
 	enable_cpu_capabilities(arm64_features);
 }
 
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+
+static void __init mark_const_caps_ready(void)
+{
+	static_branch_enable(&arm64_const_caps_ready);
+}
+
 /*
  * Check if the current CPU has a given feature capability.
  * Should be called from non-preemptible context.
@@ -1131,6 +1149,7 @@ void __init setup_cpu_features(void)
 	/* Set the CPU feature capabilies */
 	setup_feature_capabilities();
 	enable_errata_workarounds();
+	mark_const_caps_ready();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())
-- 
cgit v1.2.3-59-g8ed1b


From 828d4cdd012c8ffbf76625c3ff164312e8666784 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 16 May 2017 10:08:08 -0600
Subject: dtc: check.c fix compile error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compile error found on odroid-xu4:

checks.c: In function ‘check_simple_bus_reg’:
checks.c:876:41: error: format ‘%lx’ expects argument of type
‘long unsigned int’, but argument 4 has type
‘uint64_t{aka long long unsigned int}’ [-Werror=format=]
  snprintf(unit_addr, sizeof(unit_addr), "%lx", reg);
                                         ^
checks.c:876:41: error: format ‘%lx’ expects argument of type
‘long unsigned int’, but argument 4 has type
‘uint64_t {aka long long unsigned int}’ [-Werror=format=]
cc1: all warnings being treated as errors
Makefile:304: recipe for target 'checks.o' failed
make: *** [checks.o] Error 1

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
[dwg: Correct new format to be correct in general]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
[robh: cherry-picked from upstream dtc commit 2a42b14d0d03]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 scripts/dtc/checks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c
index 5adfc8f52b4f..4b72b530c84f 100644
--- a/scripts/dtc/checks.c
+++ b/scripts/dtc/checks.c
@@ -873,7 +873,7 @@ static void check_simple_bus_reg(struct check *c, struct dt_info *dti, struct no
 	while (size--)
 		reg = (reg << 32) | fdt32_to_cpu(*(cells++));
 
-	snprintf(unit_addr, sizeof(unit_addr), "%lx", reg);
+	snprintf(unit_addr, sizeof(unit_addr), "%zx", reg);
 	if (!streq(unitname, unit_addr))
 		FAIL(c, dti, "Node %s simple-bus unit address format error, expected \"%s\"",
 		     node->fullpath, unit_addr);
-- 
cgit v1.2.3-59-g8ed1b


From 49e67dd17649b60b4d54966e18ec9c80198227f0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 17 May 2017 17:29:09 +0200
Subject: of: fdt: add missing allocation-failure check

The memory allocator passed to __unflatten_device_tree() (e.g. a wrapped
kzalloc) can fail so add the missing sanity check to avoid dereferencing
a NULL pointer.

Fixes: fe14042358fa ("of/flattree: Refactor unflatten_device_tree and add fdt_unflatten_tree")
Cc: stable <stable@vger.kernel.org>     # 2.6.38
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/fdt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index a0972219ccfc..0373ae49d24d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -507,6 +507,9 @@ void *__unflatten_device_tree(const void *blob,
 
 	/* Allocate memory for the expanded device tree */
 	mem = dt_alloc(size + 4, __alignof__(struct device_node));
+	if (!mem)
+		return NULL;
+
 	memset(mem, 0, size);
 
 	*(__be32 *)(mem + size) = cpu_to_be32(0xdeadbeef);
-- 
cgit v1.2.3-59-g8ed1b


From 05d8cba4a1e8c7e2d1f91a24a2f3d26852938a04 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 16 May 2017 14:15:03 +0900
Subject: kbuild: skip install/check of headers right under uapi directories

Since commit 61562f981e92 ("uapi: export all arch specifics
directories"), "make INSTALL_HDR_PATH=$root/usr headers_install"
deletes standard glibc headers and others in $(root)/usr/include.

The cause of the issue is that headers_install now starts descending
from arch/$(hdr-arch)/include/uapi with $(root)/usr/include for its
destination when installing asm headers.  So, headers already there
are assumed to be unwanted.

When headers_install starts descending from include/uapi with
$(root)/usr/include for its destination, it works around the problem
by creating an dummy destination $(root)/usr/include/uapi, but this
is tricky.

To fix the problem in a clean way is to skip headers install/check
in include/uapi and arch/$(hdr-arch)/include/uapi because we know
there are only sub-directories in uapi directories.  A good side
effect is the empty destination $(root)/usr/include/uapi will go
away.

I am also removing the trailing slash in the headers_check target to
skip checking in arch/$(hdr-arch)/include/uapi.

Fixes: 61562f981e92 ("uapi: export all arch specifics directories")
Reported-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 Makefile                     |  2 +-
 scripts/Makefile.headersinst | 43 +++++++++++++++++++++++++++----------------
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index b400c0604fac..b1ee4a49efa2 100644
--- a/Makefile
+++ b/Makefile
@@ -1172,7 +1172,7 @@ headers_check_all: headers_install_all
 PHONY += headers_check
 headers_check: headers_install
 	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/ $(hdr-dst) HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------
 # Kernel selftest
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 6ba97a1f9c5a..ce753a408c56 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -8,6 +8,29 @@
 #
 # ==========================================================================
 
+PHONY := __headers
+__headers:
+
+include scripts/Kbuild.include
+
+srcdir        := $(srctree)/$(obj)
+subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+# caller may set destination dir (when installing to asm/)
+_dst          := $(if $(dst),$(dst),$(obj))
+
+# Recursion
+__headers: $(subdirs)
+
+.PHONY: $(subdirs)
+$(subdirs):
+	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
+
+# Skip header install/check for include/uapi and arch/$(hdr-arch)/include/uapi.
+# We have only sub-directories there.
+skip-inst := $(if $(filter %/uapi,$(obj)),1)
+
+ifeq ($(skip-inst),)
+
 # generated header directory
 gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
 
@@ -15,21 +38,14 @@ gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
 kbuild-file := $(srctree)/$(obj)/Kbuild
 -include $(kbuild-file)
 
-# called may set destination dir (when installing to asm/)
-_dst := $(if $(dst),$(dst),$(obj))
-
 old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
 ifneq ($(wildcard $(old-kbuild-file)),)
 include $(old-kbuild-file)
 endif
 
-include scripts/Kbuild.include
-
 installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 
-srcdir        := $(srctree)/$(obj)
 gendir        := $(objtree)/$(gen)
-subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
 header-files  := $(notdir $(wildcard $(srcdir)/*.h))
 header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
 header-files  := $(filter-out $(no-export-headers), $(header-files))
@@ -88,11 +104,9 @@ quiet_cmd_check = CHECK   $(printdir) ($(words $(all-files)) files)
                   $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \
 	          touch $@
 
-PHONY += __headersinst __headerscheck
-
 ifndef HDRCHECK
 # Rules for installing headers
-__headersinst: $(subdirs) $(install-file)
+__headers: $(install-file)
 	@:
 
 targets += $(install-file)
@@ -104,7 +118,7 @@ $(install-file): scripts/headers_install.sh \
 	$(call if_changed,install)
 
 else
-__headerscheck: $(subdirs) $(check-file)
+__headers: $(check-file)
 	@:
 
 targets += $(check-file)
@@ -113,11 +127,6 @@ $(check-file): scripts/headers_check.pl $(output-files) FORCE
 
 endif
 
-# Recursion
-.PHONY: $(subdirs)
-$(subdirs):
-	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
-
 targets := $(wildcard $(sort $(targets)))
 cmd_files := $(wildcard \
              $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
@@ -126,6 +135,8 @@ ifneq ($(cmd_files),)
 	include $(cmd_files)
 endif
 
+endif # skip-inst
+
 .PHONY: $(PHONY)
 PHONY += FORCE
 FORCE: ;
-- 
cgit v1.2.3-59-g8ed1b


From 2423496af35d94a87156b063ea5cedffc10a70a1 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Tue, 16 May 2017 14:36:23 -0400
Subject: ipv6: Prevent overrun when parsing v6 header options

The KASAN warning repoted below was discovered with a syzkaller
program.  The reproducer is basically:
  int s = socket(AF_INET6, SOCK_RAW, NEXTHDR_HOP);
  send(s, &one_byte_of_data, 1, MSG_MORE);
  send(s, &more_than_mtu_bytes_data, 2000, 0);

The socket() call sets the nexthdr field of the v6 header to
NEXTHDR_HOP, the first send call primes the payload with a non zero
byte of data, and the second send call triggers the fragmentation path.

The fragmentation code tries to parse the header options in order
to figure out where to insert the fragment option.  Since nexthdr points
to an invalid option, the calculation of the size of the network header
can made to be much larger than the linear section of the skb and data
is read outside of it.

This fix makes ip6_find_1stfrag return an error if it detects
running out-of-bounds.

[   42.361487] ==================================================================
[   42.364412] BUG: KASAN: slab-out-of-bounds in ip6_fragment+0x11c8/0x3730
[   42.365471] Read of size 840 at addr ffff88000969e798 by task ip6_fragment-oo/3789
[   42.366469]
[   42.366696] CPU: 1 PID: 3789 Comm: ip6_fragment-oo Not tainted 4.11.0+ #41
[   42.367628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014
[   42.368824] Call Trace:
[   42.369183]  dump_stack+0xb3/0x10b
[   42.369664]  print_address_description+0x73/0x290
[   42.370325]  kasan_report+0x252/0x370
[   42.370839]  ? ip6_fragment+0x11c8/0x3730
[   42.371396]  check_memory_region+0x13c/0x1a0
[   42.371978]  memcpy+0x23/0x50
[   42.372395]  ip6_fragment+0x11c8/0x3730
[   42.372920]  ? nf_ct_expect_unregister_notifier+0x110/0x110
[   42.373681]  ? ip6_copy_metadata+0x7f0/0x7f0
[   42.374263]  ? ip6_forward+0x2e30/0x2e30
[   42.374803]  ip6_finish_output+0x584/0x990
[   42.375350]  ip6_output+0x1b7/0x690
[   42.375836]  ? ip6_finish_output+0x990/0x990
[   42.376411]  ? ip6_fragment+0x3730/0x3730
[   42.376968]  ip6_local_out+0x95/0x160
[   42.377471]  ip6_send_skb+0xa1/0x330
[   42.377969]  ip6_push_pending_frames+0xb3/0xe0
[   42.378589]  rawv6_sendmsg+0x2051/0x2db0
[   42.379129]  ? rawv6_bind+0x8b0/0x8b0
[   42.379633]  ? _copy_from_user+0x84/0xe0
[   42.380193]  ? debug_check_no_locks_freed+0x290/0x290
[   42.380878]  ? ___sys_sendmsg+0x162/0x930
[   42.381427]  ? rcu_read_lock_sched_held+0xa3/0x120
[   42.382074]  ? sock_has_perm+0x1f6/0x290
[   42.382614]  ? ___sys_sendmsg+0x167/0x930
[   42.383173]  ? lock_downgrade+0x660/0x660
[   42.383727]  inet_sendmsg+0x123/0x500
[   42.384226]  ? inet_sendmsg+0x123/0x500
[   42.384748]  ? inet_recvmsg+0x540/0x540
[   42.385263]  sock_sendmsg+0xca/0x110
[   42.385758]  SYSC_sendto+0x217/0x380
[   42.386249]  ? SYSC_connect+0x310/0x310
[   42.386783]  ? __might_fault+0x110/0x1d0
[   42.387324]  ? lock_downgrade+0x660/0x660
[   42.387880]  ? __fget_light+0xa1/0x1f0
[   42.388403]  ? __fdget+0x18/0x20
[   42.388851]  ? sock_common_setsockopt+0x95/0xd0
[   42.389472]  ? SyS_setsockopt+0x17f/0x260
[   42.390021]  ? entry_SYSCALL_64_fastpath+0x5/0xbe
[   42.390650]  SyS_sendto+0x40/0x50
[   42.391103]  entry_SYSCALL_64_fastpath+0x1f/0xbe
[   42.391731] RIP: 0033:0x7fbbb711e383
[   42.392217] RSP: 002b:00007ffff4d34f28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[   42.393235] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbbb711e383
[   42.394195] RDX: 0000000000001000 RSI: 00007ffff4d34f60 RDI: 0000000000000003
[   42.395145] RBP: 0000000000000046 R08: 00007ffff4d34f40 R09: 0000000000000018
[   42.396056] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000400aad
[   42.396598] R13: 0000000000000066 R14: 00007ffff4d34ee0 R15: 00007fbbb717af00
[   42.397257]
[   42.397411] Allocated by task 3789:
[   42.397702]  save_stack_trace+0x16/0x20
[   42.398005]  save_stack+0x46/0xd0
[   42.398267]  kasan_kmalloc+0xad/0xe0
[   42.398548]  kasan_slab_alloc+0x12/0x20
[   42.398848]  __kmalloc_node_track_caller+0xcb/0x380
[   42.399224]  __kmalloc_reserve.isra.32+0x41/0xe0
[   42.399654]  __alloc_skb+0xf8/0x580
[   42.400003]  sock_wmalloc+0xab/0xf0
[   42.400346]  __ip6_append_data.isra.41+0x2472/0x33d0
[   42.400813]  ip6_append_data+0x1a8/0x2f0
[   42.401122]  rawv6_sendmsg+0x11ee/0x2db0
[   42.401505]  inet_sendmsg+0x123/0x500
[   42.401860]  sock_sendmsg+0xca/0x110
[   42.402209]  ___sys_sendmsg+0x7cb/0x930
[   42.402582]  __sys_sendmsg+0xd9/0x190
[   42.402941]  SyS_sendmsg+0x2d/0x50
[   42.403273]  entry_SYSCALL_64_fastpath+0x1f/0xbe
[   42.403718]
[   42.403871] Freed by task 1794:
[   42.404146]  save_stack_trace+0x16/0x20
[   42.404515]  save_stack+0x46/0xd0
[   42.404827]  kasan_slab_free+0x72/0xc0
[   42.405167]  kfree+0xe8/0x2b0
[   42.405462]  skb_free_head+0x74/0xb0
[   42.405806]  skb_release_data+0x30e/0x3a0
[   42.406198]  skb_release_all+0x4a/0x60
[   42.406563]  consume_skb+0x113/0x2e0
[   42.406910]  skb_free_datagram+0x1a/0xe0
[   42.407288]  netlink_recvmsg+0x60d/0xe40
[   42.407667]  sock_recvmsg+0xd7/0x110
[   42.408022]  ___sys_recvmsg+0x25c/0x580
[   42.408395]  __sys_recvmsg+0xd6/0x190
[   42.408753]  SyS_recvmsg+0x2d/0x50
[   42.409086]  entry_SYSCALL_64_fastpath+0x1f/0xbe
[   42.409513]
[   42.409665] The buggy address belongs to the object at ffff88000969e780
[   42.409665]  which belongs to the cache kmalloc-512 of size 512
[   42.410846] The buggy address is located 24 bytes inside of
[   42.410846]  512-byte region [ffff88000969e780, ffff88000969e980)
[   42.411941] The buggy address belongs to the page:
[   42.412405] page:ffffea000025a780 count:1 mapcount:0 mapping:          (null) index:0x0 compound_mapcount: 0
[   42.413298] flags: 0x100000000008100(slab|head)
[   42.413729] raw: 0100000000008100 0000000000000000 0000000000000000 00000001800c000c
[   42.414387] raw: ffffea00002a9500 0000000900000007 ffff88000c401280 0000000000000000
[   42.415074] page dumped because: kasan: bad access detected
[   42.415604]
[   42.415757] Memory state around the buggy address:
[   42.416222]  ffff88000969e880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   42.416904]  ffff88000969e900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   42.417591] >ffff88000969e980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   42.418273]                    ^
[   42.418588]  ffff88000969ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   42.419273]  ffff88000969ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   42.419882] ==================================================================

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Craig Gallek <kraig@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_offload.c |  2 ++
 net/ipv6/ip6_output.c  |  4 ++++
 net/ipv6/output_core.c | 14 ++++++++------
 net/ipv6/udp_offload.c |  2 ++
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 93e58a5e1837..eab36abc9f22 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -117,6 +117,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 
 		if (udpfrag) {
 			unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+			if (unfrag_ip6hlen < 0)
+				return ERR_PTR(unfrag_ip6hlen);
 			fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
 			fptr->frag_off = htons(offset);
 			if (skb->next)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 58f6288e9ba5..01deecda2f84 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -598,6 +598,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	u8 *prevhdr, nexthdr = 0;
 
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
+	if (hlen < 0) {
+		err = hlen;
+		goto fail;
+	}
 	nexthdr = *prevhdr;
 
 	mtu = ip6_skb_dst_mtu(skb);
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index cd4252346a32..e9065b8d3af8 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident);
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr =
-				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	unsigned int packet_len = skb_tail_pointer(skb) -
 		skb_network_header(skb);
 	int found_rhdr = 0;
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
-	while (offset + 1 <= packet_len) {
+	while (offset <= packet_len) {
+		struct ipv6_opt_hdr *exthdr;
 
 		switch (**nexthdr) {
 
@@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 			return offset;
 		}
 
-		offset += ipv6_optlen(exthdr);
-		*nexthdr = &exthdr->nexthdr;
+		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+			return -EINVAL;
+
 		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 						 offset);
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
 	}
 
-	return offset;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(ip6_find_1stfragopt);
 
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ac858c480f2f..b348cff47395 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -91,6 +91,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		 * bytes to insert fragment header.
 		 */
 		unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+		if (unfrag_ip6hlen < 0)
+			return ERR_PTR(unfrag_ip6hlen);
 		nexthdr = *prevhdr;
 		*prevhdr = NEXTHDR_FRAGMENT;
 		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
-- 
cgit v1.2.3-59-g8ed1b


From b6c41cb050d5debc7e4eaa0a81cbdbad72588891 Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Mon, 15 May 2017 16:28:17 -0700
Subject: sparc64: Fix mapping of 64k pages with MAP_FIXED

An incorrect huge page alignment check caused
mmap failure for 64K pages when MAP_FIXED is used
with address not aligned to HPAGE_SIZE.

Orabug: 25885991

Fixes: dcd1912d21a0 ("sparc64: Add 64K page size support")
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/hugetlb.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index dcbf985ab243..d1f837dc77a4 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -24,9 +24,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From deba804c90642c8ed0f15ac1083663976d578f54 Mon Sep 17 00:00:00 2001
From: Orlando Arias <oarias@knights.ucf.edu>
Date: Tue, 16 May 2017 15:34:00 -0400
Subject: sparc: Fix -Wstringop-overflow warning

Greetings,

GCC 7 introduced the -Wstringop-overflow flag to detect buffer overflows
in calls to string handling functions [1][2]. Due to the way
``empty_zero_page'' is declared in arch/sparc/include/setup.h, this
causes a warning to trigger at compile time in the function mem_init(),
which is subsequently converted to an error. The ensuing patch fixes
this issue and aligns the declaration of empty_zero_page to that of
other architectures. Thank you.

Cheers,
Orlando.

[1] https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02308.html
[2] https://gcc.gnu.org/gcc-7/changes.html

Signed-off-by: Orlando Arias <oarias@knights.ucf.edu>

--------------------------------------------------------------------------------
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/pgtable_32.h | 4 ++--
 arch/sparc/include/asm/setup.h      | 2 +-
 arch/sparc/mm/init_32.c             | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index ce6f56980aef..cf190728360b 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -91,9 +91,9 @@ extern unsigned long pfn_base;
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern unsigned long empty_zero_page;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page))
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
 /*
  * In general all page table modifications should use the V8 atomic
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 478bf6bb4598..3fae200dd251 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -16,7 +16,7 @@ extern char reboot_command[];
  */
 extern unsigned char boot_cpu_id;
 
-extern unsigned long empty_zero_page;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 extern int serial_console;
 static inline int con_is_present(void)
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index c6afe98de4d9..3bd0d513bddb 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -290,7 +290,7 @@ void __init mem_init(void)
 
 
 	/* Saves us work later. */
-	memset((void *)&empty_zero_page, 0, PAGE_SIZE);
+	memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
 	i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
 	i += 1;
-- 
cgit v1.2.3-59-g8ed1b


From 48078d2dac0a26f84f5f3ec704f24f7c832cce14 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
Date: Wed, 17 May 2017 11:47:00 -0400
Subject: sparc/ftrace: Fix ftrace graph time measurement

The ftrace function_graph time measurements of a given function is not
accurate according to those recorded by ftrace using the function
filters.  This change pulls the x86_64 fix from 'commit 722b3c746953
("ftrace/graph: Trace function entry before updating index")' into the
sparc specific prepare_ftrace_return which stops ftrace from
counting interrupted tasks in the time measurement.

Example measurements for select_task_rq_fair running "hackbench 100
process 1000":

              |  tracing/trace_stat/function0  |  function_graph
 Before patch |  2.802 us                      |  4.255 us
 After patch  |  2.749 us                      |  3.094 us

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/ftrace.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 6bcff698069b..cec54dc4ab81 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -130,17 +130,16 @@ unsigned long prepare_ftrace_return(unsigned long parent,
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return parent + 8UL;
 
-	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-				     frame_pointer, NULL) == -EBUSY)
-		return parent + 8UL;
-
 	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
 
 	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
+	if (!ftrace_graph_entry(&trace))
+		return parent + 8UL;
+
+	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
+				     frame_pointer, NULL) == -EBUSY)
 		return parent + 8UL;
-	}
 
 	return return_hooker;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9142e9007f2d7ab58a587a1e1d921b0064a339aa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 May 2017 13:27:53 -0700
Subject: net: fix compile error in skb_orphan_partial()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_INET is not set, net/core/sock.c can not compile :

net/core/sock.c: In function ‘skb_orphan_partial’:
net/core/sock.c:1810:2: error: implicit declaration of function
‘skb_is_tcp_pure_ack’ [-Werror=implicit-function-declaration]
  if (skb_is_tcp_pure_ack(skb))
  ^

Fix this by always including <net/tcp.h>

Fixes: f6ba8d33cfbb ("netem: fix skb_orphan_partial()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index e43e71d7856b..727f924b7f91 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,10 +139,7 @@
 
 #include <trace/events/sock.h>
 
-#ifdef CONFIG_INET
 #include <net/tcp.h>
-#endif
-
 #include <net/busy_poll.h>
 
 static DEFINE_MUTEX(proto_list_mutex);
-- 
cgit v1.2.3-59-g8ed1b


From 87fe603274aa9889c05cca3c3e45675e1997cb13 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 16 May 2017 16:39:43 -0400
Subject: bnxt_en: Call bnxt_dcb_init() after getting firmware DCBX
 configuration.

In the current code, bnxt_dcb_init() is called too early before we
determine if the firmware DCBX agent is running or not.  As a result,
we are not setting the DCB_CAP_DCBX_HOST and DCB_CAP_DCBX_LLD_MANAGED
flags properly to report to DCBNL.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b56c54d68d5e..03f55daecb20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7630,8 +7630,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->min_mtu = ETH_ZLEN;
 	dev->max_mtu = BNXT_MAX_MTU;
 
-	bnxt_dcb_init(bp);
-
 #ifdef CONFIG_BNXT_SRIOV
 	init_waitqueue_head(&bp->sriov_cfg_wait);
 #endif
@@ -7669,6 +7667,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bnxt_hwrm_func_qcfg(bp);
 	bnxt_hwrm_port_led_qcaps(bp);
 	bnxt_ethtool_init(bp);
+	bnxt_dcb_init(bp);
 
 	bnxt_set_rx_skb_mode(bp, false);
 	bnxt_set_tpa_flags(bp);
-- 
cgit v1.2.3-59-g8ed1b


From f667724b99ad1afc91f16064d8fb293d2805bd57 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 16 May 2017 16:39:44 -0400
Subject: bnxt_en: Check status of firmware DCBX agent before setting
 DCB_CAP_DCBX_HOST.

Otherwise, all the host based DCBX settings from lldpad will fail if the
firmware DCBX agent is running.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 46de2f8ff024..5c6dd0ce209f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -553,8 +553,10 @@ static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 	if ((mode & DCB_CAP_DCBX_VER_CEE) || !(mode & DCB_CAP_DCBX_VER_IEEE))
 		return 1;
 
-	if ((mode & DCB_CAP_DCBX_HOST) && BNXT_VF(bp))
-		return 1;
+	if (mode & DCB_CAP_DCBX_HOST) {
+		if (BNXT_VF(bp) || (bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
+			return 1;
+	}
 
 	if (mode == bp->dcbx_cap)
 		return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 579f1d926c66cc0bd3bd87b1fe2e091084b07430 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 17 May 2017 15:18:05 -0700
Subject: selftests/bpf: fix broken build due to types.h

Commit 0a5539f66133 ("bpf: Provide a linux/types.h override
for bpf selftests.") caused a build failure for tools/testing/selftest/bpf
because of some missing types:
    $ make -C tools/testing/selftests/bpf/
    ...
    In file included from /home/yhs/work/net-next/tools/testing/selftests/bpf/test_pkt_access.c:8:
    ../../../include/uapi/linux/bpf.h:170:3: error: unknown type name '__aligned_u64'
                    __aligned_u64   key;
    ...
    /usr/include/linux/swab.h:160:8: error: unknown type name '__always_inline'
    static __always_inline __u16 __swab16p(const __u16 *p)
    ...
The type __aligned_u64 is defined in linux:include/uapi/linux/types.h.

The fix is to copy missing type definition into
tools/testing/selftests/bpf/include/uapi/linux/types.h.
Adding additional include "string.h" resolves __always_inline issue.

Fixes: 0a5539f66133 ("bpf: Provide a linux/types.h override for bpf selftests.")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/include/uapi/linux/types.h | 16 ++++++++++++++++
 tools/testing/selftests/bpf/test_pkt_access.c          |  1 +
 2 files changed, 17 insertions(+)

diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h
index fbd16a7554af..51841848fbfe 100644
--- a/tools/testing/selftests/bpf/include/uapi/linux/types.h
+++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h
@@ -3,4 +3,20 @@
 
 #include <asm-generic/int-ll64.h>
 
+/* copied from linux:include/uapi/linux/types.h */
+#define __bitwise
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_be64 __be64 __attribute__((aligned(8)))
+#define __aligned_le64 __le64 __attribute__((aligned(8)))
+
 #endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
index 39387bb7e08c..6e11ba11709e 100644
--- a/tools/testing/selftests/bpf/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -5,6 +5,7 @@
  * License as published by the Free Software Foundation.
  */
 #include <stddef.h>
+#include <string.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
-- 
cgit v1.2.3-59-g8ed1b


From 53cf29d3b1bc5b86fcff5fdc52f873d79d908ef4 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Wed, 17 May 2017 19:02:17 -0300
Subject: scsi: lpfc: Fix NULL pointer dereference during PCI error recovery

Recent commit on patchset "lpfc updates for 11.2.0.14" fixed an issue
about dereferencing a NULL pointer on port reset. The specific commit,
named "lpfc: Fix system crash when port is reset.", is missing a check
against NULL pointer on lpfc_els_flush_cmd() though.

Since we destroy the queues on adapter resets, like in PCI error
recovery path, we need the validation present on this patch in order to
avoid a NULL pointer dereference when trying to flush commands of ELS
wq, after it has been destroyed (which would lead to a kernel oops).

Tested-by: Raphael Silva <raphasil@linux.vnet.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Acked-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 1d36f82fa369..8e532b39ae93 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -7451,6 +7451,13 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
 	 */
 	spin_lock_irq(&phba->hbalock);
 	pring = lpfc_phba_elsring(phba);
+
+	/* Bail out if we've no ELS wq, like in PCI error recovery case. */
+	if (unlikely(!pring)) {
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+
 	if (phba->sli_rev == LPFC_SLI_REV4)
 		spin_lock(&pring->ring_lock);
 
-- 
cgit v1.2.3-59-g8ed1b


From eeeb51d834d76c66784e7fe1a9ace3ce3f8d2af1 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 16 May 2017 20:52:29 -0700
Subject: scsi: lpfc: fix build issue if NVME_FC_TARGET is not defined

fix build issue if NVME_FC_TARGET is not defined. noop the code.  The
code will never be invoked if target mode is not enabled.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 312f54278bd4..f94294b77b7b 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -157,6 +157,7 @@ out:
 void
 lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
@@ -260,6 +261,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 		      &phba->sli4_hba.lpfc_nvmet_ctx_list);
 	phba->sli4_hba.nvmet_ctx_cnt++;
 	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
+#endif
 }
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-- 
cgit v1.2.3-59-g8ed1b


From b9ef0326c05a008c3c576bd4d676208b50c344d5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 17 May 2017 11:14:35 -0400
Subject: tracing: Move postpone selftests to core from early_initcall

I hit the following lockdep splat when booting with ftrace selftests
enabled, as well as CONFIG_PREEMPT and LOCKDEP.

 Testing dynamic ftrace ops #1:
 (1 0 1 0 0)
 (1 1 2 0 0)
 (2 1 3 0 169)
 (2 2 4 0 50066)
 ------------[ cut here ]------------
 WARNING: CPU: 0 PID: 13 at kernel/rcu/srcutree.c:202 check_init_srcu_struct+0x60/0x70
 Modules linked in:
 CPU: 0 PID: 13 Comm: rcu_tasks_kthre Not tainted 4.12.0-rc1-test+ #587
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
 task: ffff880119628040 task.stack: ffffc900006a4000
 RIP: 0010:check_init_srcu_struct+0x60/0x70
 RSP: 0000:ffffc900006a7d98 EFLAGS: 00010246
 RAX: 0000000000000246 RBX: 0000000000000000 RCX: 0000000000000000
 RDX: ffff880119628040 RSI: 00000000ffffffff RDI: ffffffff81e5fb40
 RBP: ffffc900006a7e20 R08: 00000023b403c000 R09: 0000000000000001
 R10: ffffc900006a7e40 R11: 0000000000000000 R12: ffffffff81e5fb40
 R13: 0000000000000286 R14: ffff880119628040 R15: ffffc900006a7e98
 FS:  0000000000000000(0000) GS:ffff88011ea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: ffff88011edff000 CR3: 0000000001e0f000 CR4: 00000000001406f0
 Call Trace:
  ? __synchronize_srcu+0x6e/0x140
  ? lock_acquire+0xdc/0x1d0
  ? ktime_get_mono_fast_ns+0x5d/0xb0
  synchronize_srcu+0x6f/0x110
  ? synchronize_srcu+0x6f/0x110
  rcu_tasks_kthread+0x20a/0x540
  kthread+0x114/0x150
  ? __rcu_read_unlock+0x70/0x70
  ? kthread_create_on_node+0x40/0x40
  ret_from_fork+0x2e/0x40
 Code: f6 83 70 06 00 00 03 49 89 c5 74 0d be 01 00 00 00 48 89 df e8 42 fa ff ff 4c 89 ee 4c 89 e7 e8 b7 42 75 00 5b 41 5c 41 5d 5d c3 <0f> ff eb aa 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00
 ---[ end trace 5c3f4206ce50f6ac ]---

What happens is that the selftests include a creating of a dynamically
allocated ftrace_ops, which requires the use of synchronize_rcu_tasks()
which uses srcu, and triggers the above warning.

It appears that synchronize_rcu_tasks() is not set up at early_initcall(),
but it is at core_initcall(). By moving the tests down to that location
works out properly.

Link: http://lkml.kernel.org/r/20170517111435.7388c033@gandalf.local.home

Acked-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c4536c449021..cdf97ce8cff2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1558,7 +1558,7 @@ static __init int init_trace_selftests(void)
 
 	return 0;
 }
-early_initcall(init_trace_selftests);
+core_initcall(init_trace_selftests);
 #else
 static inline int run_tracer_selftest(struct tracer *type)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 30e7d894c1478c88d50ce94ddcdbd7f9763d9cdd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 17 May 2017 10:19:49 +0200
Subject: tracing/kprobes: Enforce kprobes teardown after testing

Enabling the tracer selftest triggers occasionally the warning in
text_poke(), which warns when the to be modified page is not marked
reserved.

The reason is that the tracer selftest installs kprobes on functions marked
__init for testing. These probes are removed after the tests, but that
removal schedules the delayed kprobes_optimizer work, which will do the
actual text poke. If the work is executed after the init text is freed,
then the warning triggers. The bug can be reproduced reliably when the work
delay is increased.

Flush the optimizer work and wait for the optimizing/unoptimizing lists to
become empty before returning from the kprobes tracer selftest. That
ensures that all operations which were queued due to the probes removal
have completed.

Link: http://lkml.kernel.org/r/20170516094802.76a468bb@gandalf.local.home

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: stable@vger.kernel.org
Fixes: 6274de498 ("kprobes: Support delayed unoptimizing")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h     | 3 +++
 kernel/kprobes.c            | 2 +-
 kernel/trace/trace_kprobe.c | 5 +++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 30f90c1a0aaf..541df0b5b815 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -349,6 +349,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 					     int write, void __user *buffer,
 					     size_t *length, loff_t *ppos);
 #endif
+extern void wait_for_kprobe_optimizer(void);
+#else
+static inline void wait_for_kprobe_optimizer(void) { }
 #endif /* CONFIG_OPTPROBES */
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7367e0ec6f81..199243bba554 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -595,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work)
 }
 
 /* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
 {
 	mutex_lock(&kprobe_mutex);
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8485f6738a87..c129fca6ec99 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1535,6 +1535,11 @@ static __init int kprobe_trace_self_tests_init(void)
 
 end:
 	release_all_trace_kprobes();
+	/*
+	 * Wait for the optimizer work to finish. Otherwise it might fiddle
+	 * with probes in already freed __init text.
+	 */
+	wait_for_kprobe_optimizer();
 	if (warn)
 		pr_cont("NG: Some tests are failed. Please check them.\n");
 	else
-- 
cgit v1.2.3-59-g8ed1b


From cbab567c3dc7d6f443b4c84eab76e8967d5c1dee Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 16 May 2017 23:21:25 +0530
Subject: ftrace: Simplify glob handling in
 unregister_ftrace_function_probe_func()

Handle a NULL glob properly and simplify the check.

Link: http://lkml.kernel.org/r/5df74d4ffb4721db6d5a22fa08ca031d62ead493.1494956770.git.naveen.n.rao@linux.vnet.ibm.com

Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39dca4e86a94..c35c3e67d09a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4144,9 +4144,9 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 	int i, ret = -ENODEV;
 	int size;
 
-	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
+	if (!glob || !strlen(glob) || !strcmp(glob, "*"))
 		func_g.search = NULL;
-	else if (glob) {
+	else {
 		int not;
 
 		func_g.type = filter_parse_regex(glob, strlen(glob),
-- 
cgit v1.2.3-59-g8ed1b


From a0e6369e4bac8844825ae1a66ccd122b290dcc86 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 16 May 2017 23:21:26 +0530
Subject: ftrace/instances: Clear function triggers when removing instances

If instance directories are deleted while there are registered function
triggers:

  # cd /sys/kernel/debug/tracing/instances
  # mkdir test
  # echo "schedule:enable_event:sched:sched_switch" > test/set_ftrace_filter
  # rmdir test
  Unable to handle kernel paging request for data at address 0x00000008
  Unable to handle kernel paging request for data at address 0x00000008
  Faulting instruction address: 0xc0000000021edde8
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2048
  NUMA
  pSeries
  Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc kvm iptable_filter fuse binfmt_misc pseries_rng rng_core vmx_crypto ib_iser rdma_cm iw_cm ib_cm ib_core libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c multipath virtio_net virtio_blk virtio_pci crc32c_vpmsum virtio_ring virtio
  CPU: 8 PID: 8694 Comm: rmdir Not tainted 4.11.0-nnr+ #113
  task: c0000000bab52800 task.stack: c0000000baba0000
  NIP: c0000000021edde8 LR: c0000000021f0590 CTR: c000000002119620
  REGS: c0000000baba3870 TRAP: 0300   Not tainted  (4.11.0-nnr+)
  MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE>
    CR: 22002422  XER: 20000000
  CFAR: 00007fffabb725a8 DAR: 0000000000000008 DSISR: 40000000 SOFTE: 0
  GPR00: c00000000220f750 c0000000baba3af0 c000000003157e00 0000000000000000
  GPR04: 0000000000000040 00000000000000eb 0000000000000040 0000000000000000
  GPR08: 0000000000000000 0000000000000113 0000000000000000 c00000000305db98
  GPR12: c000000002119620 c00000000fd42c00 0000000000000000 0000000000000000
  GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  GPR20: 0000000000000000 0000000000000000 c0000000bab52e90 0000000000000000
  GPR24: 0000000000000000 00000000000000eb 0000000000000040 c0000000baba3bb0
  GPR28: c00000009cb06eb0 c0000000bab52800 c00000009cb06eb0 c0000000baba3bb0
  NIP [c0000000021edde8] ring_buffer_lock_reserve+0x8/0x4e0
  LR [c0000000021f0590] trace_event_buffer_lock_reserve+0xe0/0x1a0
  Call Trace:
  [c0000000baba3af0] [c0000000021f96c8] trace_event_buffer_commit+0x1b8/0x280 (unreliable)
  [c0000000baba3b60] [c00000000220f750] trace_event_buffer_reserve+0x80/0xd0
  [c0000000baba3b90] [c0000000021196b8] trace_event_raw_event_sched_switch+0x98/0x180
  [c0000000baba3c10] [c0000000029d9980] __schedule+0x6e0/0xab0
  [c0000000baba3ce0] [c000000002122230] do_task_dead+0x70/0xc0
  [c0000000baba3d10] [c0000000020ea9c8] do_exit+0x828/0xd00
  [c0000000baba3dd0] [c0000000020eaf70] do_group_exit+0x60/0x100
  [c0000000baba3e10] [c0000000020eb034] SyS_exit_group+0x24/0x30
  [c0000000baba3e30] [c00000000200bcec] system_call+0x38/0x54
  Instruction dump:
  60000000 60420000 7d244b78 7f63db78 4bffaa09 393efff8 793e0020 39200000
  4bfffecc 60420000 3c4c00f7 3842a020 <81230008> 2f890000 409e02f0 a14d0008
  ---[ end trace b917b8985d0e650b ]---
  Unable to handle kernel paging request for data at address 0x00000008
  Faulting instruction address: 0xc0000000021edde8
  Unable to handle kernel paging request for data at address 0x00000008
  Faulting instruction address: 0xc0000000021edde8
  Faulting instruction address: 0xc0000000021edde8

To address this, let's clear all registered function probes before
deleting the ftrace instance.

Link: http://lkml.kernel.org/r/c5f1ca624043690bd94642bb6bffd3f2fc504035.1494956770.git.naveen.n.rao@linux.vnet.ibm.com

Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 8 ++++++++
 kernel/trace/trace.c  | 3 +++
 kernel/trace/trace.h  | 1 +
 3 files changed, 12 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c35c3e67d09a..74fdfe9ed3db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4256,6 +4256,14 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 	return ret;
 }
 
+void clear_ftrace_function_probes(struct trace_array *tr)
+{
+	struct ftrace_func_probe *probe, *n;
+
+	list_for_each_entry_safe(probe, n, &tr->func_probes, list)
+		unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops);
+}
+
 static LIST_HEAD(ftrace_commands);
 static DEFINE_MUTEX(ftrace_cmd_mutex);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cdf97ce8cff2..664c44a6d48f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7550,6 +7550,9 @@ static int instance_rmdir(const char *name)
 	}
 
 	tracing_set_nop(tr);
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+	clear_ftrace_function_probes(tr);
+#endif
 	event_trace_del_tracer(tr);
 	ftrace_clear_pids(tr);
 	ftrace_destroy_function_files(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 291a1bca5748..98e0845f7235 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -980,6 +980,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 extern int
 unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 				      struct ftrace_probe_ops *ops);
+extern void clear_ftrace_function_probes(struct trace_array *tr);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
-- 
cgit v1.2.3-59-g8ed1b


From 8a49f3e03c8ac52fe1b706fffb13142295fa0c47 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 17 May 2017 21:53:32 -0400
Subject: ftrace: Remove #ifdef from code and add
 clear_ftrace_function_probes() stub

No need to add ugly #ifdefs in the code. Having a standard stub file is much
prettier.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 --
 kernel/trace/trace.h | 4 ++++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 664c44a6d48f..fcc9a2d774c3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7550,9 +7550,7 @@ static int instance_rmdir(const char *name)
 	}
 
 	tracing_set_nop(tr);
-#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
 	clear_ftrace_function_probes(tr);
-#endif
 	event_trace_del_tracer(tr);
 	ftrace_clear_pids(tr);
 	ftrace_destroy_function_files(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 98e0845f7235..39fd77330aab 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -999,6 +999,10 @@ static inline __init int unregister_ftrace_command(char *cmd_name)
 {
 	return -EINVAL;
 }
+static inline void clear_ftrace_function_probes(struct trace_array *tr)
+{
+}
+
 /*
  * The ops parameter passed in is usually undefined.
  * This must be a macro.
-- 
cgit v1.2.3-59-g8ed1b


From d2ffb8d3cc3458e2102b2f067a2e82c84947deea Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 16 May 2017 23:21:27 +0530
Subject: selftests/ftrace: Fix bashisms

Fix a few bashisms in ftrace selftests.

Link: http://lkml.kernel.org/r/5fbf4613eef0766918fa04e3ff537cae271223ee.1494956770.git.naveen.n.rao@linux.vnet.ibm.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/selftests/ftrace/ftracetest                           | 2 +-
 tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc | 2 +-
 tools/testing/selftests/ftrace/test.d/functions                     | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 32e6211e1c6e..717581145cfc 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -58,7 +58,7 @@ parse_opts() { # opts
     ;;
     --verbose|-v|-vv)
       VERBOSE=$((VERBOSE + 1))
-      [ $1 == '-vv' ] && VERBOSE=$((VERBOSE + 1))
+      [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
       shift 1
     ;;
     --debug|-d)
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 07bb3e5930b4..aa31368851c9 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -48,7 +48,7 @@ test_event_enabled() {
     e=`cat $EVENT_ENABLE`
     if [ "$e" != $val ]; then
 	echo "Expected $val but found $e"
-	exit -1
+	exit 1
     fi
 }
 
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 9aec6fcb7729..f2019b37370d 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -34,10 +34,10 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
     echo > set_ftrace_filter
     grep -v '^#' set_ftrace_filter | while read t; do
 	tr=`echo $t | cut -d: -f2`
-	if [ "$tr" == "" ]; then
+	if [ "$tr" = "" ]; then
 	    continue
 	fi
-	if [ $tr == "enable_event" -o $tr == "disable_event" ]; then
+	if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
 	    tr=`echo $t | cut -d: -f1-4`
 	    limit=`echo $t | cut -d: -f5`
 	else
-- 
cgit v1.2.3-59-g8ed1b


From b172296b90b799c8b634521c248e9316581c8154 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 16 May 2017 23:21:28 +0530
Subject: selftests/ftrace: Add test to remove instance with active event
 triggers

Add a test to ensure we clean up properly when removing an instance
with active event triggers.

Link: http://lkml.kernel.org/r/c479465b2009397708d6c52c8561e1523c22cd31.1494956770.git.naveen.n.rao@linux.vnet.ibm.com

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/selftests/ftrace/test.d/instances/instance-event.tc | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
index 4c5a061a5b4e..c73db7863adb 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -75,9 +75,13 @@ rmdir foo
 if [ -d foo ]; then
         fail "foo still exists"
 fi
-exit 0
-
 
+mkdir foo
+echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter
+rmdir foo
+if [ -d foo ]; then
+        fail "foo still exists"
+fi
 
 
 instance_slam() {
-- 
cgit v1.2.3-59-g8ed1b


From 545a028190dae4437aac4f86da7c8ab20857647c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 16 May 2017 14:58:35 -0400
Subject: kprobes: Document how optimized kprobes are removed from module
 unload

Thomas discovered a bug where the kprobe trace tests had a race
condition where the kprobe_optimizer called from a delayed work queue
that does the optimizing and "unoptimizing" of a kprobe, can try to
modify the text after it has been freed by the init code.

The kprobe trace selftest is a special case, and Thomas and myself
investigated to see if there's a chance that this could also be a bug
with module unloading, as the code is not obvious to how it handles
this. After adding lots of printks, I figured it out. Thomas suggested
that this should be commented so that others will not have to go
through this exercise again.

Link: http://lkml.kernel.org/r/20170516145835.3827d3aa@gandalf.local.home

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/kprobes.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 199243bba554..2d2d3a568e4e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2183,6 +2183,12 @@ static int kprobes_module_callback(struct notifier_block *nb,
 				 * The vaddr this probe is installed will soon
 				 * be vfreed buy not synced to disk. Hence,
 				 * disarming the breakpoint isn't needed.
+				 *
+				 * Note, this will also move any optimized probes
+				 * that are pending to be removed from their
+				 * corresponding lists to the freeing_list and
+				 * will not be touched by the delayed
+				 * kprobe_optimizer work handler.
 				 */
 				kill_kprobe(p);
 			}
-- 
cgit v1.2.3-59-g8ed1b


From 7dd7eb9513bd02184d45f000ab69d78cb1fa1531 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 17 May 2017 22:54:11 -0400
Subject: ipv6: Check ip6_find_1stfragopt() return value properly.

Do not use unsigned variables to see if it returns a negative
error or not.

Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options")
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_offload.c | 9 ++++-----
 net/ipv6/ip6_output.c  | 7 +++----
 net/ipv6/udp_offload.c | 8 +++++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index eab36abc9f22..280268f1dd7b 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	const struct net_offload *ops;
 	int proto;
 	struct frag_hdr *fptr;
-	unsigned int unfrag_ip6hlen;
 	unsigned int payload_len;
 	u8 *prevhdr;
 	int offset = 0;
@@ -116,10 +115,10 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		skb->network_header = (u8 *)ipv6h - skb->head;
 
 		if (udpfrag) {
-			unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
-			if (unfrag_ip6hlen < 0)
-				return ERR_PTR(unfrag_ip6hlen);
-			fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+			int err = ip6_find_1stfragopt(skb, &prevhdr);
+			if (err < 0)
+				return ERR_PTR(err);
+			fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
 			fptr->frag_off = htons(offset);
 			if (skb->next)
 				fptr->frag_off |= htons(IP6_MF);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 01deecda2f84..d4a31becbd25 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -597,11 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	int ptr, offset = 0, err = 0;
 	u8 *prevhdr, nexthdr = 0;
 
-	hlen = ip6_find_1stfragopt(skb, &prevhdr);
-	if (hlen < 0) {
-		err = hlen;
+	err = ip6_find_1stfragopt(skb, &prevhdr);
+	if (err < 0)
 		goto fail;
-	}
+	hlen = err;
 	nexthdr = *prevhdr;
 
 	mtu = ip6_skb_dst_mtu(skb);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b348cff47395..a2267f80febb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	u8 frag_hdr_sz = sizeof(struct frag_hdr);
 	__wsum csum;
 	int tnl_hlen;
+	int err;
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
@@ -90,9 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Find the unfragmentable header and shift it left by frag_hdr_sz
 		 * bytes to insert fragment header.
 		 */
-		unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
-		if (unfrag_ip6hlen < 0)
-			return ERR_PTR(unfrag_ip6hlen);
+		err = ip6_find_1stfragopt(skb, &prevhdr);
+		if (err < 0)
+			return ERR_PTR(err);
+		unfrag_ip6hlen = err;
 		nexthdr = *prevhdr;
 		*prevhdr = NEXTHDR_FRAGMENT;
 		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
-- 
cgit v1.2.3-59-g8ed1b


From 3c2ce60bdd3d57051bf85615deec04a694473840 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 18 May 2017 03:00:06 +0200
Subject: bpf: adjust verifier heuristics

Current limits with regards to processing program paths do not
really reflect today's needs anymore due to programs becoming
more complex and verifier smarter, keeping track of more data
such as const ALU operations, alignment tracking, spilling of
PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for
smarter matching of what LLVM generates.

This also comes with the side-effect that we result in fewer
opportunities to prune search states and thus often need to do
more work to prove safety than in the past due to different
register states and stack layout where we mismatch. Generally,
it's quite hard to determine what caused a sudden increase in
complexity, it could be caused by something as trivial as a
single branch somewhere at the beginning of the program where
LLVM assigned a stack slot that is marked differently throughout
other branches and thus causing a mismatch, where verifier
then needs to prove safety for the whole rest of the program.
Subsequently, programs with even less than half the insn size
limit can get rejected. We noticed that while some programs
load fine under pre 4.11, they get rejected due to hitting
limits on more recent kernels. We saw that in the vast majority
of cases (90+%) pruning failed due to register mismatches. In
case of stack mismatches, majority of cases failed due to
different stack slot types (invalid, spill, misc) rather than
differences in spilled registers.

This patch makes pruning more aggressive by also adding markers
that sit at conditional jumps as well. Currently, we only mark
jump targets for pruning. For example in direct packet access,
these are usually error paths where we bail out. We found that
adding these markers, it can reduce number of processed insns
by up to 30%. Another option is to ignore reg->id in probing
PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning
slightly as well by up to 7% observed complexity reduction as
stand-alone. Meaning, if a previous path with register type
PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then
in the current state a PTR_TO_MAP_VALUE_OR_NULL register for
the same map X must be safe as well. Last but not least the
patch also adds a scheduling point and bumps the current limit
for instructions to be processed to a more adequate value.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 39f2dcbc4cbc..1eddb713b815 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem {
 	struct bpf_verifier_stack_elem *next;
 };
 
-#define BPF_COMPLEXITY_LIMIT_INSNS	65536
+#define BPF_COMPLEXITY_LIMIT_INSNS	98304
 #define BPF_COMPLEXITY_LIMIT_STACK	1024
 
 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
@@ -2640,6 +2640,7 @@ peek_stack:
 				env->explored_states[t + 1] = STATE_LIST_MARK;
 		} else {
 			/* conditional jump with two edges */
+			env->explored_states[t] = STATE_LIST_MARK;
 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
 			if (ret == 1)
 				goto peek_stack;
@@ -2798,6 +2799,12 @@ static bool states_equal(struct bpf_verifier_env *env,
 		     rcur->type != NOT_INIT))
 			continue;
 
+		/* Don't care about the reg->id in this case. */
+		if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
+		    rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
+		    rold->map_ptr == rcur->map_ptr)
+			continue;
+
 		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
 		    compare_ptrs_to_packet(rold, rcur))
 			continue;
@@ -2932,6 +2939,9 @@ static int do_check(struct bpf_verifier_env *env)
 			goto process_bpf_exit;
 		}
 
+		if (need_resched())
+			cond_resched();
+
 		if (log_level > 1 || (log_level && do_print_state)) {
 			if (log_level > 1)
 				verbose("%d:", insn_idx);
-- 
cgit v1.2.3-59-g8ed1b


From 9933e113c2e87a9f46a40fde8dafbf801dca1ab9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 10 May 2017 03:48:23 +0800
Subject: crypto: skcipher - Add missing API setkey checks

The API setkey checks for key sizes and alignment went AWOL during the
skcipher conversion.  This patch restores them.

Cc: <stable@vger.kernel.org>
Fixes: 4e6c3df4d729 ("crypto: skcipher - Add low-level skcipher...")
Reported-by: Baozeng <sploving1@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/skcipher.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 014af741fc6a..4faa0fd53b0c 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -764,6 +764,44 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm,
+				     const u8 *key, unsigned int keylen)
+{
+	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
+	u8 *buffer, *alignbuffer;
+	unsigned long absize;
+	int ret;
+
+	absize = keylen + alignmask;
+	buffer = kmalloc(absize, GFP_ATOMIC);
+	if (!buffer)
+		return -ENOMEM;
+
+	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+	memcpy(alignbuffer, key, keylen);
+	ret = cipher->setkey(tfm, alignbuffer, keylen);
+	kzfree(buffer);
+	return ret;
+}
+
+static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
+	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+
+	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if ((unsigned long)key & alignmask)
+		return skcipher_setkey_unaligned(tfm, key, keylen);
+
+	return cipher->setkey(tfm, key, keylen);
+}
+
 static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
@@ -784,7 +822,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
 	    tfm->__crt_alg->cra_type == &crypto_givcipher_type)
 		return crypto_init_skcipher_ops_ablkcipher(tfm);
 
-	skcipher->setkey = alg->setkey;
+	skcipher->setkey = skcipher_setkey;
 	skcipher->encrypt = alg->encrypt;
 	skcipher->decrypt = alg->decrypt;
 	skcipher->ivsize = alg->ivsize;
-- 
cgit v1.2.3-59-g8ed1b


From 552c9f47f8d451830a6b47151c6d2db77f77cc3e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Wed, 17 May 2017 13:12:51 +0200
Subject: KVM: arm/arm64: Fix bug when registering redist iodevs

If userspace creates the VCPUs after initializing the VGIC, then we end
up in a situation where we trigger a bug in kvm_vcpu_get_idx(), because
it is called prior to adding the VCPU into the vcpus array on the VM.

There is no tight coupling between the VCPU index and the area of the
redistributor region used for the VCPU, so we can simply ensure that all
creations of redistributors are serialized per VM, and increment an
offset when we successfully add a redistributor.

The vgic_register_redist_iodev() function can be called from two paths:
vgic_redister_all_redist_iodev() which is called via the kvm_vgic_addr()
device attribute handler.  This patch already holds the kvm->lock mutex.

The other path is via kvm_vgic_vcpu_init, which is called through a
longer chain from kvm_vm_ioctl_create_vcpu(), which releases the
kvm->lock mutex just before calling kvm_arch_vcpu_create(), so we can
simply take this mutex again later for our purposes.

Fixes: ab6f468c10 ("KVM: arm/arm64: Register iodevs when setting redist base and creating VCPUs")
Signed-off-by: Christoffer Dall <cdall@linaro.org>
Tested-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 include/kvm/arm_vgic.h           | 5 ++++-
 virt/kvm/arm/vgic/vgic-init.c    | 5 ++++-
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 9 ++++++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 97b8d3728b31..ef718586321c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -195,7 +195,10 @@ struct vgic_dist {
 		/* either a GICv2 CPU interface */
 		gpa_t			vgic_cpu_base;
 		/* or a number of GICv3 redistributor regions */
-		gpa_t			vgic_redist_base;
+		struct {
+			gpa_t		vgic_redist_base;
+			gpa_t		vgic_redist_free_offset;
+		};
 	};
 
 	/* distributor enabled */
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index dc68e2e424ab..3a0b8999f011 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -242,8 +242,11 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 * If we are creating a VCPU with a GICv3 we must also register the
 	 * KVM io device for the redistributor that belongs to this VCPU.
 	 */
-	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+		mutex_lock(&vcpu->kvm->lock);
 		ret = vgic_register_redist_iodev(vcpu);
+		mutex_unlock(&vcpu->kvm->lock);
+	}
 	return ret;
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 99da1a207c19..9b0f6810e7a8 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -586,7 +586,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	if (!vgic_v3_check_base(kvm))
 		return -EINVAL;
 
-	rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2;
+	rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset;
 	sgi_base = rd_base + SZ_64K;
 
 	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
@@ -615,11 +615,14 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
 				      SZ_64K, &sgi_dev->dev);
 	mutex_unlock(&kvm->slots_lock);
-	if (ret)
+	if (ret) {
 		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
 					  &rd_dev->dev);
+		return ret;
+	}
 
-	return ret;
+	vgic->vgic_redist_free_offset += 2 * SZ_64K;
+	return 0;
 }
 
 static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3-59-g8ed1b


From fa472fa91a5a0b241f5ddae927d2e235d07545df Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Wed, 17 May 2017 21:16:09 +0200
Subject: KVM: arm/arm64: Hold slots_lock when unregistering kvm io bus devices

We were not holding the kvm->slots_lock as required when calling
kvm_io_bus_unregister_dev() as required.

This only affects the error path, but still, let's do our due
diligence.

Reported by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 9b0f6810e7a8..201d5e2e973d 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -614,15 +614,16 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
 				      SZ_64K, &sgi_dev->dev);
-	mutex_unlock(&kvm->slots_lock);
 	if (ret) {
 		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
 					  &rd_dev->dev);
-		return ret;
+		goto out;
 	}
 
 	vgic->vgic_redist_free_offset += 2 * SZ_64K;
-	return 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
 }
 
 static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
@@ -647,10 +648,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
 
 	if (ret) {
 		/* The current c failed, so we start with the previous one. */
+		mutex_lock(&kvm->slots_lock);
 		for (c--; c >= 0; c--) {
 			vcpu = kvm_get_vcpu(kvm, c);
 			vgic_unregister_redist_iodev(vcpu);
 		}
+		mutex_unlock(&kvm->slots_lock);
 	}
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 751a9c763849f5859cb69ea44b0430d00672f637 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 17 May 2017 11:24:47 -0400
Subject: netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside
 CONFIG_COMPAT

The patch in the Fixes references COMPAT_XT_ALIGN in the definition
of XT_DATA_TO_USER, outside an #ifdef CONFIG_COMPAT block.

Split XT_DATA_TO_USER into separate compat and non compat variants and
define the first inside an CONFIG_COMPAT block.

This simplifies both variants by removing branches inside the macro.

Fixes: 324318f0248c ("netfilter: xtables: zero padding in data_to_user")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d17769599c10..1770c1d9b37f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -296,18 +296,17 @@ int xt_data_to_user(void __user *dst, const void *src,
 }
 EXPORT_SYMBOL_GPL(xt_data_to_user);
 
-#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
+#define XT_DATA_TO_USER(U, K, TYPE)					\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size,	\
-			C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) :		\
-				 XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
+			K->u.kernel.TYPE->TYPE##size,			\
+			XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
 {
 	return XT_OBJ_TO_USER(u, m, match, 0) ||
-	       XT_DATA_TO_USER(u, m, match, 0);
+	       XT_DATA_TO_USER(u, m, match);
 }
 EXPORT_SYMBOL_GPL(xt_match_to_user);
 
@@ -315,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u)
 {
 	return XT_OBJ_TO_USER(u, t, target, 0) ||
-	       XT_DATA_TO_USER(u, t, target, 0);
+	       XT_DATA_TO_USER(u, t, target);
 }
 EXPORT_SYMBOL_GPL(xt_target_to_user);
 
@@ -614,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
+#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE)			\
+	xt_data_to_user(U->data, K->data,				\
+			K->u.kernel.TYPE->usersize,			\
+			C_SIZE,						\
+			COMPAT_XT_ALIGN(C_SIZE))
+
 int xt_compat_match_to_user(const struct xt_entry_match *m,
 			    void __user **dstptr, unsigned int *size)
 {
@@ -629,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
 		if (match->compat_to_user((void __user *)cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
+		if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
 			return -EFAULT;
 	}
 
@@ -975,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t,
 		if (target->compat_to_user((void __user *)ct->data, t->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
+		if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
 			return -EFAULT;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d3e7dec054174fdddae33eaa0032a82c3f42181d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 18 May 2017 10:38:53 +0300
Subject: KVM: Silence underflow warning in avic_get_physical_id_entry()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Smatch complains that we check cap the upper bound of "index" but don't
check for negatives.  It's a false positive because "index" is never
negative.  But it's also simple enough to make it unsigned which makes
the code easier to audit.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/svm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c27ac6923a18..183ddb235fb4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1272,7 +1272,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 
 }
 
-static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
+static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
+				       unsigned int index)
 {
 	u64 *avic_physical_id_table;
 	struct kvm_arch *vm_data = &vcpu->kvm->arch;
-- 
cgit v1.2.3-59-g8ed1b


From 7bc5d5aff356f3ba16c4d1e9eaf95cc99b7574ab Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 17 May 2017 18:31:59 +0300
Subject: usb: xhci: trace URB before giving it back instead of after

Don't access any members of a URB after giving it back.
URB might be freed by then already.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 74bf5c60a260..507ba7734b94 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -641,8 +641,8 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci,
 	xhci_urb_free_priv(urb_priv);
 	usb_hcd_unlink_urb_from_ep(hcd, urb);
 	spin_unlock(&xhci->lock);
-	usb_hcd_giveback_urb(hcd, urb, status);
 	trace_xhci_urb_giveback(urb);
+	usb_hcd_giveback_urb(hcd, urb, status);
 	spin_lock(&xhci->lock);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a0c16630d35a874e82bdf2088f58ecaca1024315 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 17 May 2017 18:32:00 +0300
Subject: xhci: apply PME_STUCK_QUIRK and MISSING_CAS quirk for Denverton

Intel Denverton microserver is Atom based and need the PME and CAS quirks
as well.

Cc: stable <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7b86508ac8cf..fcf1f3f63e7a 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -52,6 +52,7 @@
 #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI		0x0aa8
 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI		0x1aa8
 #define PCI_DEVICE_ID_INTEL_APL_XHCI			0x5aa8
+#define PCI_DEVICE_ID_INTEL_DNV_XHCI			0x19d0
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -166,7 +167,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI ||
-		 pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) {
+		 pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
+		 pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) {
 		xhci->quirks |= XHCI_PME_STUCK_QUIRK;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
@@ -175,7 +177,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
 	    (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
-	     pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI))
+	     pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI))
 		xhci->quirks |= XHCI_MISSING_CAS;
 
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
-- 
cgit v1.2.3-59-g8ed1b


From 7480d912d549f414e0ce39331870899e89a5598c Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Wed, 17 May 2017 18:32:01 +0300
Subject: usb: host: xhci-mem: allocate zeroed Scratchpad Buffer

According to xHCI ch4.20 Scratchpad Buffers, the Scratchpad
Buffer needs to be zeroed.

	...
	The following operations take place to allocate
       	Scratchpad Buffers to the xHC:
	...
		b. Software clears the Scratchpad Buffer to '0'

Cc: stable <stable@vger.kernel.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 69428e970925..12b573cfb846 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1724,7 +1724,7 @@ static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags)
 	xhci->dcbaa->dev_context_ptrs[0] = cpu_to_le64(xhci->scratchpad->sp_dma);
 	for (i = 0; i < num_sp; i++) {
 		dma_addr_t dma;
-		void *buf = dma_alloc_coherent(dev, xhci->page_size, &dma,
+		void *buf = dma_zalloc_coherent(dev, xhci->page_size, &dma,
 				flags);
 		if (!buf)
 			goto fail_sp4;
-- 
cgit v1.2.3-59-g8ed1b


From 6a29beef9d1b16c762e469d77e28c3de3f5c3dbb Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Wed, 17 May 2017 18:32:02 +0300
Subject: usb: host: xhci-ring: don't need to clear interrupt pending for MSI
 enabled hcd

According to xHCI spec Figure 30: Interrupt Throttle Flow Diagram

	If PCI Message Signaled Interrupts (MSI or MSI-X) are enabled,
       	then the assertion of the Interrupt Pending (IP) flag in Figure 30
       	generates a PCI Dword write. The IP flag is automatically cleared
       	by the completion of the PCI write.

the MSI enabled HCs don't need to clear interrupt pending bit, but
hcd->irq = 0 doesn't equal to MSI enabled HCD. At some Dual-role
controller software designs, it sets hcd->irq as 0 to avoid HCD
requesting interrupt, and they want to decide when to call usb_hcd_irq
by software.

Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 5 +----
 drivers/usb/host/xhci.c      | 5 +++--
 include/linux/usb/hcd.h      | 1 +
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 507ba7734b94..0830b25f9499 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2707,12 +2707,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	 */
 	status |= STS_EINT;
 	writel(status, &xhci->op_regs->status);
-	/* FIXME when MSI-X is supported and there are multiple vectors */
-	/* Clear the MSI-X event interrupt status */
 
-	if (hcd->irq) {
+	if (!hcd->msi_enabled) {
 		u32 irq_pending;
-		/* Acknowledge the PCI interrupt */
 		irq_pending = readl(&xhci->ir_set->irq_pending);
 		irq_pending |= IMAN_IP;
 		writel(irq_pending, &xhci->ir_set->irq_pending);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2d1310220832..71eb2991c698 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -359,9 +359,10 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd)
 		/* fall back to msi*/
 		ret = xhci_setup_msi(xhci);
 
-	if (!ret)
-		/* hcd->irq is 0, we have MSI */
+	if (!ret) {
+		hcd->msi_enabled = 1;
 		return 0;
+	}
 
 	if (!pdev->irq) {
 		xhci_err(xhci, "No msi-x/msi found and no IRQ in BIOS\n");
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index a469999a106d..50398b69ca44 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -148,6 +148,7 @@ struct usb_hcd {
 	unsigned		rh_registered:1;/* is root hub registered? */
 	unsigned		rh_pollable:1;	/* may we poll the root hub? */
 	unsigned		msix_enabled:1;	/* driver has MSI-X enabled? */
+	unsigned		msi_enabled:1;	/* driver has MSI enabled? */
 	unsigned		remove_phy:1;	/* auto-remove USB phy */
 
 	/* The next flag is a stopgap, to be removed when all the HCDs
-- 
cgit v1.2.3-59-g8ed1b


From 63aea0dbab90a2461faaae357cbc8cfd6c8de9fe Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 17 May 2017 18:32:03 +0300
Subject: USB: xhci: fix lock-inversion problem

With threaded interrupts, bottom-half handlers are called with
interrupts enabled.  Therefore they can't safely use spin_lock(); they
have to use spin_lock_irqsave().  Lockdep warns about a violation
occurring in xhci_irq():

=========================================================
[ INFO: possible irq lock inversion dependency detected ]
4.11.0-rc8-dbg+ #1 Not tainted
---------------------------------------------------------
swapper/7/0 just changed the state of lock:
 (&(&ehci->lock)->rlock){-.-...}, at: [<ffffffffa0130a69>]
ehci_hrtimer_func+0x29/0xc0 [ehci_hcd]
but this lock took another, HARDIRQ-unsafe lock in the past:
 (hcd_urb_list_lock){+.....}

and interrupts could create inverse lock ordering between them.

other info that might help us debug this:
 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(hcd_urb_list_lock);
                               local_irq_disable();
                               lock(&(&ehci->lock)->rlock);
                               lock(hcd_urb_list_lock);
  <Interrupt>
    lock(&(&ehci->lock)->rlock);
 *** DEADLOCK ***

no locks held by swapper/7/0.
the shortest dependencies between 2nd lock and 1st lock:
 -> (hcd_urb_list_lock){+.....} ops: 252 {
    HARDIRQ-ON-W at:
                      __lock_acquire+0x602/0x1280
                      lock_acquire+0xd5/0x1c0
                      _raw_spin_lock+0x2f/0x40
                      usb_hcd_unlink_urb_from_ep+0x1b/0x60 [usbcore]
                      xhci_giveback_urb_in_irq.isra.45+0x70/0x1b0 [xhci_hcd]
                      finish_td.constprop.60+0x1d8/0x2e0 [xhci_hcd]
                      xhci_irq+0xdd6/0x1fa0 [xhci_hcd]
                      usb_hcd_irq+0x26/0x40 [usbcore]
                      irq_forced_thread_fn+0x2f/0x70
                      irq_thread+0x149/0x1d0
                      kthread+0x113/0x150
                      ret_from_fork+0x2e/0x40

This patch fixes the problem.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-and-tested-by: Bart Van Assche <bart.vanassche@sandisk.com>
CC: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 0830b25f9499..6d2492c1c643 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2677,11 +2677,12 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 	union xhci_trb *event_ring_deq;
 	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
 	dma_addr_t deq;
 	u64 temp_64;
 	u32 status;
 
-	spin_lock(&xhci->lock);
+	spin_lock_irqsave(&xhci->lock, flags);
 	/* Check if the xHC generated the interrupt, or the irq is shared */
 	status = readl(&xhci->op_regs->status);
 	if (status == ~(u32)0) {
@@ -2754,7 +2755,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	ret = IRQ_HANDLED;
 
 out:
-	spin_unlock(&xhci->lock);
+	spin_unlock_irqrestore(&xhci->lock, flags);
 
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 5db851cf20857c5504b146046e97cb7781f2a743 Mon Sep 17 00:00:00 2001
From: Matthias Lange <matthias.lange@kernkonzept.com>
Date: Wed, 17 May 2017 18:32:04 +0300
Subject: xhci: remove GFP_DMA flag from allocation

There is no reason to restrict allocations to the first 16MB ISA DMA
addresses.

It is causing problems in a virtualization setup with enabled IOMMU
(x86_64). The result is that USB is not working in the VM.

CC: <stable@vger.kernel.org>
Signed-off-by: Matthias Lange <matthias.lange@kernkonzept.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 12b573cfb846..1f1687e888d6 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -56,7 +56,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci,
 	}
 
 	if (max_packet) {
-		seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA);
+		seg->bounce_buf = kzalloc(max_packet, flags);
 		if (!seg->bounce_buf) {
 			dma_pool_free(xhci->segment_pool, seg->trbs, dma);
 			kfree(seg);
-- 
cgit v1.2.3-59-g8ed1b


From 604d02a2a66ab7f93fd3b2bde3698c29ef057b65 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 17 May 2017 18:32:05 +0300
Subject: xhci: Fix command ring stop regression in 4.11

In 4.11 TRB completion codes were renamed to match spec.

Completion codes for command ring stopped and endpoint stopped
were mixed, leading to failures while handling a stopped command ring.

Use the correct completion code for command ring stopped events.

Fixes: 0b7c105a04ca ("usb: host: xhci: rename completion codes to match spec")
Cc: <stable@vger.kernel.org> # 4.11
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c  | 2 +-
 drivers/usb/host/xhci-ring.c | 8 ++++----
 drivers/usb/host/xhci.c      | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 5e3e9d4c6956..0dde49c35dd2 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -419,7 +419,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
 	wait_for_completion(cmd->completion);
 
 	if (cmd->status == COMP_COMMAND_ABORTED ||
-			cmd->status == COMP_STOPPED) {
+	    cmd->status == COMP_COMMAND_RING_STOPPED) {
 		xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n");
 		ret = -ETIME;
 	}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 6d2492c1c643..03f63f50afb6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -323,7 +323,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
 		if (i_cmd->status != COMP_COMMAND_ABORTED)
 			continue;
 
-		i_cmd->status = COMP_STOPPED;
+		i_cmd->status = COMP_COMMAND_RING_STOPPED;
 
 		xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
 			 i_cmd->command_trb);
@@ -1380,7 +1380,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 	cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status));
 
 	/* If CMD ring stopped we own the trbs between enqueue and dequeue */
-	if (cmd_comp_code == COMP_STOPPED) {
+	if (cmd_comp_code == COMP_COMMAND_RING_STOPPED) {
 		complete_all(&xhci->cmd_ring_stop_completion);
 		return;
 	}
@@ -1436,8 +1436,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		break;
 	case TRB_CMD_NOOP:
 		/* Is this an aborted command turned to NO-OP? */
-		if (cmd->status == COMP_STOPPED)
-			cmd_comp_code = COMP_STOPPED;
+		if (cmd->status == COMP_COMMAND_RING_STOPPED)
+			cmd_comp_code = COMP_COMMAND_RING_STOPPED;
 		break;
 	case TRB_RESET_EP:
 		WARN_ON(slot_id != TRB_TO_SLOT_ID(
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 71eb2991c698..30f47d92a610 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1764,7 +1764,7 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci,
 
 	switch (*cmd_status) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout while waiting for configure endpoint command\n");
 		ret = -ETIME;
 		break;
@@ -1814,7 +1814,7 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci,
 
 	switch (*cmd_status) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout while waiting for evaluate context command\n");
 		ret = -ETIME;
 		break;
@@ -3433,7 +3433,7 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd,
 	ret = reset_device_cmd->status;
 	switch (ret) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout waiting for reset device command\n");
 		ret = -ETIME;
 		goto command_cleanup;
@@ -3818,7 +3818,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 	 */
 	switch (command->status) {
 	case COMP_COMMAND_ABORTED:
-	case COMP_STOPPED:
+	case COMP_COMMAND_RING_STOPPED:
 		xhci_warn(xhci, "Timeout while waiting for setup device command\n");
 		ret = -ETIME;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 4b148d5144d64ee135b8924350cb0b3a7fd21150 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 17 May 2017 18:32:06 +0300
Subject: usb: host: xhci-plat: propagate return value of platform_get_irq()

platform_get_irq() returns an error code, but the xhci-plat driver
ignores it and always returns -ENODEV. This is not correct, and
prevents -EPROBE_DEFER from being propagated properly.

CC: <stable@vger.kernel.org>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-plat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 7c2a9e7c8e0f..c04144b25a67 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -177,7 +177,7 @@ static int xhci_plat_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		return -ENODEV;
+		return irq;
 
 	/*
 	 * sysdev must point to a device that is known to the system firmware
-- 
cgit v1.2.3-59-g8ed1b


From 3c50ffef25855a9d9e4b07b02d756a8cdd653069 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 17 May 2017 11:23:10 -0500
Subject: usb: musb: Fix trying to suspend while active for OTG configurations

Commit d8e5f0eca1e8 ("usb: musb: Fix hardirq-safe hardirq-unsafe
lock order error") caused a regression where musb keeps trying to
enable host mode with no cable connected. This seems to be caused
by the fact that now phy is enabled earlier, and we are wrongly
trying to force USB host mode on an OTG port. The errors we are
getting are "trying to suspend as a_idle while active".

For ports configured as OTG, we should not need to do anything
to try to force USB host mode on it's OTG port. Trying to force host
mode in this case just seems to completely confuse the musb state
machine.

Let's fix the issue by making musb_host_setup() attempt to force the
mode only if port_mode is configured for host mode.

Fixes: d8e5f0eca1e8 ("usb: musb: Fix hardirq-safe hardirq-unsafe lock order error")
Cc: Johan Hovold <johan@kernel.org>
Cc: stable <stable@vger.kernel.org>
Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reported-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_host.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index ac3a4952abb4..dbe617a735d8 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2780,10 +2780,11 @@ int musb_host_setup(struct musb *musb, int power_budget)
 	int ret;
 	struct usb_hcd *hcd = musb->hcd;
 
-	MUSB_HST_MODE(musb);
-	musb->xceiv->otg->default_a = 1;
-	musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-
+	if (musb->port_mode == MUSB_PORT_MODE_HOST) {
+		MUSB_HST_MODE(musb);
+		musb->xceiv->otg->default_a = 1;
+		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+	}
 	otg_set_host(musb->xceiv->otg, &hcd->self);
 	hcd->self.otg_port = 1;
 	musb->xceiv->otg->host = &hcd->self;
-- 
cgit v1.2.3-59-g8ed1b


From 6df2b42f7c040d57d9ecb67244e04e905ab87ac6 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 17 May 2017 11:23:11 -0500
Subject: usb: musb: tusb6010_omap: Do not reset the other direction's packet
 size

We have one register for each EP to set the maximum packet size for both
TX and RX.
If for example an RX programming would happen before the previous TX
transfer finishes we would reset the TX packet side.

To fix this issue, only modify the TX or RX part of the register.

Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/tusb6010_omap.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c
index 8b43c4b99f04..7870b37e0ea5 100644
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -219,6 +219,7 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz,
 	u32				dma_remaining;
 	int				src_burst, dst_burst;
 	u16				csr;
+	u32				psize;
 	int				ch;
 	s8				dmareq;
 	s8				sync_dev;
@@ -390,15 +391,19 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz,
 
 	if (chdat->tx) {
 		/* Send transfer_packet_sz packets at a time */
-		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET,
-			chdat->transfer_packet_sz);
+		psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET);
+		psize &= ~0x7ff;
+		psize |= chdat->transfer_packet_sz;
+		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize);
 
 		musb_writel(ep_conf, TUSB_EP_TX_OFFSET,
 			TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len));
 	} else {
 		/* Receive transfer_packet_sz packets at a time */
-		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET,
-			chdat->transfer_packet_sz << 16);
+		psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET);
+		psize &= ~(0x7ff << 16);
+		psize |= (chdat->transfer_packet_sz << 16);
+		musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize);
 
 		musb_writel(ep_conf, TUSB_EP_RX_OFFSET,
 			TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len));
-- 
cgit v1.2.3-59-g8ed1b


From 66ea5974b36b73129bbdc129847ec73cecb3f14d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 15 May 2017 10:16:30 -0700
Subject: MAINTAINERS: greybus-dev list is members-only

The greybus-dev mailing list is a members-only list and is
moderated for non-subscribers.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index da0149f2d16c..f37700c663f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5702,7 +5702,7 @@ M:	Alex Elder <elder@kernel.org>
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 S:	Maintained
 F:	drivers/staging/greybus/
-L:	greybus-dev@lists.linaro.org
+L:	greybus-dev@lists.linaro.org (moderated for non-subscribers)
 
 GREYBUS AUDIO PROTOCOLS DRIVERS
 M:	Vaibhav Agarwal <vaibhav.sr@gmail.com>
-- 
cgit v1.2.3-59-g8ed1b


From 64df6d525fcff1630098db9238bfd2b3e092d5c1 Mon Sep 17 00:00:00 2001
From: linzhang <xiaolou4617@gmail.com>
Date: Wed, 17 May 2017 12:05:07 +0800
Subject: net: x25: fix one potential use-after-free issue

The function x25_init is not properly unregister related resources
on error handler.It is will result in kernel oops if x25_init init
failed, so add properly unregister call on error handler.

Also, i adjust the coding style and make x25_register_sysctl properly
return failure.

Signed-off-by: linzhang <xiaolou4617@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h        |  4 ++--
 net/x25/af_x25.c         | 24 ++++++++++++++++--------
 net/x25/sysctl_net_x25.c |  5 ++++-
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/net/x25.h b/include/net/x25.h
index c383aa4edbf0..6d30a01d281d 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *);
 
 /* sysctl_net_x25.c */
 #ifdef CONFIG_SYSCTL
-void x25_register_sysctl(void);
+int x25_register_sysctl(void);
 void x25_unregister_sysctl(void);
 #else
-static inline void x25_register_sysctl(void) {};
+static inline int x25_register_sysctl(void) { return 0; };
 static inline void x25_unregister_sysctl(void) {};
 #endif /* CONFIG_SYSCTL */
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 8b911c29860e..5a1a98df3499 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1791,32 +1791,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
 
 static int __init x25_init(void)
 {
-	int rc = proto_register(&x25_proto, 0);
+	int rc;
 
-	if (rc != 0)
+	rc = proto_register(&x25_proto, 0);
+	if (rc)
 		goto out;
 
 	rc = sock_register(&x25_family_ops);
-	if (rc != 0)
+	if (rc)
 		goto out_proto;
 
 	dev_add_pack(&x25_packet_type);
 
 	rc = register_netdevice_notifier(&x25_dev_notifier);
-	if (rc != 0)
+	if (rc)
 		goto out_sock;
 
-	pr_info("Linux Version 0.2\n");
+	rc = x25_register_sysctl();
+	if (rc)
+		goto out_dev;
 
-	x25_register_sysctl();
 	rc = x25_proc_init();
-	if (rc != 0)
-		goto out_dev;
+	if (rc)
+		goto out_sysctl;
+
+	pr_info("Linux Version 0.2\n");
+
 out:
 	return rc;
+out_sysctl:
+	x25_unregister_sysctl();
 out_dev:
 	unregister_netdevice_notifier(&x25_dev_notifier);
 out_sock:
+	dev_remove_pack(&x25_packet_type);
 	sock_unregister(AF_X25);
 out_proto:
 	proto_unregister(&x25_proto);
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index a06dfe143c67..ba078c85f0a1 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = {
 	{ },
 };
 
-void __init x25_register_sysctl(void)
+int __init x25_register_sysctl(void)
 {
 	x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table);
+	if (!x25_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void x25_unregister_sysctl(void)
-- 
cgit v1.2.3-59-g8ed1b


From 47ab37a19a8112670e63474016d5fbf86bc8737f Mon Sep 17 00:00:00 2001
From: Greentime Hu <green.hu@gmail.com>
Date: Wed, 17 May 2017 15:28:19 +0800
Subject: net: ethernet: faraday: To support device tree usage.

To support device tree usage for ftmac100.

Signed-off-by: Greentime Hu <green.hu@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftmac100.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 6ac336b546e6..1536356e2ea8 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1174,11 +1174,17 @@ static int ftmac100_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id ftmac100_of_ids[] = {
+	{ .compatible = "andestech,atmac100" },
+	{ }
+};
+
 static struct platform_driver ftmac100_driver = {
 	.probe		= ftmac100_probe,
 	.remove		= ftmac100_remove,
 	.driver		= {
 		.name	= DRV_NAME,
+		.of_match_table = ftmac100_of_ids
 	},
 };
 
@@ -1202,3 +1208,4 @@ module_exit(ftmac100_exit);
 MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>");
 MODULE_DESCRIPTION("FTMAC100 driver");
 MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(of, ftmac100_of_ids);
-- 
cgit v1.2.3-59-g8ed1b


From a285860211bf257b0e6d522dac6006794be348af Mon Sep 17 00:00:00 2001
From: Tobias Jungel <tobias.jungel@bisdn.de>
Date: Wed, 17 May 2017 09:29:12 +0200
Subject: bridge: netlink: check vlan_default_pvid range

Currently it is allowed to set the default pvid of a bridge to a value
above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and
returns -EINVAL in case the pvid is out of bounds.

Reproduce by calling:

[root@test ~]# ip l a type bridge
[root@test ~]# ip l a type dummy
[root@test ~]# ip l s bridge0 type bridge vlan_filtering 1
[root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999
[root@test ~]# ip l s dummy0 master bridge0
[root@test ~]# bridge vlan
port	vlan ids
bridge0	 9999 PVID Egress Untagged

dummy0	 9999 PVID Egress Untagged

Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid")
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Tobias Jungel <tobias.jungel@bisdn.de>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index c5ce7745b230..574f78824d8a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -835,6 +835,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 			return -EPROTONOSUPPORT;
 		}
 	}
+
+	if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
+		__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
+
+		if (defpvid >= VLAN_VID_MASK)
+			return -EINVAL;
+	}
 #endif
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From a3f96c47c8d4c38be71fdbb440a99968c764ba62 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 17 May 2017 14:52:16 +0200
Subject: udp: make *udp*_queue_rcv_skb() functions static

Since the udp memory accounting refactor, we don't need any more
to export the *udp*_queue_rcv_skb(). Make them static and fix
a couple of sparse warnings:

net/ipv4/udp.c:1615:5: warning: symbol 'udp_queue_rcv_skb' was not
declared. Should it be static?
net/ipv6/udp.c:572:5: warning: symbol 'udpv6_queue_rcv_skb' was not
declared. Should it be static?

Fixes: 850cbaddb52d ("udp: use it's own memory accounting schema")
Fixes: c915fe13cbaa ("udplite: fix NULL pointer dereference")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c      | 4 ++--
 net/ipv4/udp_impl.h | 1 -
 net/ipv6/udp.c      | 4 ++--
 net/ipv6/udp_impl.h | 1 -
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ea6e4cff9faf..1d6219bf2d6b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1612,7 +1612,7 @@ static void udp_v4_rehash(struct sock *sk)
 	udp_lib_rehash(sk, new_hash);
 }
 
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
@@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(udp_encap_enable);
  * Note that in the success and error cases, the skb is assumed to
  * have either been requeued or freed.
  */
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int is_udplite = IS_UDPLITE(sk);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index feb50a16398d..a8cf8c6fb60c 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		int flags, int *addr_len);
 int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udp_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 04862abfe4ec..06ec39b79609 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -526,7 +526,7 @@ out:
 	return;
 }
 
-int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
@@ -569,7 +569,7 @@ void udpv6_encap_enable(void)
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
 
-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int is_udplite = IS_UDPLITE(sk);
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index e78bdc76dcc3..f180b3d85e31 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -26,7 +26,6 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
 int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		  int flags, int *addr_len);
-int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udpv6_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3-59-g8ed1b


From fdcee2cbb8438702ea1b328fb6e0ac5e9a40c7f8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 17 May 2017 07:16:40 -0700
Subject: sctp: do not inherit ipv6_{mc|ac|fl}_list from parent

SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit
ipv6_mc_list from parent"), otherwise bad things can happen.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 142b70e959af..f5b45b8b8b16 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -677,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 	newnp = inet6_sk(newsk);
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+	newnp->ipv6_mc_list = NULL;
+	newnp->ipv6_ac_list = NULL;
+	newnp->ipv6_fl_list = NULL;
 
 	rcu_read_lock();
 	opt = rcu_dereference(np->opt);
-- 
cgit v1.2.3-59-g8ed1b


From 486181bcb3248e2f1977f4e69387a898234a4e1e Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Wed, 17 May 2017 16:31:41 +0200
Subject: qmi_wwan: add another Lenovo EM74xx device ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In their infinite wisdom, and never ending quest for end user frustration,
Lenovo has decided to use a new USB device ID for the wwan modules in
their 2017 laptops.  The actual hardware is still the Sierra Wireless
EM7455 or EM7430, depending on region.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d7165767ca9d..8f923a147fa9 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1196,6 +1196,8 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x907b, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x907b, 10)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */
-- 
cgit v1.2.3-59-g8ed1b


From 1ac91bff9c65d4a5e0da244495ea6275fd514c5a Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Thu, 18 May 2017 00:08:16 +0530
Subject: cxgb4: update latest firmware version supported

Change t4fw_version.h to update latest firmware version
number to 1.16.43.0.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index fa376444e57c..3549d3876278 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -37,7 +37,7 @@
 
 #define T4FW_VERSION_MAJOR 0x01
 #define T4FW_VERSION_MINOR 0x10
-#define T4FW_VERSION_MICRO 0x21
+#define T4FW_VERSION_MICRO 0x2B
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -46,7 +46,7 @@
 
 #define T5FW_VERSION_MAJOR 0x01
 #define T5FW_VERSION_MINOR 0x10
-#define T5FW_VERSION_MICRO 0x21
+#define T5FW_VERSION_MICRO 0x2B
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -55,7 +55,7 @@
 
 #define T6FW_VERSION_MAJOR 0x01
 #define T6FW_VERSION_MINOR 0x10
-#define T6FW_VERSION_MICRO 0x21
+#define T6FW_VERSION_MICRO 0x2B
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
-- 
cgit v1.2.3-59-g8ed1b


From 52499a6ad2aef20f7baf5872e97988c385170f1b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 20:24:41 +0200
Subject: gpu: host1x: select IOMMU_IOVA

When IOMMU_IOVA is not built-in but host1x is, we get a link error:

drivers/gpu/host1x/dev.o: In function `host1x_remove':
dev.c:(.text.host1x_remove+0x50): undefined reference to `put_iova_domain'
drivers/gpu/host1x/dev.o: In function `host1x_probe':
dev.c:(.text.host1x_probe+0x31c): undefined reference to `init_iova_domain'
dev.c:(.text.host1x_probe+0x38c): undefined reference to `put_iova_domain'
drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init':
cdma.c:(.text.host1x_cdma_init+0x238): undefined reference to `alloc_iova'
cdma.c:(.text.host1x_cdma_init+0x2c0): undefined reference to `__free_iova'
drivers/gpu/host1x/cdma.o: In function `host1x_cdma_deinit':
cdma.c:(.text.host1x_cdma_deinit+0xb0): undefined reference to `free_iova'

This adds the same select statement that we have for drm_tegra.

Fixes: 404bfb78daf3 ("gpu: host1x: Add IOMMU support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170419182449.885312-1-arnd@arndb.de
---
 drivers/gpu/host1x/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index b2fd029d67b3..91916326957f 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -1,6 +1,7 @@
 config TEGRA_HOST1X
 	tristate "NVIDIA Tegra host1x driver"
 	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+	select IOMMU_IOVA if IOMMU_SUPPORT
 	help
 	  Driver for the NVIDIA Tegra host1x hardware.
 
-- 
cgit v1.2.3-59-g8ed1b


From b299cde245b0b76c977f4291162cf668e087b408 Mon Sep 17 00:00:00 2001
From: Julius Werner <jwerner@chromium.org>
Date: Fri, 12 May 2017 14:42:58 -0700
Subject: drivers: char: mem: Check for address space wraparound with mmap()

/dev/mem currently allows mmap() mappings that wrap around the end of
the physical address space, which should probably be illegal. It
circumvents the existing STRICT_DEVMEM permission check because the loop
immediately terminates (as the start address is already higher than the
end address). On the x86_64 architecture it will then cause a panic
(from the BUG(start >= end) in arch/x86/mm/pat.c:reserve_memtype()).

This patch adds an explicit check to make sure offset + size will not
wrap around in the physical address type.

Signed-off-by: Julius Werner <jwerner@chromium.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/mem.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 7e4a9d1296bb..6e0cbe092220 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -340,6 +340,11 @@ static const struct vm_operations_struct mmap_mem_ops = {
 static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 {
 	size_t size = vma->vm_end - vma->vm_start;
+	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+
+	/* It's illegal to wrap around the end of the physical address space. */
+	if (offset + (phys_addr_t)size < offset)
+		return -EINVAL;
 
 	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
 		return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From 6bee9b78a7a5ea257b24d93974538938c82b1169 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 18 May 2017 14:35:21 +0200
Subject: drm/atmel-hlcdc: Fix output initialization

drm_of_find_panel_or_bridge() is expecting np to point to the encoder
node, not the bridge or panel this encoder is feeding.
Moreover, the endpoint parameter passed to drm_of_find_panel_or_bridge()
is always set to zero, which prevents us from probing all outputs.

We also move the atmel_hlcdc_rgb_output allocation after the
panel/bridge detection to avoid useless allocations.

Reported-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Fixes: ebc944613567 ("drm: convert drivers to use drm_of_find_panel_or_bridge")
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/1495110921-4032-1-git-send-email-boris.brezillon@free-electrons.com
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c | 36 +++++++++---------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 65a3bd7a0c00..423dda2785d4 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -152,8 +152,7 @@ static const struct drm_connector_funcs atmel_hlcdc_panel_connector_funcs = {
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
-				       const struct device_node *np)
+static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, int endpoint)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
 	struct atmel_hlcdc_rgb_output *output;
@@ -161,6 +160,11 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
 	struct drm_bridge *bridge;
 	int ret;
 
+	ret = drm_of_find_panel_or_bridge(dev->dev->of_node, 0, endpoint,
+					  &panel, &bridge);
+	if (ret)
+		return ret;
+
 	output = devm_kzalloc(dev->dev, sizeof(*output), GFP_KERNEL);
 	if (!output)
 		return -EINVAL;
@@ -177,10 +181,6 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
 
 	output->encoder.possible_crtcs = 0x1;
 
-	ret = drm_of_find_panel_or_bridge(np, 0, 0, &panel, &bridge);
-	if (ret)
-		return ret;
-
 	if (panel) {
 		output->connector.dpms = DRM_MODE_DPMS_OFF;
 		output->connector.polled = DRM_CONNECTOR_POLL_CONNECT;
@@ -220,22 +220,14 @@ err_encoder_cleanup:
 
 int atmel_hlcdc_create_outputs(struct drm_device *dev)
 {
-	struct device_node *remote;
-	int ret = -ENODEV;
-	int endpoint = 0;
-
-	while (true) {
-		/* Loop thru possible multiple connections to the output */
-		remote = of_graph_get_remote_node(dev->dev->of_node, 0,
-						  endpoint++);
-		if (!remote)
-			break;
-
-		ret = atmel_hlcdc_attach_endpoint(dev, remote);
-		of_node_put(remote);
-		if (ret)
-			return ret;
-	}
+	int endpoint, ret = 0;
+
+	for (endpoint = 0; !ret; endpoint++)
+		ret = atmel_hlcdc_attach_endpoint(dev, endpoint);
+
+	/* At least one device was successfully attached.*/
+	if (ret == -ENODEV && endpoint)
+		return 0;
 
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9434cec130a941e8a0698d598dfa5499dbdeb949 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 5 May 2017 21:08:44 +0200
Subject: firmware: Google VPD: Fix memory allocation error handling

This patch fixes several issues:
   - if the 1st 'kzalloc' fails, we dereference a NULL pointer
   - if the 2nd 'kzalloc' fails, there is a memory leak
   - if 'sysfs_create_bin_file' fails there is also a memory leak

Fix it by adding a test after the first memory allocation and some error
handling paths to correctly free memory if needed.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/google/vpd.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index 3ce813110d5e..1e7860f02f4f 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -116,9 +116,13 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len,
 		return VPD_OK;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	info->key = kzalloc(key_len + 1, GFP_KERNEL);
-	if (!info->key)
+	if (!info)
 		return -ENOMEM;
+	info->key = kzalloc(key_len + 1, GFP_KERNEL);
+	if (!info->key) {
+		ret = -ENOMEM;
+		goto free_info;
+	}
 
 	memcpy(info->key, key, key_len);
 
@@ -135,12 +139,17 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len,
 	list_add_tail(&info->list, &sec->attribs);
 
 	ret = sysfs_create_bin_file(sec->kobj, &info->bin_attr);
-	if (ret) {
-		kfree(info->key);
-		return ret;
-	}
+	if (ret)
+		goto free_info_key;
 
 	return 0;
+
+free_info_key:
+	kfree(info->key);
+free_info:
+	kfree(info);
+
+	return ret;
 }
 
 static void vpd_section_attrib_destroy(struct vpd_section *sec)
-- 
cgit v1.2.3-59-g8ed1b


From 6dd4aba36f2dca00c3b6976a0d59c5cee16ad545 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 18 May 2017 09:18:52 +0200
Subject: mlxsw: spectrum_dpipe: Fix incorrect entry index

In case of disabled counters the entry index will be incorrect. Fix this
by moving the entry index set before the counter status check.

Fixes: 2ba5999f009d ("mlxsw: spectrum: Add Support for erif table entries access")
Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index ea56f6ade6b4..5f0a7bc692a4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -199,10 +199,11 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp,
 
 	entry->counter_valid = false;
 	entry->counter = 0;
+	entry->index = mlxsw_sp_rif_index(rif);
+
 	if (!counters_enabled)
 		return 0;
 
-	entry->index = mlxsw_sp_rif_index(rif);
 	err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 					     MLXSW_SP_RIF_COUNTER_EGRESS,
 					     &cnt);
-- 
cgit v1.2.3-59-g8ed1b


From 6b1206bbbce6092b2ec412125300889e6e551bc2 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 18 May 2017 09:18:53 +0200
Subject: mlxsw: spectrum_router: Fix rif counter freeing routine

During rif counter freeing the counter index can be invalid. Add check
of validity before freeing the counter.

Fixes: e0c0afd8aa4e ("mlxsw: spectrum: Support for counters on router interfaces")
Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 33cec1cc1642..9f89c4137d21 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -206,6 +206,9 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
 {
 	unsigned int *p_counter_index;
 
+	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
+		return;
+
 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 	if (WARN_ON(!p_counter_index))
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 5f5c5449acad0cd3322e53e1ac68c044483b0aa5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 18 May 2017 15:01:34 +0200
Subject: sh_eth: Use platform device for printing before register_netdev()

The MDIO initialization failure message is printed using the network
device, before it has been registered, leading to:

     (null): failed to initialise MDIO

Use the platform device instead to fix this:

    sh-eth ee700000.ethernet: failed to initialise MDIO

Fixes: daacf03f0bbfefee ("sh_eth: Register MDIO bus before registering the network device")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f68c4db656ed..c85222b02754 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3220,7 +3220,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	/* MDIO bus init */
 	ret = sh_mdio_init(mdp, pd);
 	if (ret) {
-		dev_err(&ndev->dev, "failed to initialise MDIO\n");
+		dev_err(&pdev->dev, "failed to initialise MDIO\n");
 		goto out_release;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From b7ce520e9f71ff65d0aa0ad86223f94ae4095fae Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 18 May 2017 15:01:35 +0200
Subject: sh_eth: Do not print an error message for probe deferral

EPROBE_DEFER is not an error, hence printing an error message like

    sh-eth ee700000.ethernet: failed to initialise MDIO

may confuse the user.

To fix this, suppress the error message in case of probe deferral.
While at it, shorten the message, and add the actual error code.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index c85222b02754..2d686ccf971b 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3220,7 +3220,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	/* MDIO bus init */
 	ret = sh_mdio_init(mdp, pd);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to initialise MDIO\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "MDIO init failed: %d\n", ret);
 		goto out_release;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From c0e01eac7ada785fdeaea1ae5476ec1cf3b00374 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 18 May 2017 13:03:52 +0200
Subject: mlxsw: spectrum: Avoid possible NULL pointer dereference

In case we got an FDB notification for a port that doesn't exist we
execute an FDB entry delete to prevent it from re-appearing the next
time we poll for notifications.

If the operation failed we would trigger a NULL pointer dereference as
'mlxsw_sp_port' is NULL.

Fix it by reporting the error using the underlying bus device instead.

Fixes: 12f1501e7511 ("mlxsw: spectrum: remove FDB entry in case we get unknown object notification")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 0d8411f1f954..f4bb0c0b7c1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1497,8 +1497,7 @@ do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid,
 				      adding, true);
 	if (err) {
-		if (net_ratelimit())
-			netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n");
 		return;
 	}
 
@@ -1558,8 +1557,7 @@ do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
 					  adding, true);
 	if (err) {
-		if (net_ratelimit())
-			netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n");
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d8f1deaa5256aba3296025e103e8abb96f3e6479 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 2 Mar 2017 13:09:52 +0100
Subject: watchdog: orion: fix compile-test dependencies

I ran into one corner case with the orion watchdog using the
atomic_io_modify interface:

drivers/watchdog/orion_wdt.o: In function `orion_stop':
orion_wdt.c:(.text.orion_stop+0x28): undefined reference to `atomic_io_modify'
drivers/watchdog/orion_wdt.o: In function `armada375_stop':
orion_wdt.c:(.text.armada375_stop+0x28): undefined reference to `atomic_io_modify'

This function is available on all 32-bit ARM builds except for ebsa110, so
we have to specifically exclude that from compile-testing.

Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 52a70ee6014f..8b9049dac094 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -452,7 +452,7 @@ config DAVINCI_WATCHDOG
 
 config ORION_WATCHDOG
 	tristate "Orion watchdog"
-	depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || COMPILE_TEST
+	depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || (COMPILE_TEST && !ARCH_EBSA110)
 	depends on ARM
 	select WATCHDOG_CORE
 	help
-- 
cgit v1.2.3-59-g8ed1b


From 015b528644a84b0018d3286ecd6ea5f82dce0180 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 2 Mar 2017 18:31:11 +0100
Subject: watchdog: sama5d4: fix WDDIS handling

The datasheet states: "When setting the WDDIS bit, and while it is set, the
fields WDV and WDD must not be modified."

Because the whole configuration is already cached inside .mr, wait for the
user to enable the watchdog to configure it so it is enabled and configured
at the same time (what the IP is actually expecting).

When the watchdog is already enabled, it is not an issue to reconfigure it.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Wenyou.Yang <wenyou.yang@microchip.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/sama5d4_wdt.c | 48 ++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index f709962018ac..5cee20caca78 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -44,6 +44,8 @@ MODULE_PARM_DESC(nowayout,
 	"Watchdog cannot be stopped once started (default="
 	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
+#define wdt_enabled (!(wdt->mr & AT91_WDT_WDDIS))
+
 #define wdt_read(wdt, field) \
 	readl_relaxed((wdt)->reg_base + (field))
 
@@ -89,7 +91,16 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd,
 	wdt->mr &= ~AT91_WDT_WDD;
 	wdt->mr |= AT91_WDT_SET_WDV(value);
 	wdt->mr |= AT91_WDT_SET_WDD(value);
-	wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+
+	/*
+	 * WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When
+	 * setting the WDDIS bit, and while it is set, the fields WDV and WDD
+	 * must not be modified.
+	 * If the watchdog is enabled, then the timeout can be updated. Else,
+	 * wait that the user enables it.
+	 */
+	if (wdt_enabled)
+		wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS);
 
 	wdd->timeout = timeout;
 
@@ -145,23 +156,20 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt)
 
 static int sama5d4_wdt_init(struct sama5d4_wdt *wdt)
 {
-	struct watchdog_device *wdd = &wdt->wdd;
-	u32 value = WDT_SEC2TICKS(wdd->timeout);
 	u32 reg;
-
 	/*
-	 * Because the fields WDV and WDD must not be modified when the WDDIS
-	 * bit is set, so clear the WDDIS bit before writing the WDT_MR.
+	 * When booting and resuming, the bootloader may have changed the
+	 * watchdog configuration.
+	 * If the watchdog is already running, we can safely update it.
+	 * Else, we have to disable it properly.
 	 */
-	reg = wdt_read(wdt, AT91_WDT_MR);
-	reg &= ~AT91_WDT_WDDIS;
-	wdt_write(wdt, AT91_WDT_MR, reg);
-
-	wdt->mr |= AT91_WDT_SET_WDD(value);
-	wdt->mr |= AT91_WDT_SET_WDV(value);
-
-	wdt_write(wdt, AT91_WDT_MR, wdt->mr);
-
+	if (wdt_enabled) {
+		wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+	} else {
+		reg = wdt_read(wdt, AT91_WDT_MR);
+		if (!(reg & AT91_WDT_WDDIS))
+			wdt_write(wdt, AT91_WDT_MR, reg | AT91_WDT_WDDIS);
+	}
 	return 0;
 }
 
@@ -172,6 +180,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 	struct resource *res;
 	void __iomem *regs;
 	u32 irq = 0;
+	u32 timeout;
 	int ret;
 
 	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
@@ -221,6 +230,11 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	timeout = WDT_SEC2TICKS(wdd->timeout);
+
+	wdt->mr |= AT91_WDT_SET_WDD(timeout);
+	wdt->mr |= AT91_WDT_SET_WDV(timeout);
+
 	ret = sama5d4_wdt_init(wdt);
 	if (ret)
 		return ret;
@@ -263,9 +277,7 @@ static int sama5d4_wdt_resume(struct device *dev)
 {
 	struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
 
-	wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS);
-	if (wdt->mr & AT91_WDT_WDDIS)
-		wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+	sama5d4_wdt_init(wdt);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From ddd6d240b26dcb8b8dc98bd493eba944dd97ebc8 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 2 Mar 2017 18:31:12 +0100
Subject: watchdog: sama5d4: fix race condition

WDT_MR and WDT_CR must not updated within three slow clock periods after
the last ping (write to WDT_CR or WDT_MR). Ensure enough time has elapsed
before writing those registers.
wdt_write() waits for 4 periods to ensure at least 3 edges are seen by the
IP.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Wenyou.Yang <wenyou.yang@microchip.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/sama5d4_wdt.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index 5cee20caca78..362fd229786d 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -6,6 +6,7 @@
  * Licensed under GPLv2.
  */
 
+#include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -29,6 +30,7 @@ struct sama5d4_wdt {
 	struct watchdog_device	wdd;
 	void __iomem		*reg_base;
 	u32			mr;
+	unsigned long		last_ping;
 };
 
 static int wdt_timeout = WDT_DEFAULT_TIMEOUT;
@@ -49,8 +51,29 @@ MODULE_PARM_DESC(nowayout,
 #define wdt_read(wdt, field) \
 	readl_relaxed((wdt)->reg_base + (field))
 
-#define wdt_write(wtd, field, val) \
-	writel_relaxed((val), (wdt)->reg_base + (field))
+/* 4 slow clock periods is 4/32768 = 122.07µs*/
+#define WDT_DELAY	usecs_to_jiffies(123)
+
+static void wdt_write(struct sama5d4_wdt *wdt, u32 field, u32 val)
+{
+	/*
+	 * WDT_CR and WDT_MR must not be modified within three slow clock
+	 * periods following a restart of the watchdog performed by a write
+	 * access in WDT_CR.
+	 */
+	while (time_before(jiffies, wdt->last_ping + WDT_DELAY))
+		usleep_range(30, 125);
+	writel_relaxed(val, wdt->reg_base + field);
+	wdt->last_ping = jiffies;
+}
+
+static void wdt_write_nosleep(struct sama5d4_wdt *wdt, u32 field, u32 val)
+{
+	if (time_before(jiffies, wdt->last_ping + WDT_DELAY))
+		udelay(123);
+	writel_relaxed(val, wdt->reg_base + field);
+	wdt->last_ping = jiffies;
+}
 
 static int sama5d4_wdt_start(struct watchdog_device *wdd)
 {
@@ -164,11 +187,12 @@ static int sama5d4_wdt_init(struct sama5d4_wdt *wdt)
 	 * Else, we have to disable it properly.
 	 */
 	if (wdt_enabled) {
-		wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+		wdt_write_nosleep(wdt, AT91_WDT_MR, wdt->mr);
 	} else {
 		reg = wdt_read(wdt, AT91_WDT_MR);
 		if (!(reg & AT91_WDT_WDDIS))
-			wdt_write(wdt, AT91_WDT_MR, reg | AT91_WDT_WDDIS);
+			wdt_write_nosleep(wdt, AT91_WDT_MR,
+					  reg | AT91_WDT_WDDIS);
 	}
 	return 0;
 }
@@ -193,6 +217,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 	wdd->ops = &sama5d4_wdt_ops;
 	wdd->min_timeout = MIN_WDT_TIMEOUT;
 	wdd->max_timeout = MAX_WDT_TIMEOUT;
+	wdt->last_ping = jiffies;
 
 	watchdog_set_drvdata(wdd, wdt);
 
-- 
cgit v1.2.3-59-g8ed1b


From 46c319b848268dab3f0e7c4a5b6e9146d3bca8a4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 13 Mar 2017 13:49:45 +0100
Subject: watchdog: pcwd_usb: fix NULL-deref at probe

Make sure to check the number of endpoints to avoid dereferencing a
NULL-pointer should a malicious device lack endpoints.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/pcwd_usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
index 99ebf6ea3de6..5615f4013924 100644
--- a/drivers/watchdog/pcwd_usb.c
+++ b/drivers/watchdog/pcwd_usb.c
@@ -630,6 +630,9 @@ static int usb_pcwd_probe(struct usb_interface *interface,
 		return -ENODEV;
 	}
 
+	if (iface_desc->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
 	/* check out the endpoint: it has to be Interrupt & IN */
 	endpoint = &iface_desc->endpoint[0].desc;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0ddad77b90cb52075b5a9498f0621e3e265cc19f Mon Sep 17 00:00:00 2001
From: Tomas Melin <tomas.melin@vaisala.com>
Date: Mon, 20 Mar 2017 09:29:31 +0200
Subject: watchdog: cadence_wdt: fix timeout setting

wdt_timeout must not be initialized to CDNS_WDT_DEFAULT_TIMEOUT in
order to allow the value to be overriddden by a device tree setting.

This way, the default timeout value will be used only in case module_param
has not been set, or device tree timeout-sec has not been defined.

Signed-off-by: Tomas Melin <tomas.melin@vaisala.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/cadence_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
index 8d61e8bfe60b..86e0b5d2e761 100644
--- a/drivers/watchdog/cadence_wdt.c
+++ b/drivers/watchdog/cadence_wdt.c
@@ -49,7 +49,7 @@
 /* Counter maximum value */
 #define CDNS_WDT_COUNTER_MAX 0xFFF
 
-static int wdt_timeout = CDNS_WDT_DEFAULT_TIMEOUT;
+static int wdt_timeout;
 static int nowayout = WATCHDOG_NOWAYOUT;
 
 module_param(wdt_timeout, int, 0);
-- 
cgit v1.2.3-59-g8ed1b


From 455a9a60b6d4afb293b0e63ec75cc8e82912a767 Mon Sep 17 00:00:00 2001
From: Shile Zhang <shile.zhang@nokia.com>
Date: Mon, 10 Apr 2017 22:39:33 +0800
Subject: watchdog: wdt_pci: fix build error if define SOFTWARE_REBOOT

To fix following build error when SOFTWARE_REBOOT is defined:

  CC [M]  driver/watchdog/wdt_pci.o
driver/watchdog/wdt_pci.c: In function 'wdtpci_interrupt':
driver/watchdog/wdt_pci.c:335:3: error: too many arguments to function 'emergency_restart'
   emergency_restart(NULL);
   ^
In file included from driver/watchdog/wdt_pci.c:51:0:
include/linux/reboot.h:80:13: note: declared here
 extern void emergency_restart(void);
             ^

Signed-off-by: Shile Zhang <shile.zhang@nokia.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/wdt_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c
index 48b2c058b009..bc7addc2dc06 100644
--- a/drivers/watchdog/wdt_pci.c
+++ b/drivers/watchdog/wdt_pci.c
@@ -332,7 +332,7 @@ static irqreturn_t wdtpci_interrupt(int irq, void *dev_id)
 		pr_crit("Would Reboot\n");
 #else
 		pr_crit("Initiating system reboot\n");
-		emergency_restart(NULL);
+		emergency_restart();
 #endif
 #else
 		pr_crit("Reset in 5ms\n");
-- 
cgit v1.2.3-59-g8ed1b


From 14e3995e63759b80eb22a3c06958d105db4d3f79 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 16 May 2017 14:22:47 +0000
Subject: xen/9pfs: fix return value check in xen_9pfs_front_probe()

In case of error, the function xenbus_read() returns ERR_PTR() and never
returns NULL. The NULL test in the return value check should be replaced
with IS_ERR().

Fixes: 71ebd71921e4 ("xen/9pfs: connect to the backend")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 net/9p/trans_xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 71e85643b3f9..83fe487f460e 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -454,8 +454,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
 			goto error_xenbus;
 	}
 	priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL);
-	if (!priv->tag) {
-		ret = -EINVAL;
+	if (IS_ERR(priv->tag)) {
+		ret = PTR_ERR(priv->tag);
 		goto error_xenbus;
 	}
 	ret = xenbus_transaction_end(xbt, 0);
-- 
cgit v1.2.3-59-g8ed1b


From aaf0475a0b3f445000c50f7fc75d5e846bf7ee7b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 18 May 2017 15:22:41 +0000
Subject: xen/9pfs: p9_trans_xen_init and p9_trans_xen_exit can be static

Fixes the following sparse warnings:

net/9p/trans_xen.c:528:5: warning:
 symbol 'p9_trans_xen_init' was not declared. Should it be static?
net/9p/trans_xen.c:540:6: warning:
 symbol 'p9_trans_xen_exit' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 net/9p/trans_xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 83fe487f460e..6ad3e043c617 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -525,7 +525,7 @@ static struct xenbus_driver xen_9pfs_front_driver = {
 	.otherend_changed = xen_9pfs_front_changed,
 };
 
-int p9_trans_xen_init(void)
+static int p9_trans_xen_init(void)
 {
 	if (!xen_domain())
 		return -ENODEV;
@@ -537,7 +537,7 @@ int p9_trans_xen_init(void)
 }
 module_init(p9_trans_xen_init);
 
-void p9_trans_xen_exit(void)
+static void p9_trans_xen_exit(void)
 {
 	v9fs_unregister_trans(&p9_xen_trans);
 	return xenbus_unregister_driver(&xen_9pfs_front_driver);
-- 
cgit v1.2.3-59-g8ed1b


From 463f620b1256e0488d932088e04a372817e8c42e Mon Sep 17 00:00:00 2001
From: Michał Potomski <michalx.potomski@intel.com>
Date: Fri, 12 May 2017 08:36:27 +0200
Subject: scsi: ufs: Clean up some rpm/spm level SysFS nodes upon remove
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When reloading module these two attributes aren't cleaned up properly
and they persist causing warnings when trying to load module
again. Additionally they are not recreated properly due to that.

Signed-off-by: Michał Potomski <michalx.potomski@intel.com>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index abc7e87937cc..ffe8d8608818 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7698,6 +7698,12 @@ static inline void ufshcd_add_sysfs_nodes(struct ufs_hba *hba)
 	ufshcd_add_spm_lvl_sysfs_nodes(hba);
 }
 
+static inline void ufshcd_remove_sysfs_nodes(struct ufs_hba *hba)
+{
+	device_remove_file(hba->dev, &hba->rpm_lvl_attr);
+	device_remove_file(hba->dev, &hba->spm_lvl_attr);
+}
+
 /**
  * ufshcd_shutdown - shutdown routine
  * @hba: per adapter instance
@@ -7735,6 +7741,7 @@ EXPORT_SYMBOL(ufshcd_shutdown);
  */
 void ufshcd_remove(struct ufs_hba *hba)
 {
+	ufshcd_remove_sysfs_nodes(hba);
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
 	ufshcd_disable_intr(hba, hba->intr_mask);
-- 
cgit v1.2.3-59-g8ed1b


From a351e40b6de550049423a26f7ded7b639e363d89 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Wed, 17 May 2017 20:30:43 +0530
Subject: scsi: csiostor: fix use after free in csio_hw_use_fwconfig()

mbp pointer is passed to csio_hw_validate_caps() so call mempool_free()
after calling csio_hw_validate_caps().

Signed-off-by: Varun Prakash <varun@chelsio.com>
Fixes: 541c571fa2fd ("csiostor:Use firmware version from cxgb4/t4fw_version.h")
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/csiostor/csio_hw.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 622bdabc8894..dab195f04da7 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param)
 		goto bye;
 	}
 
-	mempool_free(mbp, hw->mb_mempool);
 	if (finicsum != cfcsum) {
 		csio_warn(hw,
 		      "Config File checksum mismatch: csum=%#x, computed=%#x\n",
@@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param)
 	rv = csio_hw_validate_caps(hw, mbp);
 	if (rv != 0)
 		goto bye;
+
+	mempool_free(mbp, hw->mb_mempool);
+	mbp = NULL;
+
 	/*
 	 * Note that we're operating with parameters
 	 * not supplied by the driver, rather than from hard-wired
-- 
cgit v1.2.3-59-g8ed1b


From 1bad6c4a57efda0d5f5bf8a2403b21b1ed24875c Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Thu, 18 May 2017 15:40:05 -0700
Subject: scsi: zero per-cmd private driver data for each MQ I/O

In lower layer driver's (LLD) scsi_host_template, the driver may
optionally ask SCSI to allocate its private driver memory for each
command, by specifying cmd_size. This memory is allocated at the end of
scsi_cmnd by SCSI.  Later when SCSI queues a command, the LLD can use
scsi_cmd_priv to get to its private data.

Some LLD, e.g. hv_storvsc, doesn't clear its private data before use. In
this case, the LLD may get to stale or uninitialized data in its private
driver memory. This may result in unexpected driver and hardware
behavior.

Fix this problem by also zeroing the private driver memory before
passing them to LLD.

Signed-off-by: Long Li <longli@microsoft.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Reviewed-by: KY Srinivasan <kys@microsoft.com>
Reviewed-by: Christoph Hellwig <hch@infradead.org>
CC: <stable@vger.kernel.org> # 4.11+
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e31f1cc90b81..99e16ac479e3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1851,7 +1851,7 @@ static int scsi_mq_prep_fn(struct request *req)
 
 	/* zero out the cmd, except for the embedded scsi_request */
 	memset((char *)cmd + sizeof(cmd->req), 0,
-		sizeof(*cmd) - sizeof(cmd->req));
+		sizeof(*cmd) - sizeof(cmd->req) + shost->hostt->cmd_size);
 
 	req->special = cmd;
 
-- 
cgit v1.2.3-59-g8ed1b


From e41e53cd4fe331d0d1f06f8e4ed7e2cc63ee2c34 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 18 May 2017 20:37:31 +1000
Subject: powerpc/mm: Fix virt_addr_valid() etc. on 64-bit hash

virt_addr_valid() is supposed to tell you if it's OK to call virt_to_page() on
an address. What this means in practice is that it should only return true for
addresses in the linear mapping which are backed by a valid PFN.

We are failing to properly check that the address is in the linear mapping,
because virt_to_pfn() will return a valid looking PFN for more or less any
address. That bug is actually caused by __pa(), used in virt_to_pfn().

eg: __pa(0xc000000000010000) = 0x10000  # Good
    __pa(0xd000000000010000) = 0x10000  # Bad!
    __pa(0x0000000000010000) = 0x10000  # Bad!

This started happening after commit bdbc29c19b26 ("powerpc: Work around gcc
miscompilation of __pa() on 64-bit") (Aug 2013), where we changed the definition
of __pa() to work around a GCC bug. Prior to that we subtracted PAGE_OFFSET from
the value passed to __pa(), meaning __pa() of a 0xd or 0x0 address would give
you something bogus back.

Until we can verify if that GCC bug is no longer an issue, or come up with
another solution, this commit does the minimal fix to make virt_addr_valid()
work, by explicitly checking that the address is in the linear mapping region.

Fixes: bdbc29c19b26 ("powerpc: Work around gcc miscompilation of __pa() on 64-bit")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Tested-by: Breno Leitao <breno.leitao@gmail.com>
---
 arch/powerpc/include/asm/page.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 2a32483c7b6c..8da5d4c1cab2 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,7 +132,19 @@ extern long long virt_phys_offset;
 #define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * On hash the vmalloc and other regions alias to the kernel region when passed
+ * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can
+ * return true for some vmalloc addresses, which is incorrect. So explicitly
+ * check that the address is in the kernel region.
+ */
+#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
+				pfn_valid(virt_to_pfn(kaddr)))
+#else
 #define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
+#endif
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
-- 
cgit v1.2.3-59-g8ed1b


From a33d7d94eed92b23fbbc7b0de06a41b2bbaa49e3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 12 May 2017 13:15:45 -0400
Subject: tracing: Make sure RCU is watching before calling a stack trace

As stack tracing now requires "rcu watching", force RCU to be watching when
recording a stack trace.

Link: http://lkml.kernel.org/r/20170512172449.879684501@goodmis.org

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fcc9a2d774c3..1122f151466f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2568,7 +2568,36 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 		   int pc)
 {
-	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
+	struct ring_buffer *buffer = tr->trace_buffer.buffer;
+
+	if (rcu_is_watching()) {
+		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+		return;
+	}
+
+	/*
+	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
+	 * but if the above rcu_is_watching() failed, then the NMI
+	 * triggered someplace critical, and rcu_irq_enter() should
+	 * not be called from NMI.
+	 */
+	if (unlikely(in_nmi()))
+		return;
+
+	/*
+	 * It is possible that a function is being traced in a
+	 * location that RCU is not watching. A call to
+	 * rcu_irq_enter() will make sure that it is, but there's
+	 * a few internal rcu functions that could be traced
+	 * where that wont work either. In those cases, we just
+	 * do nothing.
+	 */
+	if (unlikely(rcu_irq_enter_disabled()))
+		return;
+
+	rcu_irq_enter_irqson();
+	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+	rcu_irq_exit_irqson();
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 989513a735f51407280acd91e436d83eb48514cd Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 16 May 2017 09:41:06 +0200
Subject: xen: cleanup pvh leftovers from pv-only sources

There are some leftovers testing for pvh guest mode in pv-only source
files. Remove them.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c |  15 ++-----
 arch/x86/xen/mmu_pv.c       | 102 +++++++++++++++++---------------------------
 2 files changed, 42 insertions(+), 75 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 7cd442690f9d..f33eef4ebd12 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -142,9 +142,7 @@ static void __init xen_banner(void)
 	struct xen_extraversion extra;
 	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 
-	pr_info("Booting paravirtualized kernel %son %s\n",
-		xen_feature(XENFEAT_auto_translated_physmap) ?
-			"with PVH extensions " : "", pv_info.name);
+	pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
 	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -957,15 +955,10 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
 
 void xen_setup_shared_info(void)
 {
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		set_fixmap(FIX_PARAVIRT_BOOTMAP,
-			   xen_start_info->shared_info);
+	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
 
-		HYPERVISOR_shared_info =
-			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-	} else
-		HYPERVISOR_shared_info =
-			(struct shared_info *)__va(xen_start_info->shared_info);
+	HYPERVISOR_shared_info =
+		(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 
 #ifndef CONFIG_SMP
 	/* In UP this is as good a place as any to set up shared info */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 7397d8b8459d..1f386d7fdf70 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -355,10 +355,8 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		pteval_t flags = val & PTE_FLAGS_MASK;
 		unsigned long mfn;
 
-		if (!xen_feature(XENFEAT_auto_translated_physmap))
-			mfn = __pfn_to_mfn(pfn);
-		else
-			mfn = pfn;
+		mfn = __pfn_to_mfn(pfn);
+
 		/*
 		 * If there's no mfn for the pfn, then just create an
 		 * empty non-present pte.  Unfortunately this loses
@@ -647,9 +645,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 	limit--;
 	BUG_ON(limit >= FIXADDR_TOP);
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	/*
 	 * 64-bit has a great big hole in the middle of the address
 	 * space, which contains the Xen mappings.  On 32-bit these
@@ -1289,9 +1284,6 @@ static void __init xen_pagetable_cleanhighmap(void)
 
 static void __init xen_pagetable_p2m_setup(void)
 {
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	xen_vmalloc_p2m_tree();
 
 #ifdef CONFIG_X86_64
@@ -1314,8 +1306,7 @@ static void __init xen_pagetable_init(void)
 	xen_build_mfn_list_list();
 
 	/* Remap memory freed due to conflicts with E820 map */
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		xen_remap_memory();
+	xen_remap_memory();
 
 	xen_setup_shared_info();
 }
@@ -1925,21 +1916,20 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);
 
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		/* Pre-constructed entries are in pfn, so convert to mfn */
-		/* L4[272] -> level3_ident_pgt
-		 * L4[511] -> level3_kernel_pgt */
-		convert_pfn_mfn(init_level4_pgt);
+	/* Pre-constructed entries are in pfn, so convert to mfn */
+	/* L4[272] -> level3_ident_pgt  */
+	/* L4[511] -> level3_kernel_pgt */
+	convert_pfn_mfn(init_level4_pgt);
 
-		/* L3_i[0] -> level2_ident_pgt */
-		convert_pfn_mfn(level3_ident_pgt);
-		/* L3_k[510] -> level2_kernel_pgt
-		 * L3_k[511] -> level2_fixmap_pgt */
-		convert_pfn_mfn(level3_kernel_pgt);
+	/* L3_i[0] -> level2_ident_pgt */
+	convert_pfn_mfn(level3_ident_pgt);
+	/* L3_k[510] -> level2_kernel_pgt */
+	/* L3_k[511] -> level2_fixmap_pgt */
+	convert_pfn_mfn(level3_kernel_pgt);
+
+	/* L3_k[511][506] -> level1_fixmap_pgt */
+	convert_pfn_mfn(level2_fixmap_pgt);
 
-		/* L3_k[511][506] -> level1_fixmap_pgt */
-		convert_pfn_mfn(level2_fixmap_pgt);
-	}
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1962,34 +1952,30 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	if (i && i < pgd_index(__START_KERNEL_map))
 		init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
 
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		/* Make pagetable pieces RO */
-		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-		set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
-
-		/* Pin down new L4 */
-		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-				  PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
-		/* Unpin Xen-provided one */
-		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+	/* Make pagetable pieces RO */
+	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+	/* Pin down new L4 */
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
+
+	/* Unpin Xen-provided one */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-		/*
-		 * At this stage there can be no user pgd, and no page
-		 * structure to attach it to, so make sure we just set kernel
-		 * pgd.
-		 */
-		xen_mc_batch();
-		__xen_write_cr3(true, __pa(init_level4_pgt));
-		xen_mc_issue(PARAVIRT_LAZY_CPU);
-	} else
-		native_write_cr3(__pa(init_level4_pgt));
+	/*
+	 * At this stage there can be no user pgd, and no page structure to
+	 * attach it to, so make sure we just set kernel pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(init_level4_pgt));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
 	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
 	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
@@ -2403,9 +2389,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 static void __init xen_post_allocator_init(void)
 {
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2511,9 +2494,6 @@ void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2650,9 +2630,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 	 * this function are redundant and can be ignored.
 	 */
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	if (unlikely(order > MAX_CONTIG_ORDER))
 		return -ENOMEM;
 
@@ -2689,9 +2666,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	int success;
 	unsigned long vstart;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	if (unlikely(order > MAX_CONTIG_ORDER))
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From c71e6d804c88168ecf02aaf14e1fd5773d683b5f Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 18 May 2017 17:46:48 +0200
Subject: xen: make xen_flush_tlb_all() static

xen_flush_tlb_all() is used in arch/x86/xen/mmu.c only. Make it static.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e375a5e815f..3be06f3caf3c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,7 +42,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 }
 EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
 
-void xen_flush_tlb_all(void)
+static void xen_flush_tlb_all(void)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-- 
cgit v1.2.3-59-g8ed1b


From d5d332d3f7e8435e264a71b90178dee69428d630 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Fri, 12 May 2017 20:13:26 -0700
Subject: devicetree: Move include prefixes from arch to separate directory

We use a directory under arch/$ARCH/boot/dts as an include path
that has links outside of the subtree to find dt-bindings from under
include/dt-bindings. That's been working well, but new DT architectures
haven't been adding them by default.

Recently there's been a desire to share some of the DT material between
arm and arm64, which originally caused developers to create symlinks or
relative includes between the subtrees. This isn't ideal -- it breaks
if the DT files aren't stored in the exact same hierarchy as the kernel
tree, and generally it's just icky.

As a somewhat cleaner solution we decided to add a $ARCH/ prefix link
once, and allow DTS files to reference dtsi (and dts) files in other
architectures that way.

Original approach was to create these links under each architecture,
but it lead to the problem of recursive symlinks.

As a remedy, move the include link directories out of the architecture
trees into a common location. At the same time, they can now share one
directory and one dt-bindings/ link as well.

Fixes: 4027494ae6e3 ('ARM: dts: add arm/arm64 include symlinks')
Reported-by: Russell King <linux@armlinux.org.uk>
Reported-by: Omar Sandoval <osandov@osandov.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Rob Herring <robh@kernel.org>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: linux-arch <linux-arch@vger.kernel.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/boot/dts/include/arm             | 1 -
 arch/arm/boot/dts/include/arm64           | 1 -
 arch/arm/boot/dts/include/dt-bindings     | 1 -
 arch/arm64/boot/dts/include/arm           | 1 -
 arch/arm64/boot/dts/include/arm64         | 1 -
 arch/arm64/boot/dts/include/dt-bindings   | 1 -
 arch/cris/boot/dts/include/dt-bindings    | 1 -
 arch/metag/boot/dts/include/dt-bindings   | 1 -
 arch/mips/boot/dts/include/dt-bindings    | 1 -
 arch/powerpc/boot/dts/include/dt-bindings | 1 -
 scripts/Makefile.lib                      | 2 +-
 scripts/dtc/include-prefixes/arc          | 1 +
 scripts/dtc/include-prefixes/arm          | 1 +
 scripts/dtc/include-prefixes/arm64        | 1 +
 scripts/dtc/include-prefixes/c6x          | 1 +
 scripts/dtc/include-prefixes/cris         | 1 +
 scripts/dtc/include-prefixes/dt-bindings  | 1 +
 scripts/dtc/include-prefixes/h8300        | 1 +
 scripts/dtc/include-prefixes/metag        | 1 +
 scripts/dtc/include-prefixes/microblaze   | 1 +
 scripts/dtc/include-prefixes/mips         | 1 +
 scripts/dtc/include-prefixes/nios2        | 1 +
 scripts/dtc/include-prefixes/openrisc     | 1 +
 scripts/dtc/include-prefixes/powerpc      | 1 +
 scripts/dtc/include-prefixes/sh           | 1 +
 scripts/dtc/include-prefixes/xtensa       | 1 +
 26 files changed, 16 insertions(+), 11 deletions(-)
 delete mode 120000 arch/arm/boot/dts/include/arm
 delete mode 120000 arch/arm/boot/dts/include/arm64
 delete mode 120000 arch/arm/boot/dts/include/dt-bindings
 delete mode 120000 arch/arm64/boot/dts/include/arm
 delete mode 120000 arch/arm64/boot/dts/include/arm64
 delete mode 120000 arch/arm64/boot/dts/include/dt-bindings
 delete mode 120000 arch/cris/boot/dts/include/dt-bindings
 delete mode 120000 arch/metag/boot/dts/include/dt-bindings
 delete mode 120000 arch/mips/boot/dts/include/dt-bindings
 delete mode 120000 arch/powerpc/boot/dts/include/dt-bindings
 create mode 120000 scripts/dtc/include-prefixes/arc
 create mode 120000 scripts/dtc/include-prefixes/arm
 create mode 120000 scripts/dtc/include-prefixes/arm64
 create mode 120000 scripts/dtc/include-prefixes/c6x
 create mode 120000 scripts/dtc/include-prefixes/cris
 create mode 120000 scripts/dtc/include-prefixes/dt-bindings
 create mode 120000 scripts/dtc/include-prefixes/h8300
 create mode 120000 scripts/dtc/include-prefixes/metag
 create mode 120000 scripts/dtc/include-prefixes/microblaze
 create mode 120000 scripts/dtc/include-prefixes/mips
 create mode 120000 scripts/dtc/include-prefixes/nios2
 create mode 120000 scripts/dtc/include-prefixes/openrisc
 create mode 120000 scripts/dtc/include-prefixes/powerpc
 create mode 120000 scripts/dtc/include-prefixes/sh
 create mode 120000 scripts/dtc/include-prefixes/xtensa

diff --git a/arch/arm/boot/dts/include/arm b/arch/arm/boot/dts/include/arm
deleted file mode 120000
index a96aa0ea9d8c..000000000000
--- a/arch/arm/boot/dts/include/arm
+++ /dev/null
@@ -1 +0,0 @@
-..
\ No newline at end of file
diff --git a/arch/arm/boot/dts/include/arm64 b/arch/arm/boot/dts/include/arm64
deleted file mode 120000
index 074a835fca3e..000000000000
--- a/arch/arm/boot/dts/include/arm64
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm64/boot/dts
\ No newline at end of file
diff --git a/arch/arm/boot/dts/include/dt-bindings b/arch/arm/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/arm/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/include/arm b/arch/arm64/boot/dts/include/arm
deleted file mode 120000
index cf63d80e2b93..000000000000
--- a/arch/arm64/boot/dts/include/arm
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/include/arm64 b/arch/arm64/boot/dts/include/arm64
deleted file mode 120000
index a96aa0ea9d8c..000000000000
--- a/arch/arm64/boot/dts/include/arm64
+++ /dev/null
@@ -1 +0,0 @@
-..
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/include/dt-bindings b/arch/arm64/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/arm64/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
\ No newline at end of file
diff --git a/arch/cris/boot/dts/include/dt-bindings b/arch/cris/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/cris/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
\ No newline at end of file
diff --git a/arch/metag/boot/dts/include/dt-bindings b/arch/metag/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/metag/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
\ No newline at end of file
diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/mips/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
\ No newline at end of file
diff --git a/arch/powerpc/boot/dts/include/dt-bindings b/arch/powerpc/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/powerpc/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings
\ No newline at end of file
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 6dc1eda13b8e..58c05e5d9870 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -175,7 +175,7 @@ ld_flags       = $(LDFLAGS) $(ldflags-y)
 
 dtc_cpp_flags  = -Wp,-MD,$(depfile).pre.tmp -nostdinc                    \
 		 -I$(srctree)/arch/$(SRCARCH)/boot/dts                   \
-		 -I$(srctree)/arch/$(SRCARCH)/boot/dts/include           \
+		 -I$(srctree)/scripts/dtc/include-prefixes               \
 		 -I$(srctree)/drivers/of/testcase-data                   \
 		 -undef -D__DTS__
 
diff --git a/scripts/dtc/include-prefixes/arc b/scripts/dtc/include-prefixes/arc
new file mode 120000
index 000000000000..5d21b5a69a11
--- /dev/null
+++ b/scripts/dtc/include-prefixes/arc
@@ -0,0 +1 @@
+../../../arch/arc/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/arm b/scripts/dtc/include-prefixes/arm
new file mode 120000
index 000000000000..eb14d4515a57
--- /dev/null
+++ b/scripts/dtc/include-prefixes/arm
@@ -0,0 +1 @@
+../../../arch/arm/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/arm64 b/scripts/dtc/include-prefixes/arm64
new file mode 120000
index 000000000000..275c42c21d71
--- /dev/null
+++ b/scripts/dtc/include-prefixes/arm64
@@ -0,0 +1 @@
+../../../arch/arm64/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x
new file mode 120000
index 000000000000..49ded4cae2be
--- /dev/null
+++ b/scripts/dtc/include-prefixes/c6x
@@ -0,0 +1 @@
+../../../arch/c6x/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris
new file mode 120000
index 000000000000..736d998ba506
--- /dev/null
+++ b/scripts/dtc/include-prefixes/cris
@@ -0,0 +1 @@
+../../../arch/cris/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/dt-bindings b/scripts/dtc/include-prefixes/dt-bindings
new file mode 120000
index 000000000000..04fdbb3af016
--- /dev/null
+++ b/scripts/dtc/include-prefixes/dt-bindings
@@ -0,0 +1 @@
+../../../include/dt-bindings
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/h8300 b/scripts/dtc/include-prefixes/h8300
new file mode 120000
index 000000000000..3bdaa332c54c
--- /dev/null
+++ b/scripts/dtc/include-prefixes/h8300
@@ -0,0 +1 @@
+../../../arch/h8300/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag
new file mode 120000
index 000000000000..87a3c847db8f
--- /dev/null
+++ b/scripts/dtc/include-prefixes/metag
@@ -0,0 +1 @@
+../../../arch/metag/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/microblaze b/scripts/dtc/include-prefixes/microblaze
new file mode 120000
index 000000000000..d9830330a21d
--- /dev/null
+++ b/scripts/dtc/include-prefixes/microblaze
@@ -0,0 +1 @@
+../../../arch/microblaze/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/mips b/scripts/dtc/include-prefixes/mips
new file mode 120000
index 000000000000..ae8d4948dc8d
--- /dev/null
+++ b/scripts/dtc/include-prefixes/mips
@@ -0,0 +1 @@
+../../../arch/mips/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/nios2 b/scripts/dtc/include-prefixes/nios2
new file mode 120000
index 000000000000..51772336d13f
--- /dev/null
+++ b/scripts/dtc/include-prefixes/nios2
@@ -0,0 +1 @@
+../../../arch/nios2/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/openrisc b/scripts/dtc/include-prefixes/openrisc
new file mode 120000
index 000000000000..71c3bc75c560
--- /dev/null
+++ b/scripts/dtc/include-prefixes/openrisc
@@ -0,0 +1 @@
+../../../arch/openrisc/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/powerpc b/scripts/dtc/include-prefixes/powerpc
new file mode 120000
index 000000000000..7cd6ec16e899
--- /dev/null
+++ b/scripts/dtc/include-prefixes/powerpc
@@ -0,0 +1 @@
+../../../arch/powerpc/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/sh b/scripts/dtc/include-prefixes/sh
new file mode 120000
index 000000000000..67d37808c599
--- /dev/null
+++ b/scripts/dtc/include-prefixes/sh
@@ -0,0 +1 @@
+../../../arch/sh/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/xtensa b/scripts/dtc/include-prefixes/xtensa
new file mode 120000
index 000000000000..d1eaf6ec7a2b
--- /dev/null
+++ b/scripts/dtc/include-prefixes/xtensa
@@ -0,0 +1 @@
+../../../arch/xtensa/boot/dts
\ No newline at end of file
-- 
cgit v1.2.3-59-g8ed1b


From 877dd4b1f7986bd7ca31b1e69589289e4aa3e066 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 14 May 2017 23:45:02 +0200
Subject: ARM: configs: add a gemini defconfig

It makes sense to have a stripped-down defconfig for just Gemini, as
it is a pretty small platform used in NAS etc, and will use appended
device tree. It is also quick to compile and test. Hopefully this
defconfig can be a good base for distributions such as OpenWRT.

I plan to add in the config options needed for the different
variants of Gemini as we go along.

Cc: Janos Laube <janos.dev@gmail.com>
Cc: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Cc: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/configs/gemini_defconfig | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 arch/arm/configs/gemini_defconfig

diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig
new file mode 100644
index 000000000000..d2d75fa664a6
--- /dev/null
+++ b/arch/arm/configs/gemini_defconfig
@@ -0,0 +1,68 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_USER_NS=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARCH_MULTI_V4=y
+# CONFIG_ARCH_MULTI_V7 is not set
+CONFIG_ARCH_GEMINI=y
+CONFIG_PCI=y
+CONFIG_PREEMPT=y
+CONFIG_AEABI=y
+CONFIG_CMDLINE="console=ttyS0,115200n8"
+CONFIG_KEXEC=y
+CONFIG_BINFMT_MISC=y
+CONFIG_PM=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_GPIO=y
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_GEMINI_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_FOTG210_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GEMINI=y
+CONFIG_DMADEVICES=y
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_ROMFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_FS=y
-- 
cgit v1.2.3-59-g8ed1b


From eb1e6716cc9c6fd22e706379ab082b4ac198a1b1 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 17 May 2017 16:52:29 -0500
Subject: arm64: defconfig: sync with savedefconfig

Sync the defconfig with savedefconfig as config options change/move over
time.

Generated with the following commands:
make defconfig
make savedefconfig
cp defconfig arch/arm64/configs/defconfig

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm64/configs/defconfig | 103 ++++++++++++++++++-------------------------
 1 file changed, 42 insertions(+), 61 deletions(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index ce072859e3b2..d916fc316698 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -30,7 +30,6 @@ CONFIG_PROFILING=y
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 CONFIG_ARCH_SUNXI=y
 CONFIG_ARCH_ALPINE=y
@@ -62,16 +61,15 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZX=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
-CONFIG_PCI_MSI=y
 CONFIG_PCI_IOV=y
-CONFIG_PCI_AARDVARK=y
-CONFIG_PCIE_RCAR=y
-CONFIG_PCI_HOST_GENERIC=y
-CONFIG_PCI_XGENE=y
 CONFIG_PCI_LAYERSCAPE=y
 CONFIG_PCI_HISI=y
 CONFIG_PCIE_QCOM=y
 CONFIG_PCIE_ARMADA_8K=y
+CONFIG_PCI_AARDVARK=y
+CONFIG_PCIE_RCAR=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI_XGENE=y
 CONFIG_ARM64_VA_BITS_48=y
 CONFIG_SCHED_MC=y
 CONFIG_NUMA=y
@@ -80,12 +78,11 @@ CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
 CONFIG_SECCOMP=y
-CONFIG_XEN=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_XEN=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
-CONFIG_CPU_IDLE=y
 CONFIG_HIBERNATION=y
 CONFIG_ARM_CPUIDLE=y
 CONFIG_CPU_FREQ=y
@@ -155,8 +152,8 @@ CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_VIRTIO_BLK=y
-CONFIG_EEPROM_AT25=m
 CONFIG_SRAM=y
+CONFIG_EEPROM_AT25=m
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SAS_ATA=y
@@ -168,8 +165,8 @@ CONFIG_AHCI_CEVA=y
 CONFIG_AHCI_MVEBU=y
 CONFIG_AHCI_XGENE=y
 CONFIG_AHCI_QORIQ=y
-CONFIG_SATA_RCAR=y
 CONFIG_SATA_SIL24=y
+CONFIG_SATA_RCAR=y
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
@@ -186,18 +183,17 @@ CONFIG_HNS_ENET=y
 CONFIG_E1000E=y
 CONFIG_IGB=y
 CONFIG_IGBVF=y
-CONFIG_MVPP2=y
 CONFIG_MVNETA=y
+CONFIG_MVPP2=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
 CONFIG_SMSC911X=y
 CONFIG_STMMAC_ETH=m
-CONFIG_REALTEK_PHY=m
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
 CONFIG_MESON_GXL_PHY=m
 CONFIG_MICREL_PHY=y
-CONFIG_MDIO_BUS_MUX=y
-CONFIG_MDIO_BUS_MUX_MMIOREG=y
+CONFIG_REALTEK_PHY=m
 CONFIG_USB_PEGASUS=m
 CONFIG_USB_RTL8150=m
 CONFIG_USB_RTL8152=m
@@ -230,14 +226,14 @@ CONFIG_SERIAL_8250_UNIPHIER=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_MESON=y
+CONFIG_SERIAL_MESON_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
 CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_MESON=y
-CONFIG_SERIAL_MESON_CONSOLE=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
 CONFIG_SERIAL_XILINX_PS_UART=y
@@ -261,14 +257,14 @@ CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_I2C_CROS_EC_TUNNEL=y
 CONFIG_SPI=y
-CONFIG_SPI_MESON_SPIFC=m
 CONFIG_SPI_BCM2835=m
 CONFIG_SPI_BCM2835AUX=m
+CONFIG_SPI_MESON_SPIFC=m
 CONFIG_SPI_ORION=y
 CONFIG_SPI_PL022=y
 CONFIG_SPI_QUP=y
-CONFIG_SPI_SPIDEV=m
 CONFIG_SPI_S3C64XX=y
+CONFIG_SPI_SPIDEV=m
 CONFIG_SPMI=y
 CONFIG_PINCTRL_SINGLE=y
 CONFIG_PINCTRL_MAX77620=y
@@ -286,39 +282,35 @@ CONFIG_GPIO_PCA953X=y
 CONFIG_GPIO_PCA953X_IRQ=y
 CONFIG_GPIO_MAX77620=y
 CONFIG_POWER_RESET_MSM=y
-CONFIG_BATTERY_BQ27XXX=y
 CONFIG_POWER_RESET_XGENE=y
 CONFIG_POWER_RESET_SYSCON=y
+CONFIG_BATTERY_BQ27XXX=y
+CONFIG_SENSORS_ARM_SCPI=y
 CONFIG_SENSORS_LM90=m
 CONFIG_SENSORS_INA2XX=m
-CONFIG_SENSORS_ARM_SCPI=y
-CONFIG_THERMAL=y
-CONFIG_THERMAL_EMULATION=y
 CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
 CONFIG_CPU_THERMAL=y
-CONFIG_BCM2835_THERMAL=y
+CONFIG_THERMAL_EMULATION=y
 CONFIG_EXYNOS_THERMAL=y
 CONFIG_WATCHDOG=y
-CONFIG_BCM2835_WDT=y
-CONFIG_RENESAS_WDT=y
 CONFIG_S3C2410_WATCHDOG=y
 CONFIG_MESON_GXBB_WATCHDOG=m
 CONFIG_MESON_WATCHDOG=m
+CONFIG_RENESAS_WDT=y
+CONFIG_BCM2835_WDT=y
+CONFIG_MFD_CROS_EC=y
+CONFIG_MFD_CROS_EC_I2C=y
 CONFIG_MFD_EXYNOS_LPASS=m
+CONFIG_MFD_HI655X_PMIC=y
 CONFIG_MFD_MAX77620=y
-CONFIG_MFD_RK808=y
 CONFIG_MFD_SPMI_PMIC=y
+CONFIG_MFD_RK808=y
 CONFIG_MFD_SEC_CORE=y
-CONFIG_MFD_HI655X_PMIC=y
-CONFIG_REGULATOR=y
-CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_I2C=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_GPIO=y
 CONFIG_REGULATOR_HI655X=y
 CONFIG_REGULATOR_MAX77620=y
 CONFIG_REGULATOR_PWM=y
-CONFIG_REGULATOR_QCOM_SMD_RPM=y
 CONFIG_REGULATOR_QCOM_SPMI=y
 CONFIG_REGULATOR_RK808=y
 CONFIG_REGULATOR_S2MPS11=y
@@ -345,13 +337,12 @@ CONFIG_DRM_EXYNOS_DSI=y
 CONFIG_DRM_EXYNOS_HDMI=y
 CONFIG_DRM_EXYNOS_MIC=y
 CONFIG_DRM_RCAR_DU=m
-CONFIG_DRM_RCAR_HDMI=y
 CONFIG_DRM_RCAR_LVDS=y
 CONFIG_DRM_RCAR_VSP=y
 CONFIG_DRM_TEGRA=m
-CONFIG_DRM_VC4=m
 CONFIG_DRM_PANEL_SIMPLE=m
 CONFIG_DRM_I2C_ADV7511=m
+CONFIG_DRM_VC4=m
 CONFIG_DRM_HISI_KIRIN=m
 CONFIG_DRM_MESON=m
 CONFIG_FB=y
@@ -366,26 +357,24 @@ CONFIG_SOUND=y
 CONFIG_SND=y
 CONFIG_SND_SOC=y
 CONFIG_SND_BCM2835_SOC_I2S=m
-CONFIG_SND_SOC_RCAR=y
 CONFIG_SND_SOC_SAMSUNG=y
+CONFIG_SND_SOC_RCAR=y
 CONFIG_SND_SOC_AK4613=y
 CONFIG_USB=y
 CONFIG_USB_OTG=y
 CONFIG_USB_XHCI_HCD=y
-CONFIG_USB_XHCI_PLATFORM=y
-CONFIG_USB_XHCI_RCAR=y
-CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_XHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_MSM=y
+CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_OHCI_EXYNOS=y
 CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_EXYNOS=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_RENESAS_USBHS=m
 CONFIG_USB_STORAGE=y
-CONFIG_USB_DWC2=y
 CONFIG_USB_DWC3=y
+CONFIG_USB_DWC2=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
@@ -398,7 +387,6 @@ CONFIG_USB_RENESAS_USBHS_UDC=m
 CONFIG_MMC=y
 CONFIG_MMC_BLOCK_MINORS=32
 CONFIG_MMC_ARMMMCI=y
-CONFIG_MMC_MESON_GX=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_ACPI=y
 CONFIG_MMC_SDHCI_PLTFM=y
@@ -406,6 +394,7 @@ CONFIG_MMC_SDHCI_OF_ARASAN=y
 CONFIG_MMC_SDHCI_OF_ESDHC=y
 CONFIG_MMC_SDHCI_CADENCE=y
 CONFIG_MMC_SDHCI_TEGRA=y
+CONFIG_MMC_MESON_GX=y
 CONFIG_MMC_SDHCI_MSM=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_SDHI=y
@@ -414,32 +403,31 @@ CONFIG_MMC_DW_EXYNOS=y
 CONFIG_MMC_DW_K3=y
 CONFIG_MMC_DW_ROCKCHIP=y
 CONFIG_MMC_SUNXI=y
-CONFIG_MMC_SDHCI_XENON=y
 CONFIG_MMC_BCM2835=y
+CONFIG_MMC_SDHCI_XENON=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_PWM=y
 CONFIG_LEDS_SYSCON=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_MAX77686=y
+CONFIG_RTC_DRV_RK808=m
 CONFIG_RTC_DRV_S5M=y
 CONFIG_RTC_DRV_DS3232=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_S3C=y
 CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_SUN6I=y
-CONFIG_RTC_DRV_RK808=m
 CONFIG_RTC_DRV_TEGRA=y
 CONFIG_RTC_DRV_XGENE=y
-CONFIG_RTC_DRV_S3C=y
 CONFIG_DMADEVICES=y
+CONFIG_DMA_BCM2835=m
 CONFIG_MV_XOR_V2=y
 CONFIG_PL330_DMA=y
-CONFIG_DMA_BCM2835=m
 CONFIG_TEGRA20_APB_DMA=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_QCOM_HIDMA_MGMT=y
@@ -452,18 +440,17 @@ CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_XEN_GNTDEV=y
 CONFIG_XEN_GRANT_DEV_ALLOC=y
+CONFIG_COMMON_CLK_RK808=y
 CONFIG_COMMON_CLK_SCPI=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_S2MPS11=y
-CONFIG_COMMON_CLK_PWM=y
-CONFIG_COMMON_CLK_RK808=y
 CONFIG_CLK_QORIQ=y
+CONFIG_COMMON_CLK_PWM=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_MSM_GCC_8994=y
 CONFIG_MSM_MMCC_8996=y
 CONFIG_HWSPINLOCK_QCOM=y
-CONFIG_MAILBOX=y
 CONFIG_ARM_MHU=y
 CONFIG_PLATFORM_MHU=y
 CONFIG_BCM2835_MBOX=y
@@ -472,32 +459,29 @@ CONFIG_ARM_SMMU=y
 CONFIG_ARM_SMMU_V3=y
 CONFIG_RASPBERRYPI_POWER=y
 CONFIG_QCOM_SMEM=y
-CONFIG_QCOM_SMD=y
-CONFIG_QCOM_SMD_RPM=y
 CONFIG_ROCKCHIP_PM_DOMAINS=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
 CONFIG_ARCH_TEGRA_186_SOC=y
 CONFIG_EXTCON_USB_GPIO=y
+CONFIG_IIO=y
+CONFIG_EXYNOS_ADC=y
 CONFIG_PWM=y
 CONFIG_PWM_BCM2835=m
+CONFIG_PWM_MESON=m
 CONFIG_PWM_ROCKCHIP=y
+CONFIG_PWM_SAMSUNG=y
 CONFIG_PWM_TEGRA=m
-CONFIG_PWM_MESON=m
-CONFIG_COMMON_RESET_HI6220=y
 CONFIG_PHY_RCAR_GEN3_USB2=y
 CONFIG_PHY_HI6220_USB=y
+CONFIG_PHY_SUN4I_USB=y
 CONFIG_PHY_ROCKCHIP_INNO_USB2=y
 CONFIG_PHY_ROCKCHIP_EMMC=y
-CONFIG_PHY_SUN4I_USB=y
 CONFIG_PHY_XGENE=y
 CONFIG_PHY_TEGRA_XUSB=y
 CONFIG_ARM_SCPI_PROTOCOL=y
-CONFIG_ACPI=y
-CONFIG_IIO=y
-CONFIG_EXYNOS_ADC=y
-CONFIG_PWM_SAMSUNG=y
 CONFIG_RASPBERRYPI_FIRMWARE=y
+CONFIG_ACPI=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -511,7 +495,6 @@ CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
 CONFIG_VFAT_FS=y
-CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
 CONFIG_EFIVAR_FS=y
@@ -539,11 +522,9 @@ CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
-CONFIG_CRYPTO_DEV_SAFEXCEL=m
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
-# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
-- 
cgit v1.2.3-59-g8ed1b


From bae3dee0992dcb336a591468376b046e5447997b Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Tue, 16 May 2017 14:17:20 +0800
Subject: mmc: sdhci-xenon: kill xenon_clean_phy()

Currently, the xenon_clean_phy() is only used for freeing phy_params.
The phy_params is allocated by devm_kzalloc(), there's no need to free
is explicitly.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Acked-by: Hu Ziji <huziji@marvell.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/mmc/host/sdhci-xenon-phy.c | 14 +-------------
 drivers/mmc/host/sdhci-xenon.c     |  6 +-----
 drivers/mmc/host/sdhci-xenon.h     |  1 -
 3 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 6356781f1cca..f7e26b031e76 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -787,14 +787,6 @@ int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios)
 	return ret;
 }
 
-void xenon_clean_phy(struct sdhci_host *host)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
-
-	kfree(priv->phy_params);
-}
-
 static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
 			 const char *phy_name)
 {
@@ -819,11 +811,7 @@ static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
 	if (ret)
 		return ret;
 
-	ret = xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
-	if (ret)
-		xenon_clean_phy(host);
-
-	return ret;
+	return xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
 }
 
 int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host)
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index 67246655315b..bc1781bb070b 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -486,7 +486,7 @@ static int xenon_probe(struct platform_device *pdev)
 
 	err = xenon_sdhc_prepare(host);
 	if (err)
-		goto clean_phy_param;
+		goto err_clk;
 
 	err = sdhci_add_host(host);
 	if (err)
@@ -496,8 +496,6 @@ static int xenon_probe(struct platform_device *pdev)
 
 remove_sdhc:
 	xenon_sdhc_unprepare(host);
-clean_phy_param:
-	xenon_clean_phy(host);
 err_clk:
 	clk_disable_unprepare(pltfm_host->clk);
 free_pltfm:
@@ -510,8 +508,6 @@ static int xenon_remove(struct platform_device *pdev)
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 
-	xenon_clean_phy(host);
-
 	sdhci_remove_host(host, 0);
 
 	xenon_sdhc_unprepare(host);
diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h
index 6e6523ea01ce..73debb42dc2f 100644
--- a/drivers/mmc/host/sdhci-xenon.h
+++ b/drivers/mmc/host/sdhci-xenon.h
@@ -93,7 +93,6 @@ struct xenon_priv {
 };
 
 int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios);
-void xenon_clean_phy(struct sdhci_host *host);
 int xenon_phy_parse_dt(struct device_node *np,
 		       struct sdhci_host *host);
 void xenon_soc_pad_ctrl(struct sdhci_host *host,
-- 
cgit v1.2.3-59-g8ed1b


From f4e506c5a3a026a28c99ca2cbc1c79aeca1a1b68 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 17 May 2017 16:52:30 -0500
Subject: arm64: defconfig: enable options needed for QCom DB410c board

Enable Qualcomm drivers needed to boot Dragonboard 410c with HDMI. This
enables support for clocks, regulators, and USB PHY.

Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
[Olof: Turned off _RPM configs per follow-up email]
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm64/configs/defconfig | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d916fc316698..65cdd878cfbd 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -311,6 +311,7 @@ CONFIG_REGULATOR_GPIO=y
 CONFIG_REGULATOR_HI655X=y
 CONFIG_REGULATOR_MAX77620=y
 CONFIG_REGULATOR_PWM=y
+CONFIG_REGULATOR_QCOM_SMD_RPM=y
 CONFIG_REGULATOR_QCOM_SPMI=y
 CONFIG_REGULATOR_RK808=y
 CONFIG_REGULATOR_S2MPS11=y
@@ -381,6 +382,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_USB_ISP1760=y
 CONFIG_USB_HSIC_USB3503=y
 CONFIG_USB_MSM_OTG=y
+CONFIG_USB_QCOM_8X16_PHY=y
 CONFIG_USB_ULPI=y
 CONFIG_USB_GADGET=y
 CONFIG_USB_RENESAS_USBHS_UDC=m
@@ -447,6 +449,7 @@ CONFIG_COMMON_CLK_S2MPS11=y
 CONFIG_CLK_QORIQ=y
 CONFIG_COMMON_CLK_PWM=y
 CONFIG_COMMON_CLK_QCOM=y
+CONFIG_QCOM_CLK_SMD_RPM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_MSM_GCC_8994=y
 CONFIG_MSM_MMCC_8996=y
@@ -457,8 +460,12 @@ CONFIG_BCM2835_MBOX=y
 CONFIG_HI6220_MBOX=y
 CONFIG_ARM_SMMU=y
 CONFIG_ARM_SMMU_V3=y
+CONFIG_RPMSG_QCOM_SMD=y
 CONFIG_RASPBERRYPI_POWER=y
 CONFIG_QCOM_SMEM=y
+CONFIG_QCOM_SMD_RPM=y
+CONFIG_QCOM_SMP2P=y
+CONFIG_QCOM_SMSM=y
 CONFIG_ROCKCHIP_PM_DOMAINS=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
-- 
cgit v1.2.3-59-g8ed1b


From aca69344c8a99e7374d913e42ba9120c398ee16f Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 16 May 2017 11:36:51 +0200
Subject: mmc: cavium-octeon: Fix interrupt enable code

OCTEON SoCs with CIU3 do not have interrupt masking local to the MMC
bus interface.  Unfortunately, some even have a diagnostic register at
the same address of the enable register, which causes the interrupts
to fire immediately if stored to, thus breaking the driver.  The proper
action on these SoCs is not to touch this register.

Fixes: 01d95843335c ("mmc: cavium: Add MMC support for Octeon SOCs.")
Signed-off-by: David Daney <david.daney@cavium.com>
[jglauber@cavium.com: removed point after subject line]
Signed-off-by: Jan Glauber <jglauber@cavium.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cavium-octeon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index 772d0900026d..d698d66e3327 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -108,7 +108,7 @@ static void octeon_mmc_release_bus(struct cvm_mmc_host *host)
 static void octeon_mmc_int_enable(struct cvm_mmc_host *host, u64 val)
 {
 	writeq(val, host->base + MIO_EMM_INT(host));
-	if (!host->dma_active || (host->dma_active && !host->has_ciu3))
+	if (!host->has_ciu3)
 		writeq(val, host->base + MIO_EMM_INT_EN(host));
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 899e4aad15e93315fa18ab9e9c88904ad237cfa0 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 16 May 2017 11:36:52 +0200
Subject: mmc: cavium-octeon: Use proper GPIO name for power control

The devm_gpiod_get_optional() function appends a "-gpios" to the
string passed to it, so if we want to find the "power-gpios" signal,
we must pass "power" to this function.

Fixes: 01d95843335c ("mmc: cavium: Add MMC support for Octeon SOCs.")
Signed-off-by: David Daney <david.daney@cavium.com>
[jglauber@cavium.com: removed point after subject line]
Signed-off-by: Jan Glauber <jglauber@cavium.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cavium-octeon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index d698d66e3327..cbb566377508 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -267,7 +267,7 @@ static int octeon_mmc_probe(struct platform_device *pdev)
 	}
 
 	host->global_pwr_gpiod = devm_gpiod_get_optional(&pdev->dev,
-							 "power-gpios",
+							 "power",
 							 GPIOD_OUT_HIGH);
 	if (IS_ERR(host->global_pwr_gpiod)) {
 		dev_err(&pdev->dev, "Invalid power GPIO\n");
-- 
cgit v1.2.3-59-g8ed1b


From 0527873b29b077fc8e656acd63e1866b429fef55 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 May 2017 13:50:16 +0200
Subject: ARM: remove duplicate 'const' annotations'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc-7 warns about some declarations that are more 'const' than necessary:

arch/arm/mach-at91/pm.c:338:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]
 static const struct of_device_id const ramc_ids[] __initconst = {
arch/arm/mach-bcm/bcm_kona_smc.c:36:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]
 static const struct of_device_id const bcm_kona_smc_ids[] __initconst = {
arch/arm/mach-spear/time.c:207:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]
 static const struct of_device_id const timer_of_match[] __initconst = {
arch/arm/mach-omap2/prm_common.c:714:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]
 static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = {
arch/arm/mach-omap2/vc.c:562:35: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]
 static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = {

The ones in arch/arm were apparently all introduced accidentally by one
commit that correctly marked a lot of variables as __initconst.

Fixes: 19c233b79d1a ("ARM: appropriate __init annotation for const data")
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Krzysztof Hałasa <khalasa@piap.pl>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-at91/pm.c          | 2 +-
 arch/arm/mach-bcm/bcm_kona_smc.c | 2 +-
 arch/arm/mach-cns3xxx/core.c     | 2 +-
 arch/arm/mach-omap2/prm_common.c | 2 +-
 arch/arm/mach-omap2/vc.c         | 2 +-
 arch/arm/mach-spear/time.c       | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 2cd27c830ab6..283e79ab587d 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -335,7 +335,7 @@ static const struct ramc_info ramc_infos[] __initconst = {
 	{ .idle = sama5d3_ddr_standby, .memctrl = AT91_MEMCTRL_DDRSDR},
 };
 
-static const struct of_device_id const ramc_ids[] __initconst = {
+static const struct of_device_id ramc_ids[] __initconst = {
 	{ .compatible = "atmel,at91rm9200-sdramc", .data = &ramc_infos[0] },
 	{ .compatible = "atmel,at91sam9260-sdramc", .data = &ramc_infos[1] },
 	{ .compatible = "atmel,at91sam9g45-ddramc", .data = &ramc_infos[2] },
diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c
index cf3f8658f0e5..a55a7ecf146a 100644
--- a/arch/arm/mach-bcm/bcm_kona_smc.c
+++ b/arch/arm/mach-bcm/bcm_kona_smc.c
@@ -33,7 +33,7 @@ struct bcm_kona_smc_data {
 	unsigned result;
 };
 
-static const struct of_device_id const bcm_kona_smc_ids[] __initconst = {
+static const struct of_device_id bcm_kona_smc_ids[] __initconst = {
 	{.compatible = "brcm,kona-smc"},
 	{.compatible = "bcm,kona-smc"}, /* deprecated name */
 	{},
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 03da3813f1ab..7d5a44a06648 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -346,7 +346,7 @@ static struct usb_ohci_pdata cns3xxx_usb_ohci_pdata = {
 	.power_off	= csn3xxx_usb_power_off,
 };
 
-static const struct of_dev_auxdata const cns3xxx_auxdata[] __initconst = {
+static const struct of_dev_auxdata cns3xxx_auxdata[] __initconst = {
 	{ "intel,usb-ehci", CNS3XXX_USB_BASE, "ehci-platform", &cns3xxx_usb_ehci_pdata },
 	{ "intel,usb-ohci", CNS3XXX_USB_OHCI_BASE, "ohci-platform", &cns3xxx_usb_ohci_pdata },
 	{ "cavium,cns3420-ahci", CNS3XXX_SATA2_BASE, "ahci", NULL },
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 2b138b65129a..dc11841ca334 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -711,7 +711,7 @@ static struct omap_prcm_init_data scrm_data __initdata = {
 };
 #endif
 
-static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = {
+static const struct of_device_id omap_prcm_dt_match_table[] __initconst = {
 #ifdef CONFIG_SOC_AM33XX
 	{ .compatible = "ti,am3-prcm", .data = &am3_prm_data },
 #endif
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index 2028167fff31..d76b1e5eb8ba 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -559,7 +559,7 @@ struct i2c_init_data {
 	u8 hsscll_12;
 };
 
-static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = {
+static const struct i2c_init_data omap4_i2c_timing_data[] __initconst = {
 	{
 		.load = 50,
 		.loadbits = 0x3,
diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
index 4878ba90026d..289e036c9c30 100644
--- a/arch/arm/mach-spear/time.c
+++ b/arch/arm/mach-spear/time.c
@@ -204,7 +204,7 @@ static void __init spear_clockevent_init(int irq)
 	setup_irq(irq, &spear_timer_irq);
 }
 
-static const struct of_device_id const timer_of_match[] __initconst = {
+static const struct of_device_id timer_of_match[] __initconst = {
 	{ .compatible = "st,spear-timer", },
 	{ },
 };
-- 
cgit v1.2.3-59-g8ed1b


From 76cefef8e838304a71725a0b5007c375619d78fb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jan 2017 12:53:05 +0100
Subject: firmware: ti_sci: fix strncat length check

gcc-7 notices that the length we pass to strncat is wrong:

drivers/firmware/ti_sci.c: In function 'ti_sci_probe':
drivers/firmware/ti_sci.c:204:32: error: specified bound 50 equals the size of the destination [-Werror=stringop-overflow=]

Instead of the total length, we must pass the length of the
remaining space here.

Fixes: aa276781a64a ("firmware: Add basic support for TI System Control Interface (TI-SCI) protocol")
Cc: stable@vger.kernel.org
Acked-by: Nishanth Menon <nm@ti.com>
Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/firmware/ti_sci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index 874ff32db366..00cfed3c3e1a 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -202,7 +202,8 @@ static int ti_sci_debugfs_create(struct platform_device *pdev,
 	info->debug_buffer[info->debug_region_size] = 0;
 
 	info->d = debugfs_create_file(strncat(debug_name, dev_name(dev),
-					      sizeof(debug_name)),
+					      sizeof(debug_name) -
+					      sizeof("ti_sci_debug@")),
 				      0444, NULL, info, &ti_sci_debug_fops);
 	if (IS_ERR(info->d))
 		return PTR_ERR(info->d);
-- 
cgit v1.2.3-59-g8ed1b


From 1fccb73011ea8a5fa0c6d357c33fa29c695139ea Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 5 Apr 2017 13:41:15 +0200
Subject: iTCO_wdt: all versions count down twice

The ICH9 is listed as having TCO v2, and indeed the behavior in the
datasheet corresponds to v2 (for example the NO_REBOOT flag is
accessible via the 16KiB-aligned Root Complex Base Address).

However, the TCO counts twice just like in v1; the documentation
of the SECOND_TO_STS bit says: "ICH9 sets this bit to 1 to indicate
that the TIMEOUT bit had been (or is currently) set and a second
timeout occurred before the TCO_RLD register was written. If this
bit is set and the NO_REBOOT config bit is 0, then the ICH9 will
reboot the system after the second timeout.  The same can be found
in the BayTrail (Atom E3800) datasheet, and even HOWTOs around
the Internet say that it will reboot after _twice_ the specified
heartbeat.

I did not find the Apollo Lake datasheet, but because v4/v5 has
a SECOND_TO_STS bit just like the previous version I'm enabling
this for Apollo Lake as well.

Cc: linux-watchdog@vger.kernel.org
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 Documentation/watchdog/watchdog-parameters.txt |  2 +-
 drivers/watchdog/iTCO_wdt.c                    | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 4f7d86dd0a5d..914518aeb972 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -117,7 +117,7 @@ nowayout: Watchdog cannot be stopped once started
 -------------------------------------------------
 iTCO_wdt:
 heartbeat: Watchdog heartbeat in seconds.
-	(2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
+	(5<=heartbeat<=74 (TCO v1) or 1226 (TCO v2), default=30)
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 347f0389b089..c4f65873bfa4 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -306,16 +306,15 @@ static int iTCO_wdt_ping(struct watchdog_device *wd_dev)
 
 	iTCO_vendor_pre_keepalive(p->smi_res, wd_dev->timeout);
 
+	/* Reset the timeout status bit so that the timer
+	 * needs to count down twice again before rebooting */
+	outw(0x0008, TCO1_STS(p));	/* write 1 to clear bit */
+
 	/* Reload the timer by writing to the TCO Timer Counter register */
-	if (p->iTCO_version >= 2) {
+	if (p->iTCO_version >= 2)
 		outw(0x01, TCO_RLD(p));
-	} else if (p->iTCO_version == 1) {
-		/* Reset the timeout status bit so that the timer
-		 * needs to count down twice again before rebooting */
-		outw(0x0008, TCO1_STS(p));	/* write 1 to clear bit */
-
+	else if (p->iTCO_version == 1)
 		outb(0x01, TCO_RLD(p));
-	}
 
 	spin_unlock(&p->io_lock);
 	return 0;
@@ -328,11 +327,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t)
 	unsigned char val8;
 	unsigned int tmrval;
 
-	tmrval = seconds_to_ticks(p, t);
-
-	/* For TCO v1 the timer counts down twice before rebooting */
-	if (p->iTCO_version == 1)
-		tmrval /= 2;
+	/* The timer counts down twice before rebooting */
+	tmrval = seconds_to_ticks(p, t) / 2;
 
 	/* from the specs: */
 	/* "Values of 0h-3h are ignored and should not be attempted" */
@@ -385,6 +381,8 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev)
 		spin_lock(&p->io_lock);
 		val16 = inw(TCO_RLD(p));
 		val16 &= 0x3ff;
+		if (!(inw(TCO1_STS(p)) & 0x0008))
+			val16 += (inw(TCOv2_TMR(p)) & 0x3ff);
 		spin_unlock(&p->io_lock);
 
 		time_left = ticks_to_seconds(p, val16);
-- 
cgit v1.2.3-59-g8ed1b


From 07441a7dd11f6855bcf55fbbfc6abba42258b2c6 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 25 Apr 2017 16:17:33 +0000
Subject: watchdog: zx2967: remove redundant dev_err call in zx2967_wdt_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/zx2967_wdt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c
index e290d5a13a6d..c98252733c30 100644
--- a/drivers/watchdog/zx2967_wdt.c
+++ b/drivers/watchdog/zx2967_wdt.c
@@ -211,10 +211,8 @@ static int zx2967_wdt_probe(struct platform_device *pdev)
 
 	base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	wdt->reg_base = devm_ioremap_resource(dev, base);
-	if (IS_ERR(wdt->reg_base)) {
-		dev_err(dev, "ioremap failed\n");
+	if (IS_ERR(wdt->reg_base))
 		return PTR_ERR(wdt->reg_base);
-	}
 
 	zx2967_wdt_reset_sysctrl(dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From fedf266f9955d9a019643cde199a2fd9a0259f6f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 27 Apr 2017 18:02:32 -0700
Subject: watchdog: bcm281xx: Fix use of uninitialized spinlock.

The bcm_kona_wdt_set_resolution_reg() call takes the spinlock, so
initialize it earlier.  Fixes a warning at boot with lock debugging
enabled.

Fixes: 6adb730dc208 ("watchdog: bcm281xx: Watchdog Driver")
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/bcm_kona_wdt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c
index 6fce17d5b9f1..a5775dfd8d5f 100644
--- a/drivers/watchdog/bcm_kona_wdt.c
+++ b/drivers/watchdog/bcm_kona_wdt.c
@@ -304,6 +304,8 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev)
 	if (!wdt)
 		return -ENOMEM;
 
+	spin_lock_init(&wdt->lock);
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	wdt->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(wdt->base))
@@ -316,7 +318,6 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	spin_lock_init(&wdt->lock);
 	platform_set_drvdata(pdev, wdt);
 	watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt);
 	bcm_kona_wdt_wdd.parent = &pdev->dev;
-- 
cgit v1.2.3-59-g8ed1b


From a486cd23661c9387fb076c3f6ae8b2aa9d20d54a Mon Sep 17 00:00:00 2001
From: Antony Antony <antony@phenome.org>
Date: Fri, 19 May 2017 12:47:00 +0200
Subject: xfrm: fix state migration copy replay sequence numbers

During xfrm migration copy replay and preplay sequence numbers
from the previous state.

Here is a tcpdump output showing the problem.
10.0.10.46 is running vanilla kernel, is the IKE/IPsec responder.
After the migration it sent wrong sequence number, reset to 1.
The migration is from 10.0.0.52 to 10.0.0.53.

IP 10.0.0.52.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7cf), length 136
IP 10.0.10.46.4500 > 10.0.0.52.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x7cf), length 136
IP 10.0.0.52.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d0), length 136
IP 10.0.10.46.4500 > 10.0.0.52.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x7d0), length 136

IP 10.0.0.53.4500 > 10.0.10.46.4500: NONESP-encap: isakmp: child_sa  inf2[I]
IP 10.0.10.46.4500 > 10.0.0.53.4500: NONESP-encap: isakmp: child_sa  inf2[R]
IP 10.0.0.53.4500 > 10.0.10.46.4500: NONESP-encap: isakmp: child_sa  inf2[I]
IP 10.0.10.46.4500 > 10.0.0.53.4500: NONESP-encap: isakmp: child_sa  inf2[R]

IP 10.0.0.53.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d1), length 136

NOTE: next sequence is wrong 0x1

IP 10.0.10.46.4500 > 10.0.0.53.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x1), length 136
IP 10.0.0.53.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d2), length 136
IP 10.0.10.46.4500 > 10.0.0.53.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x2), length 136

Signed-off-by: Antony Antony <antony@phenome.org>
Reviewed-by: Richard Guy Briggs <rgb@tricolour.ca>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fc3c5aa38754..2e291bc5f1fc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1383,6 +1383,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
 	x->km.seq = orig->km.seq;
+	x->replay = orig->replay;
+	x->preplay = orig->preplay;
 
 	return x;
 
-- 
cgit v1.2.3-59-g8ed1b


From 6bf1c2d26716dcd483699cc62474e49d164c5563 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 19 May 2017 14:12:00 +0200
Subject: arm64: dts: rockchip: fix include reference

The way we handle include paths for DT has changed a bit, which
broke a file that had an unconventional way to reference a common
header file:

arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts:47:10: fatal error: include/dt-bindings/input/linux-event-codes.h: No such file or directory

This removes the leading "include/" from the path name, which fixes it.

Fixes: d5d332d3f7e8 ("devicetree: Move include prefixes from arch to separate directory")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
index 658bb9dc9dfd..7bd31066399b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
@@ -44,7 +44,7 @@
 
 /dts-v1/;
 #include "rk3399-gru.dtsi"
-#include <include/dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/input/linux-event-codes.h>
 
 /*
  * Kevin-specific things
-- 
cgit v1.2.3-59-g8ed1b


From 9d6408433019bfae15e2d0d5f4498c4ff70b86c0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 19 May 2017 09:56:40 +0100
Subject: i2c: designware: don't infer timings described by ACPI from clock
 rate

Commit bd698d24b1b57 ("i2c: designware: Get selected speed mode
sda-hold-time via ACPI") updated the logic that reads the timing
parameters for various I2C bus rates from the DSDT, to only read
the timing parameters for the currently selected mode.

This causes a WARN_ON() splat on platforms that legally omit the clock
frequency from the ACPI description, because in the new situation, the
core I2C designware driver still accesses the fields in the driver
struct that we no longer populate, and proceeds to calculate them from
the clock frequency. Since the clock frequency is unspecified, the
driver complains loudly using a WARN_ON().

So revert back to the old situation, where the struct fields for all
timings are populated, but retain the new logic which chooses the SDA
hold time from the timing mode that is currently in use.

Fixes: bd698d24b1b57 ("i2c: designware: Get selected speed mode ...")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reported-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index f2acd4b6bf01..6283b99d2b17 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -96,6 +96,7 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev)
 	struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
 	acpi_handle handle = ACPI_HANDLE(&pdev->dev);
 	const struct acpi_device_id *id;
+	u32 ss_ht, fp_ht, hs_ht, fs_ht;
 	struct acpi_device *adev;
 	const char *uid;
 
@@ -107,23 +108,24 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev)
 	 * Try to get SDA hold time and *CNT values from an ACPI method for
 	 * selected speed modes.
 	 */
+	dw_i2c_acpi_params(pdev, "SSCN", &dev->ss_hcnt, &dev->ss_lcnt, &ss_ht);
+	dw_i2c_acpi_params(pdev, "FPCN", &dev->fp_hcnt, &dev->fp_lcnt, &fp_ht);
+	dw_i2c_acpi_params(pdev, "HSCN", &dev->hs_hcnt, &dev->hs_lcnt, &hs_ht);
+	dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt, &fs_ht);
+
 	switch (dev->clk_freq) {
 	case 100000:
-		dw_i2c_acpi_params(pdev, "SSCN", &dev->ss_hcnt, &dev->ss_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = ss_ht;
 		break;
 	case 1000000:
-		dw_i2c_acpi_params(pdev, "FPCN", &dev->fp_hcnt, &dev->fp_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = fp_ht;
 		break;
 	case 3400000:
-		dw_i2c_acpi_params(pdev, "HSCN", &dev->hs_hcnt, &dev->hs_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = hs_ht;
 		break;
 	case 400000:
 	default:
-		dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt,
-				   &dev->sda_hold_time);
+		dev->sda_hold_time = fs_ht;
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 3ecb3ac7b950ff8f6c6a61e8b7b0d6e3546429a0 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 15 May 2017 19:16:15 -0700
Subject: xfs: avoid mount-time deadlock in CoW extent recovery

If a malicious user corrupts the refcount btree to cause a cycle between
different levels of the tree, the next mount attempt will deadlock in
the CoW recovery routine while grabbing buffer locks.  We can use the
ability to re-grab a buffer that was previous locked to a transaction to
avoid deadlocks, so do that here.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_refcount.c | 43 +++++++++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b177ef33cd4c..82a38d86ebad 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers(
 	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
 		return -EOPNOTSUPP;
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	INIT_LIST_HEAD(&debris);
+
+	/*
+	 * In this first part, we use an empty transaction to gather up
+	 * all the leftover CoW extents so that we can subsequently
+	 * delete them.  The empty transaction is used to avoid
+	 * a buffer lock deadlock if there happens to be a loop in the
+	 * refcountbt because we're allowed to re-grab a buffer that is
+	 * already attached to our transaction.  When we're done
+	 * recording the CoW debris we cancel the (empty) transaction
+	 * and everything goes away cleanly.
+	 */
+	error = xfs_trans_alloc_empty(mp, &tp);
 	if (error)
 		return error;
-	cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+	if (error)
+		goto out_trans;
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
 
 	/* Find all the leftover CoW staging extents. */
-	INIT_LIST_HEAD(&debris);
 	memset(&low, 0, sizeof(low));
 	memset(&high, 0, sizeof(high));
 	low.rc.rc_startblock = XFS_REFC_COW_START;
@@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers(
 	if (error)
 		goto out_cursor;
 	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
+	xfs_trans_cancel(tp);
 
 	/* Now iterate the list to free the leftovers */
-	list_for_each_entry(rr, &debris, rr_list) {
+	list_for_each_entry_safe(rr, n, &debris, rr_list) {
 		/* Set up transaction. */
 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
 		if (error)
@@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers(
 		error = xfs_trans_commit(tp);
 		if (error)
 			goto out_free;
+
+		list_del(&rr->rr_list);
+		kmem_free(rr);
 	}
 
+	return error;
+out_defer:
+	xfs_defer_cancel(&dfops);
+out_trans:
+	xfs_trans_cancel(tp);
 out_free:
 	/* Free the leftover list */
 	list_for_each_entry_safe(rr, n, &debris, rr_list) {
@@ -1688,11 +1712,6 @@ out_free:
 
 out_cursor:
 	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-	xfs_buf_relse(agbp);
-	goto out_free;
-
-out_defer:
-	xfs_defer_cancel(&dfops);
-	xfs_trans_cancel(tp);
-	goto out_free;
+	xfs_trans_brelse(tp, agbp);
+	goto out_trans;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 5f3394530fbe90d3bcd1c204618960bc50236578 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Fri, 19 May 2017 08:04:59 -0700
Subject: blktrace: fix integer parse

sscanf is a very poor way to parse integer. For example, I input
"discard" for act_mask, it gets 0xd and completely messes up. Using
correct API to do integer parse.

This patch also makes attributes accept any base of integer.

Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 kernel/trace/blktrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bd8ae8d5ae9c..193c5f5e3f79 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1662,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 		goto out;
 
 	if (attr == &dev_attr_act_mask) {
-		if (sscanf(buf, "%llx", &value) != 1) {
+		if (kstrtoull(buf, 0, &value)) {
 			/* Assume it is a list of trace category names */
 			ret = blk_trace_str2mask(buf);
 			if (ret < 0)
 				goto out;
 			value = ret;
 		}
-	} else if (sscanf(buf, "%llu", &value) != 1)
+	} else if (kstrtoull(buf, 0, &value))
 		goto out;
 
 	ret = -ENXIO;
-- 
cgit v1.2.3-59-g8ed1b


From e2c2206a18993bc9f62393d49c7b2066c3845b25 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 11 May 2017 18:12:05 -0700
Subject: KVM: x86: Fix potential preemption when get the current kvmclock
 timestamp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 BUG: using __this_cpu_read() in preemptible [00000000] code: qemu-system-x86/2809
 caller is __this_cpu_preempt_check+0x13/0x20
 CPU: 2 PID: 2809 Comm: qemu-system-x86 Not tainted 4.11.0+ #13
 Call Trace:
  dump_stack+0x99/0xce
  check_preemption_disabled+0xf5/0x100
  __this_cpu_preempt_check+0x13/0x20
  get_kvmclock_ns+0x6f/0x110 [kvm]
  get_time_ref_counter+0x5d/0x80 [kvm]
  kvm_hv_process_stimers+0x2a1/0x8a0 [kvm]
  ? kvm_hv_process_stimers+0x2a1/0x8a0 [kvm]
  ? kvm_arch_vcpu_ioctl_run+0xac9/0x1ce0 [kvm]
  kvm_arch_vcpu_ioctl_run+0x5bf/0x1ce0 [kvm]
  kvm_vcpu_ioctl+0x384/0x7b0 [kvm]
  ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm]
  ? __fget+0xf3/0x210
  do_vfs_ioctl+0xa4/0x700
  ? __fget+0x114/0x210
  SyS_ioctl+0x79/0x90
  entry_SYSCALL_64_fastpath+0x23/0xc2
 RIP: 0033:0x7f9d164ed357
  ? __this_cpu_preempt_check+0x13/0x20

This can be reproduced by run kvm-unit-tests/hyperv_stimer.flat w/
CONFIG_PREEMPT and CONFIG_DEBUG_PREEMPT enabled.

Safe access to per-CPU data requires a couple of constraints, though: the
thread working with the data cannot be preempted and it cannot be migrated
while it manipulates per-CPU variables. If the thread is preempted, the
thread that replaces it could try to work with the same variables; migration
to another CPU could also cause confusion. However there is no preemption
disable when reads host per-CPU tsc rate to calculate the current kvmclock
timestamp.

This patch fixes it by utilizing get_cpu/put_cpu pair to guarantee both
__this_cpu_read() and rdtsc() are not preempted.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b54125b590e8..3b5fc7e35f6e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1763,6 +1763,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 {
 	struct kvm_arch *ka = &kvm->arch;
 	struct pvclock_vcpu_time_info hv_clock;
+	u64 ret;
 
 	spin_lock(&ka->pvclock_gtod_sync_lock);
 	if (!ka->use_master_clock) {
@@ -1774,10 +1775,17 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
 	spin_unlock(&ka->pvclock_gtod_sync_lock);
 
+	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
+	get_cpu();
+
 	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
 			   &hv_clock.tsc_shift,
 			   &hv_clock.tsc_to_system_mul);
-	return __pvclock_read_cycles(&hv_clock, rdtsc());
+	ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+
+	put_cpu();
+
+	return ret;
 }
 
 static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
-- 
cgit v1.2.3-59-g8ed1b


From cbfc6c9184ce71b52df4b1d82af5afc81a709178 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Fri, 19 May 2017 02:46:56 -0700
Subject: KVM: X86: Fix read out-of-bounds vulnerability in kvm pio emulation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Huawei folks reported a read out-of-bounds vulnerability in kvm pio emulation.

- "inb" instruction to access PIT Mod/Command register (ioport 0x43, write only,
  a read should be ignored) in guest can get a random number.
- "rep insb" instruction to access PIT register port 0x43 can control memcpy()
  in emulator_pio_in_emulated() to copy max 0x400 bytes but only read 1 bytes,
  which will disclose the unimportant kernel memory in host but no crash.

The similar test program below can reproduce the read out-of-bounds vulnerability:

void hexdump(void *mem, unsigned int len)
{
        unsigned int i, j;

        for(i = 0; i < len + ((len % HEXDUMP_COLS) ? (HEXDUMP_COLS - len % HEXDUMP_COLS) : 0); i++)
        {
                /* print offset */
                if(i % HEXDUMP_COLS == 0)
                {
                        printf("0x%06x: ", i);
                }

                /* print hex data */
                if(i < len)
                {
                        printf("%02x ", 0xFF & ((char*)mem)[i]);
                }
                else /* end of block, just aligning for ASCII dump */
                {
                        printf("   ");
                }

                /* print ASCII dump */
                if(i % HEXDUMP_COLS == (HEXDUMP_COLS - 1))
                {
                        for(j = i - (HEXDUMP_COLS - 1); j <= i; j++)
                        {
                                if(j >= len) /* end of block, not really printing */
                                {
                                        putchar(' ');
                                }
                                else if(isprint(((char*)mem)[j])) /* printable char */
                                {
                                        putchar(0xFF & ((char*)mem)[j]);
                                }
                                else /* other char */
                                {
                                        putchar('.');
                                }
                        }
                        putchar('\n');
                }
        }
}

int main(void)
{
	int i;
	if (iopl(3))
	{
		err(1, "set iopl unsuccessfully\n");
		return -1;
	}
	static char buf[0x40];

	/* test ioport 0x40,0x41,0x42,0x43,0x44,0x45 */

	memset(buf, 0xab, sizeof(buf));

	asm volatile("push %rdi;");
	asm volatile("mov %0, %%rdi;"::"q"(buf));

	asm volatile ("mov $0x40, %rdx;");
	asm volatile ("in %dx,%al;");
	asm volatile ("stosb;");

	asm volatile ("mov $0x41, %rdx;");
	asm volatile ("in %dx,%al;");
	asm volatile ("stosb;");

	asm volatile ("mov $0x42, %rdx;");
	asm volatile ("in %dx,%al;");
	asm volatile ("stosb;");

	asm volatile ("mov $0x43, %rdx;");
	asm volatile ("in %dx,%al;");
	asm volatile ("stosb;");

	asm volatile ("mov $0x44, %rdx;");
	asm volatile ("in %dx,%al;");
	asm volatile ("stosb;");

	asm volatile ("mov $0x45, %rdx;");
	asm volatile ("in %dx,%al;");
	asm volatile ("stosb;");

	asm volatile ("pop %rdi;");
	hexdump(buf, 0x40);

	printf("\n");

	/* ins port 0x40 */

	memset(buf, 0xab, sizeof(buf));

	asm volatile("push %rdi;");
	asm volatile("mov %0, %%rdi;"::"q"(buf));

	asm volatile ("mov $0x20, %rcx;");
	asm volatile ("mov $0x40, %rdx;");
	asm volatile ("rep insb;");

	asm volatile ("pop %rdi;");
	hexdump(buf, 0x40);

	printf("\n");

	/* ins port 0x43 */

	memset(buf, 0xab, sizeof(buf));

	asm volatile("push %rdi;");
	asm volatile("mov %0, %%rdi;"::"q"(buf));

	asm volatile ("mov $0x20, %rcx;");
	asm volatile ("mov $0x43, %rdx;");
	asm volatile ("rep insb;");

	asm volatile ("pop %rdi;");
	hexdump(buf, 0x40);

	printf("\n");
	return 0;
}

The vcpu->arch.pio_data buffer is used by both in/out instrutions emulation
w/o clear after using which results in some random datas are left over in
the buffer. Guest reads port 0x43 will be ignored since it is write only,
however, the function kernel_pio() can't distigush this ignore from successfully
reads data from device's ioport. There is no new data fill the buffer from
port 0x43, however, emulator_pio_in_emulated() will copy the stale data in
the buffer to the guest unconditionally. This patch fixes it by clearing the
buffer before in instruction emulation to avoid to grant guest the stale data
in the buffer.

In addition, string I/O is not supported for in kernel device. So there is no
iteration to read ioport %RCX times for string I/O. The function kernel_pio()
just reads one round, and then copy the io size * %RCX to the guest unconditionally,
actually it copies the one round ioport data w/ other random datas which are left
over in the vcpu->arch.pio_data buffer to the guest. This patch fixes it by
introducing the string I/O support for in kernel device in order to grant the right
ioport datas to the guest.

Before the patch:

0x000000: fe 38 93 93 ff ff ab ab .8......
0x000008: ab ab ab ab ab ab ab ab ........
0x000010: ab ab ab ab ab ab ab ab ........
0x000018: ab ab ab ab ab ab ab ab ........
0x000020: ab ab ab ab ab ab ab ab ........
0x000028: ab ab ab ab ab ab ab ab ........
0x000030: ab ab ab ab ab ab ab ab ........
0x000038: ab ab ab ab ab ab ab ab ........

0x000000: f6 00 00 00 00 00 00 00 ........
0x000008: 00 00 00 00 00 00 00 00 ........
0x000010: 00 00 00 00 4d 51 30 30 ....MQ00
0x000018: 30 30 20 33 20 20 20 20 00 3
0x000020: ab ab ab ab ab ab ab ab ........
0x000028: ab ab ab ab ab ab ab ab ........
0x000030: ab ab ab ab ab ab ab ab ........
0x000038: ab ab ab ab ab ab ab ab ........

0x000000: f6 00 00 00 00 00 00 00 ........
0x000008: 00 00 00 00 00 00 00 00 ........
0x000010: 00 00 00 00 4d 51 30 30 ....MQ00
0x000018: 30 30 20 33 20 20 20 20 00 3
0x000020: ab ab ab ab ab ab ab ab ........
0x000028: ab ab ab ab ab ab ab ab ........
0x000030: ab ab ab ab ab ab ab ab ........
0x000038: ab ab ab ab ab ab ab ab ........

After the patch:

0x000000: 1e 02 f8 00 ff ff ab ab ........
0x000008: ab ab ab ab ab ab ab ab ........
0x000010: ab ab ab ab ab ab ab ab ........
0x000018: ab ab ab ab ab ab ab ab ........
0x000020: ab ab ab ab ab ab ab ab ........
0x000028: ab ab ab ab ab ab ab ab ........
0x000030: ab ab ab ab ab ab ab ab ........
0x000038: ab ab ab ab ab ab ab ab ........

0x000000: d2 e2 d2 df d2 db d2 d7 ........
0x000008: d2 d3 d2 cf d2 cb d2 c7 ........
0x000010: d2 c4 d2 c0 d2 bc d2 b8 ........
0x000018: d2 b4 d2 b0 d2 ac d2 a8 ........
0x000020: ab ab ab ab ab ab ab ab ........
0x000028: ab ab ab ab ab ab ab ab ........
0x000030: ab ab ab ab ab ab ab ab ........
0x000038: ab ab ab ab ab ab ab ab ........

0x000000: 00 00 00 00 00 00 00 00 ........
0x000008: 00 00 00 00 00 00 00 00 ........
0x000010: 00 00 00 00 00 00 00 00 ........
0x000018: 00 00 00 00 00 00 00 00 ........
0x000020: ab ab ab ab ab ab ab ab ........
0x000028: ab ab ab ab ab ab ab ab ........
0x000030: ab ab ab ab ab ab ab ab ........
0x000038: ab ab ab ab ab ab ab ab ........

Reported-by: Moguofang <moguofang@huawei.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Moguofang <moguofang@huawei.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3b5fc7e35f6e..519f3572e48e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4831,16 +4831,20 @@ emul_write:
 
 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
-	/* TODO: String I/O for in kernel device */
-	int r;
+	int r = 0, i;
 
-	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
-				    vcpu->arch.pio.size, pd);
-	else
-		r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
-				     vcpu->arch.pio.port, vcpu->arch.pio.size,
-				     pd);
+	for (i = 0; i < vcpu->arch.pio.count; i++) {
+		if (vcpu->arch.pio.in)
+			r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
+					    vcpu->arch.pio.size, pd);
+		else
+			r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
+					     vcpu->arch.pio.port, vcpu->arch.pio.size,
+					     pd);
+		if (r)
+			break;
+		pd += vcpu->arch.pio.size;
+	}
 	return r;
 }
 
@@ -4878,6 +4882,8 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	if (vcpu->arch.pio.count)
 		goto data_avail;
 
+	memset(vcpu->arch.pio_data, 0, size * count);
+
 	ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
 	if (ret) {
 data_avail:
-- 
cgit v1.2.3-59-g8ed1b


From f0367ee1d64d27fa08be2407df5c125442e885e3 Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 18 May 2017 19:37:30 +0200
Subject: KVM: x86: zero base3 of unusable segments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Static checker noticed that base3 could be used uninitialized if the
segment was not present (useable).  Random stack values probably would
not pass VMCS entry checks.

Reported-by:  Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 1aa366163b8b ("KVM: x86 emulator: consolidate segment accessors")
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 519f3572e48e..02363e37d4a6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5067,6 +5067,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
 
 	if (var.unusable) {
 		memset(desc, 0, sizeof(*desc));
+		if (base3)
+			*base3 = 0;
 		return false;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 34b0dadbdf698f9b277a31b2747b625b9a75ea1f Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 18 May 2017 19:37:31 +0200
Subject: KVM: x86/vPMU: fix undefined shift in intel_pmu_refresh()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Static analysis noticed that pmu->nr_arch_gp_counters can be 32
(INTEL_PMC_MAX_GENERIC) and therefore cannot be used to shift 'int'.

I didn't add BUILD_BUG_ON for it as we have a better checker.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch")
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/pmu_intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 9d4a8504a95a..5ab4a364348e 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -294,7 +294,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 			((u64)1 << edx.split.bit_width_fixed) - 1;
 	}
 
-	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+	pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
 		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
 	pmu->global_ctrl_mask = ~pmu->global_ctrl;
 
-- 
cgit v1.2.3-59-g8ed1b


From 92ceb7679ab8807d3b7fbcc6daf2279036954ef5 Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 18 May 2017 19:37:32 +0200
Subject: KVM: x86: prevent uninitialized variable warning in check_svme()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

get_msr() of MSR_EFER is currently always going to succeed, but static
checker doesn't see that far.

Don't complicate stuff and just use 0 for the fallback -- it means that
the feature is not present.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c25cfaf584e7..0816ab2e8adc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4173,7 +4173,7 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt)
 
 static int check_svme(struct x86_emulate_ctxt *ctxt)
 {
-	u64 efer;
+	u64 efer = 0;
 
 	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 
-- 
cgit v1.2.3-59-g8ed1b


From f63572dff1421b6ca6abce71d46e03411e605c94 Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Fri, 5 May 2017 14:52:06 -0600
Subject: nvme: unmap CMB and remove sysfs file in reset path

CMB doesn't get unmapped until removal while getting remapped on every
reset. Add the unmapping and sysfs file removal to the reset path in
nvme_pci_disable to match the mapping path in nvme_pci_enable.

Fixes: 202021c1a ("nvme : Add sysfs entry for NVMe CMBs when appropriate")

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Reviewed-By: Stephen Bates <sbates@raithlin.com>
Cc: <stable@vger.kernel.org> # 4.9+
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/pci.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 56a315bd4d96..0866f64890e5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1510,6 +1510,11 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
 	if (dev->cmb) {
 		iounmap(dev->cmb);
 		dev->cmb = NULL;
+		if (dev->cmbsz) {
+			sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+						     &dev_attr_cmb.attr, NULL);
+			dev->cmbsz = 0;
+		}
 	}
 }
 
@@ -1783,6 +1788,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
+	nvme_release_cmb(dev);
 	pci_free_irq_vectors(pdev);
 
 	if (pci_is_enabled(pdev)) {
@@ -2188,7 +2194,6 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_dev_disable(dev, true);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
-- 
cgit v1.2.3-59-g8ed1b


From 4123109050a869a8871e58a50f28f383d41e49ad Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 5 May 2017 16:13:02 -0700
Subject: nvme-fc: correct port role bits

FC Port roles is a bit mask, not individual values.
Correct nvme definitions to unique bits.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme-fc-driver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 0db37158a61d..12e344b5b77f 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -27,8 +27,8 @@
 
 /* FC Port role bitmask - can merge with FC Port Roles in fc transport */
 #define FC_PORT_ROLE_NVME_INITIATOR	0x10
-#define FC_PORT_ROLE_NVME_TARGET	0x11
-#define FC_PORT_ROLE_NVME_DISCOVERY	0x12
+#define FC_PORT_ROLE_NVME_TARGET	0x20
+#define FC_PORT_ROLE_NVME_DISCOVERY	0x40
 
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 85e6a6adf8de7f992e01d2c3c59d9875d658b276 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 5 May 2017 16:13:15 -0700
Subject: nvme-fc: require target or discovery role for fc-nvme targets

In order to create an association, the remoteport must be
serving either a target role or a discovery role.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/fc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 70e689bf1cad..912d457150d5 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2720,6 +2720,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	unsigned long flags;
 	int ret, idx;
 
+	if (!(rport->remoteport.port_role &
+	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
+		ret = -EBADR;
+		goto out_fail;
+	}
+
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
 		ret = -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 2952a879bacbfae8b03fd886754e64fe14b8041e Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 25 Apr 2017 15:32:01 -0700
Subject: nvme-fc: stop queues on error detection

Per the recommendation by Sagi on:
http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html

Rather than waiting for reset work thread to stop queues and abort the ios,
immediately stop the queues on error detection. Reset thread will restop
the queues (as it's called on other paths), but it does not appear to have
a side effect.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/fc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 912d457150d5..dca7165fabcf 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1754,6 +1754,10 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
+	/* stop the queues on error, cleanup is in reset thread */
+	if (ctrl->queue_count > 1)
+		nvme_stop_queues(&ctrl->ctrl);
+
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
-- 
cgit v1.2.3-59-g8ed1b


From 4b8ba5fa525bc8bdaaed2a5c5433f0f2008d7bc5 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 25 Apr 2017 16:23:09 -0700
Subject: nvmet-fc: remove target cpu scheduling flag

Remove NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED. It's unnecessary.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/target/fc.c       |  4 +---
 drivers/nvme/target/fcloop.c   |  1 -
 drivers/scsi/lpfc/lpfc_nvmet.c |  1 -
 include/linux/nvme-fc-driver.h | 12 ++----------
 4 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 62eba29c85fb..2006fae61980 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -517,9 +517,7 @@ nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid)
 {
 	int cpu, idx, cnt;
 
-	if (!(tgtport->ops->target_features &
-			NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED) ||
-	    tgtport->ops->max_hw_queues == 1)
+	if (tgtport->ops->max_hw_queues == 1)
 		return WORK_CPU_UNBOUND;
 
 	/* Simple cpu selection based on qid modulo active cpu count */
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 15551ef79c8c..294a6611fb24 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -698,7 +698,6 @@ static struct nvmet_fc_target_template tgttemplate = {
 	.dma_boundary		= FCLOOP_DMABOUND_4G,
 	/* optional features */
 	.target_features	= NVMET_FCTGTFEAT_CMD_IN_ISR |
-				  NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
 				  NVMET_FCTGTFEAT_OPDONE_IN_ISR,
 	/* sizes of additional private data for data structures */
 	.target_priv_sz		= sizeof(struct fcloop_tport),
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 94434e621c33..0488580eea12 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -764,7 +764,6 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 	lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
 	lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel;
 	lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
-					   NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
 					   NVMET_FCTGTFEAT_CMD_IN_ISR |
 					   NVMET_FCTGTFEAT_OPDONE_IN_ISR;
 
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 12e344b5b77f..6c8c5d8041b7 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -642,15 +642,7 @@ enum {
 		 * sequence in one LLDD operation. Errors during Data
 		 * sequence transmit must not allow RSP sequence to be sent.
 		 */
-	NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1),
-		/* Bit 1: When 0, the LLDD will deliver FCP CMD
-		 * on the CPU it should be affinitized to. Thus work will
-		 * be scheduled on the cpu received on. When 1, the LLDD
-		 * may not deliver the CMD on the CPU it should be worked
-		 * on. The transport should pick a cpu to schedule the work
-		 * on.
-		 */
-	NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2),
+	NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1),
 		/* Bit 2: When 0, the LLDD is calling the cmd rcv handler
 		 * in a non-isr context, allowing the transport to finish
 		 * op completion in the calling context. When 1, the LLDD
@@ -658,7 +650,7 @@ enum {
 		 * requiring the transport to transition to a workqueue
 		 * for op completion.
 		 */
-	NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3),
+	NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2),
 		/* Bit 3: When 0, the LLDD is calling the op done handler
 		 * in a non-isr context, allowing the transport to finish
 		 * op completion in the calling context. When 1, the LLDD
-- 
cgit v1.2.3-59-g8ed1b


From 549f01ae7b913355bea76100d3f17694bc9ec769 Mon Sep 17 00:00:00 2001
From: Vijay Immanuel <vijayi@attalasystems.com>
Date: Mon, 8 May 2017 16:38:35 -0700
Subject: nvmet: release the sq ref on rdma read errors

On rdma read errors, release the sq ref that was taken
when the req was initialized. This avoids a hang in
nvmet_sq_destroy() when the queue is being freed.

Signed-off-by: Vijay Immanuel <vijayi@attalasystems.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/target/core.c  | 6 ++++++
 drivers/nvme/target/nvmet.h | 1 +
 drivers/nvme/target/rdma.c  | 1 +
 3 files changed, 8 insertions(+)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index cf90713043da..eb9399ac97cf 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -529,6 +529,12 @@ fail:
 }
 EXPORT_SYMBOL_GPL(nvmet_req_init);
 
+void nvmet_req_uninit(struct nvmet_req *req)
+{
+	percpu_ref_put(&req->sq->ref);
+}
+EXPORT_SYMBOL_GPL(nvmet_req_uninit);
+
 static inline bool nvmet_cc_en(u32 cc)
 {
 	return cc & 0x1;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 7cb77ba5993b..cfc5c7fb0ab7 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -261,6 +261,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
+void nvmet_req_uninit(struct nvmet_req *req);
 void nvmet_req_complete(struct nvmet_req *req, u16 status);
 
 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 99c69018a35f..9e45cde63376 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -567,6 +567,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 	rsp->n_rdma = 0;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvmet_req_uninit(&rsp->req);
 		nvmet_rdma_release_rsp(rsp);
 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
 			pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
-- 
cgit v1.2.3-59-g8ed1b


From a7cc722fff0b32bcd28bf4722dff816b0b695f7d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 21 May 2017 13:08:42 -0400
Subject: fix unsafe_put_user()

__put_user_size() relies upon its first argument having the same type as what
the second one points to; the only other user makes sure of that and
unsafe_put_user() should do the same.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 68766b276d9e..d9668c3beb5b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -703,7 +703,7 @@ extern struct movsl_mask {
 #define unsafe_put_user(x, ptr, err_label)					\
 do {										\
 	int __pu_err;								\
-	__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);		\
+	__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);		\
 	if (unlikely(__pu_err)) goto err_label;					\
 } while (0)
 
-- 
cgit v1.2.3-59-g8ed1b


From a8c39544a6eb2093c04afd5005b6192bd0e880c6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 May 2017 21:47:25 -0400
Subject: osf_wait4(): fix infoleak

failing sys_wait4() won't fill struct rusage...

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/osf_sys.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 9ec56dc97374..ce93124a850b 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1201,8 +1201,10 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
 	if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur)))
 		return -EFAULT;
 
-	err = 0;
-	err |= put_user(status, ustatus);
+	err = put_user(status, ustatus);
+	if (ret < 0)
+		return err ? err : ret;
+
 	err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec);
 	err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec);
 	err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec);
-- 
cgit v1.2.3-59-g8ed1b


From 499350a5a6e7512d9ed369ed63a4244b6536f4f8 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Thu, 18 May 2017 11:22:33 -0700
Subject: tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0

When tcp_disconnect() is called, inet_csk_delack_init() sets
icsk->icsk_ack.rcv_mss to 0.
This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() =>
__tcp_select_window() call path to have division by 0 issue.
So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0.

Reported-by: Andrey Konovalov  <andreyknvl@google.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e4c76d2b827..842b575f8fdd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2320,6 +2320,10 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
+	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+	 * issue in __tcp_select_window()
+	 */
+	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
 	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
-- 
cgit v1.2.3-59-g8ed1b


From 34eb5fe07831458cf8238d54c1fc847dedeaf68c Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Thu, 18 May 2017 12:41:18 -0700
Subject: arp: fixed error in a comment

the is_garp code deals just with gratuitous ARP packets, not every
unsolicited packet.

This patch is a result of a discussion in netdev:
http://marc.info/?l=linux-netdev&m=149506354216994

Suggested-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d54345a06f72..053492af8a6e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -846,7 +846,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 		 */
 		is_garp = tip == sip && addr_type == RTN_UNICAST;
 
-		/* Unsolicited ARP _replies_ also require target hwaddr to be
+		/* Gratuitous ARP _replies_ also require target hwaddr to be
 		 * the same as source.
 		 */
 		if (is_garp && arp->ar_op == htons(ARPOP_REPLY))
-- 
cgit v1.2.3-59-g8ed1b


From 6fd05633bdafc0ae6ec0d55e61af10780d4d3530 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Thu, 18 May 2017 12:41:19 -0700
Subject: arp: decompose is_garp logic into a separate function

The code is quite involving already to earn a separate function for
itself. If anything, it helps arp_process readability.

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 053492af8a6e..ca6e1e6c1496 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -641,6 +641,27 @@ void arp_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(arp_xmit);
 
+static bool arp_is_garp(struct net_device *dev, int addr_type,
+			__be16 ar_op,
+			__be32 sip, __be32 tip,
+			unsigned char *sha, unsigned char *tha)
+{
+	bool is_garp = tip == sip && addr_type == RTN_UNICAST;
+
+	/* Gratuitous ARP _replies_ also require target hwaddr to be
+	 * the same as source.
+	 */
+	if (is_garp && ar_op == htons(ARPOP_REPLY))
+		is_garp =
+			/* IPv4 over IEEE 1394 doesn't provide target
+			 * hardware address field in its ARP payload.
+			 */
+			tha &&
+			!memcmp(tha, sha, dev->addr_len);
+
+	return is_garp;
+}
+
 /*
  *	Process an arp request.
  */
@@ -844,18 +865,8 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
-		is_garp = tip == sip && addr_type == RTN_UNICAST;
-
-		/* Gratuitous ARP _replies_ also require target hwaddr to be
-		 * the same as source.
-		 */
-		if (is_garp && arp->ar_op == htons(ARPOP_REPLY))
-			is_garp =
-				/* IPv4 over IEEE 1394 doesn't provide target
-				 * hardware address field in its ARP payload.
-				 */
-				tha &&
-				!memcmp(tha, sha, dev->addr_len);
+		is_garp = arp_is_garp(dev, addr_type, arp->ar_op,
+				      sip, tip, sha, tha);
 
 		if (!n &&
 		    ((arp->ar_op == htons(ARPOP_REPLY)  &&
-- 
cgit v1.2.3-59-g8ed1b


From d9ef2e7bf99f59179b89d5c1c4d5b4919375daee Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Thu, 18 May 2017 12:41:20 -0700
Subject: arp: postpone addr_type calculation to as late as possible

The addr_type retrieval can be costly, so it's worth trying to avoid its
calculation as much as possible. This patch makes it calculated only
for gratuitous ARP packets. This is especially important since later we
may want to move is_garp calculation outside of arp_accept block, at
which point the costly operation will be executed for all setups.

The patch is the result of a discussion in net-dev:
http://marc.info/?l=linux-netdev&m=149506354216994

Suggested-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ca6e1e6c1496..c22103cec823 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -641,12 +641,12 @@ void arp_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(arp_xmit);
 
-static bool arp_is_garp(struct net_device *dev, int addr_type,
-			__be16 ar_op,
+static bool arp_is_garp(struct net *net, struct net_device *dev,
+			int *addr_type, __be16 ar_op,
 			__be32 sip, __be32 tip,
 			unsigned char *sha, unsigned char *tha)
 {
-	bool is_garp = tip == sip && addr_type == RTN_UNICAST;
+	bool is_garp = tip == sip;
 
 	/* Gratuitous ARP _replies_ also require target hwaddr to be
 	 * the same as source.
@@ -659,6 +659,11 @@ static bool arp_is_garp(struct net_device *dev, int addr_type,
 			tha &&
 			!memcmp(tha, sha, dev->addr_len);
 
+	if (is_garp) {
+		*addr_type = inet_addr_type_dev_table(net, dev, sip);
+		if (*addr_type != RTN_UNICAST)
+			is_garp = false;
+	}
 	return is_garp;
 }
 
@@ -859,18 +864,23 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
 	if (IN_DEV_ARP_ACCEPT(in_dev)) {
-		unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
+		addr_type = -1;
 
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
-		is_garp = arp_is_garp(dev, addr_type, arp->ar_op,
+		is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
 				      sip, tip, sha, tha);
 
 		if (!n &&
-		    ((arp->ar_op == htons(ARPOP_REPLY)  &&
-				addr_type == RTN_UNICAST) || is_garp))
+		    (is_garp ||
+		     (arp->ar_op == htons(ARPOP_REPLY) &&
+		      (addr_type == RTN_UNICAST ||
+		       (addr_type < 0 &&
+			/* postpone calculation to as late as possible */
+			inet_addr_type_dev_table(net, dev, sip) ==
+				RTN_UNICAST)))))
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 7d472a59c0e5ec117220a05de6b370447fb6cb66 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Thu, 18 May 2017 12:41:21 -0700
Subject: arp: always override existing neigh entries with gratuitous ARP

Currently, when arp_accept is 1, we always override existing neigh
entries with incoming gratuitous ARP replies. Otherwise, we override
them only if new replies satisfy _locktime_ conditional (packets arrive
not earlier than _locktime_ seconds since the last update to the neigh
entry).

The idea behind locktime is to pick the very first (=> close) reply
received in a unicast burst when ARP proxies are used. This helps to
avoid ARP thrashing where Linux would switch back and forth from one
proxy to another.

This logic has nothing to do with gratuitous ARP replies that are
generally not aligned in time when multiple IP address carriers send
them into network.

This patch enforces overriding of existing neigh entries by all incoming
gratuitous ARP packets, irrespective of their time of arrival. This will
make the kernel honour all incoming gratuitous ARP packets.

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c22103cec823..ae96e6f3e0cb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -863,16 +863,17 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IN_DEV_ARP_ACCEPT(in_dev)) {
+	if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
 		addr_type = -1;
+		is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
+				      sip, tip, sha, tha);
+	}
 
+	if (IN_DEV_ARP_ACCEPT(in_dev)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
-		is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
-				      sip, tip, sha, tha);
-
 		if (!n &&
 		    (is_garp ||
 		     (arp->ar_op == htons(ARPOP_REPLY) &&
-- 
cgit v1.2.3-59-g8ed1b


From fe0cd8ca1b82983db24b173bb8518ea646c02d25 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Fri, 19 May 2017 14:00:25 +0000
Subject: smsc95xx: Support only IPv4 TCP/UDP csum offload

When TX checksum offload is used, if the computed checksum is 0 the
LAN95xx device do not alter the checksum to 0xffff.  In the case of ipv4
UDP checksum, it indicates to receiver that no checksum is calculated.
Under ipv6, UDP checksum yields a result of zero must be changed to
0xffff. Hence disabling checksum offload for ipv6 packets.

Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>

Reported-by: popcorn mix <popcornmix@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc95xx.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 765400b62168..2dfca96a63b6 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev,
 	if (ret < 0)
 		return ret;
 
-	if (features & NETIF_F_HW_CSUM)
+	if (features & NETIF_F_IP_CSUM)
 		read_buf |= Tx_COE_EN_;
 	else
 		read_buf &= ~Tx_COE_EN_;
@@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	spin_lock_init(&pdata->mac_cr_lock);
 
+	/* LAN95xx devices do not alter the computed checksum of 0 to 0xffff.
+	 * RFC 2460, ipv6 UDP calculated checksum yields a result of zero must
+	 * be changed to 0xffff. RFC 768, ipv4 UDP computed checksum is zero,
+	 * it is transmitted as all ones. The zero transmitted checksum means
+	 * transmitter generated no checksum. Hence, enable csum offload only
+	 * for ipv4 packets.
+	 */
 	if (DEFAULT_TX_CSUM_ENABLE)
-		dev->net->features |= NETIF_F_HW_CSUM;
+		dev->net->features |= NETIF_F_IP_CSUM;
 	if (DEFAULT_RX_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_RXCSUM;
 
-	dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+	dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
 
 	smsc95xx_init_mac_address(dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 6d18c732b95c0a9d35e9f978b4438bba15412284 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 19 May 2017 22:20:29 +0800
Subject: bridge: start hello_timer when enabling KERNEL_STP in br_stp_start

Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop
kernel hello and hold timers"), bridge would not start hello_timer if
stp_enabled is not KERNEL_STP when br_dev_open.

The problem is even if users set stp_enabled with KERNEL_STP later,
the timer will still not be started. It causes that KERNEL_STP can
not really work. Users have to re-ifup the bridge to avoid this.

This patch is to fix it by starting br->hello_timer when enabling
KERNEL_STP in br_stp_start.

As an improvement, it's also to start hello_timer again only when
br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is
no reason to start the timer again when it's NO_STP.

Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers")
Reported-by: Haidong Li <haili@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Reviewed-by: Ivan Vecera <cera@cera.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp_if.c    | 1 +
 net/bridge/br_stp_timer.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 08341d2aa9c9..0db8102995a5 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -179,6 +179,7 @@ static void br_stp_start(struct net_bridge *br)
 		br_debug(br, "using kernel STP\n");
 
 		/* To start timers on any ports left in blocking */
+		mod_timer(&br->hello_timer, jiffies + br->hello_time);
 		br_port_state_selection(br);
 	}
 
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c98b3e5c140a..60b6fe277a8b 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg)
 	if (br->dev->flags & IFF_UP) {
 		br_config_bpdu_generation(br);
 
-		if (br->stp_enabled != BR_USER_STP)
+		if (br->stp_enabled == BR_KERNEL_STP)
 			mod_timer(&br->hello_timer,
 				  round_jiffies(jiffies + br->hello_time));
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 334a023ee50997b45ffb8fbcc8bc875519040aac Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 21 May 2017 15:25:46 -0700
Subject: Clean up x86 unsafe_get/put_user() type handling

Al noticed that unsafe_put_user() had type problems, and fixed them in
commit a7cc722fff0b ("fix unsafe_put_user()"), which made me look more
at those functions.

It turns out that unsafe_get_user() had a type issue too: it limited the
largest size of the type it could handle to "unsigned long".  Which is
fine with the current users, but doesn't match our existing normal
get_user() semantics, which can also handle "u64" even when that does
not fit in a long.

While at it, also clean up the type cast in unsafe_put_user().  We
actually want to just make it an assignment to the expected type of the
pointer, because we actually do want warnings from types that don't
convert silently.  And it makes the code more readable by not having
that one very long and complex line.

[ This patch might become stable material if we ever end up back-porting
  any new users of the unsafe uaccess code, but as things stand now this
  doesn't matter for any current existing uses. ]

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/uaccess.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index d9668c3beb5b..fc1eb64fdfff 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -703,14 +703,15 @@ extern struct movsl_mask {
 #define unsafe_put_user(x, ptr, err_label)					\
 do {										\
 	int __pu_err;								\
-	__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);		\
+	__typeof__(*(ptr)) __pu_val = (x);					\
+	__put_user_size(__pu_val, (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);	\
 	if (unlikely(__pu_err)) goto err_label;					\
 } while (0)
 
 #define unsafe_get_user(x, ptr, err_label)					\
 do {										\
 	int __gu_err;								\
-	unsigned long __gu_val;							\
+	__inttype(*(ptr)) __gu_val;						\
 	__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	if (unlikely(__gu_err)) goto err_label;					\
-- 
cgit v1.2.3-59-g8ed1b


From 33c9e9729033387ef0521324c62e7eba529294af Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 21 May 2017 18:26:54 -0700
Subject: x86: fix 32-bit case of __get_user_asm_u64()

The code to fetch a 64-bit value from user space was entirely buggered,
and has been since the code was merged in early 2016 in commit
b2f680380ddf ("x86/mm/32: Add support for 64-bit __get_user() on 32-bit
kernels").

Happily the buggered routine is almost certainly entirely unused, since
the normal way to access user space memory is just with the non-inlined
"get_user()", and the inlined version didn't even historically exist.

The normal "get_user()" case is handled by external hand-written asm in
arch/x86/lib/getuser.S that doesn't have either of these issues.

There were two independent bugs in __get_user_asm_u64():

 - it still did the STAC/CLAC user space access marking, even though
   that is now done by the wrapper macros, see commit 11f1a4b9755f
   ("x86: reorganize SMAP handling in user space accesses").

   This didn't result in a semantic error, it just means that the
   inlined optimized version was hugely less efficient than the
   allegedly slower standard version, since the CLAC/STAC overhead is
   quite high on modern Intel CPU's.

 - the double register %eax/%edx was marked as an output, but the %eax
   part of it was touched early in the asm, and could thus clobber other
   inputs to the asm that gcc didn't expect it to touch.

   In particular, that meant that the generated code could look like
   this:

        mov    (%eax),%eax
        mov    0x4(%eax),%edx

   where the load of %edx obviously was _supposed_ to be from the 32-bit
   word that followed the source of %eax, but because %eax was
   overwritten by the first instruction, the source of %edx was
   basically random garbage.

The fixes are trivial: remove the extraneous STAC/CLAC entries, and mark
the 64-bit output as early-clobber to let gcc know that no inputs should
alias with the output register.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable@kernel.org   # v4.8+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/uaccess.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index fc1eb64fdfff..a059aac9e937 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -319,10 +319,10 @@ do {									\
 #define __get_user_asm_u64(x, ptr, retval, errret)			\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
-	asm volatile(ASM_STAC "\n"					\
+	asm volatile("\n"					\
 		     "1:	movl %2,%%eax\n"			\
 		     "2:	movl %3,%%edx\n"			\
-		     "3: " ASM_CLAC "\n"				\
+		     "3:\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "4:	mov %4,%0\n"				\
 		     "	xorl %%eax,%%eax\n"				\
@@ -331,7 +331,7 @@ do {									\
 		     ".previous\n"					\
 		     _ASM_EXTABLE(1b, 4b)				\
 		     _ASM_EXTABLE(2b, 4b)				\
-		     : "=r" (retval), "=A"(x)				\
+		     : "=r" (retval), "=&A"(x)				\
 		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
 		       "i" (errret), "0" (retval));			\
 })
-- 
cgit v1.2.3-59-g8ed1b


From 08332893e37af6ae779367e78e444f8f9571511d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 21 May 2017 19:30:23 -0700
Subject: Linux 4.12-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b1ee4a49efa2..63e10bd4f14a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3-59-g8ed1b


From e480eabae232b92ff44ce63678280373713920a4 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 18 May 2017 21:33:44 +0200
Subject: drm/radeon: Fix oops upon driver load on PowerXpress laptops

Nicolai Stange reports the following oops which is caused by
dereferencing rdev->pdev before it's subsequently set by
radeon_device_init().  Fix it.

  BUG: unable to handle kernel NULL pointer dereference at 00000000000007cb
  IP: radeon_driver_load_kms+0xeb/0x230 [radeon]
  ...
  Call Trace:
   drm_dev_register+0x146/0x1d0 [drm]
   drm_get_pci_dev+0x9a/0x180 [drm]
   radeon_pci_probe+0xb8/0xe0 [radeon]
   local_pci_probe+0x45/0xa0
   pci_device_probe+0x14f/0x1a0
   driver_probe_device+0x29c/0x450
   __driver_attach+0xdf/0xf0
   ? driver_probe_device+0x450/0x450
   bus_for_each_dev+0x6c/0xc0
   driver_attach+0x1e/0x20
   bus_add_driver+0x170/0x270
   driver_register+0x60/0xe0
   ? 0xffffffffc0508000
   __pci_register_driver+0x4c/0x50
   drm_pci_init+0xeb/0x100 [drm]
   ? vga_switcheroo_register_handler+0x6a/0x90
   ? 0xffffffffc0508000
   radeon_init+0x98/0xb6 [radeon]
   do_one_initcall+0x52/0x1a0
   ? __vunmap+0x81/0xb0
   ? kmem_cache_alloc_trace+0x159/0x1b0
   ? do_init_module+0x27/0x1f8
   do_init_module+0x5f/0x1f8
   load_module+0x27ce/0x2be0
   SYSC_finit_module+0xdf/0x110
   ? SYSC_finit_module+0xdf/0x110
   SyS_finit_module+0xe/0x10
   do_syscall_64+0x67/0x150
   entry_SYSCALL64_slow_path+0x25/0x25

Fixes: 7ffb0ce31cf9 ("drm/radeon: Don't register Thunderbolt eGPU with vga_switcheroo")
Reported-and-tested-by: Nicolai Stange <nicstange@gmail.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: http://patchwork.freedesktop.org/patch/msgid/cfb91ba052af06117137eec0637543a2626a7979.1495135190.git.lukas@wunner.de
---
 drivers/gpu/drm/radeon/radeon_kms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index e3e7cb1d10a2..4761f27f2ca2 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -116,7 +116,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	if ((radeon_runtime_pm != 0) &&
 	    radeon_has_atpx() &&
 	    ((flags & RADEON_IS_IGP) == 0) &&
-	    !pci_is_thunderbolt_attached(rdev->pdev))
+	    !pci_is_thunderbolt_attached(dev->pdev))
 		flags |= RADEON_IS_PX;
 
 	/* radeon_device_init should report only fatal error
-- 
cgit v1.2.3-59-g8ed1b


From 5165da5923d6c7df6f2927b0113b2e4d9288661e Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Fri, 5 May 2017 11:06:50 +0200
Subject: i2c: i2c-tiny-usb: fix buffer not being DMA capable

Since v4.9 i2c-tiny-usb generates the below call trace
and longer works, since it can't communicate with the
USB device. The reason is, that since v4.9 the USB
stack checks, that the buffer it should transfer is DMA
capable. This was a requirement since v2.2 days, but it
usually worked nevertheless.

[   17.504959] ------------[ cut here ]------------
[   17.505488] WARNING: CPU: 0 PID: 93 at drivers/usb/core/hcd.c:1587 usb_hcd_map_urb_for_dma+0x37c/0x570
[   17.506545] transfer buffer not dma capable
[   17.507022] Modules linked in:
[   17.507370] CPU: 0 PID: 93 Comm: i2cdetect Not tainted 4.11.0-rc8+ #10
[   17.508103] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
[   17.509039] Call Trace:
[   17.509320]  ? dump_stack+0x5c/0x78
[   17.509714]  ? __warn+0xbe/0xe0
[   17.510073]  ? warn_slowpath_fmt+0x5a/0x80
[   17.510532]  ? nommu_map_sg+0xb0/0xb0
[   17.510949]  ? usb_hcd_map_urb_for_dma+0x37c/0x570
[   17.511482]  ? usb_hcd_submit_urb+0x336/0xab0
[   17.511976]  ? wait_for_completion_timeout+0x12f/0x1a0
[   17.512549]  ? wait_for_completion_timeout+0x65/0x1a0
[   17.513125]  ? usb_start_wait_urb+0x65/0x160
[   17.513604]  ? usb_control_msg+0xdc/0x130
[   17.514061]  ? usb_xfer+0xa4/0x2a0
[   17.514445]  ? __i2c_transfer+0x108/0x3c0
[   17.514899]  ? i2c_transfer+0x57/0xb0
[   17.515310]  ? i2c_smbus_xfer_emulated+0x12f/0x590
[   17.515851]  ? _raw_spin_unlock_irqrestore+0x11/0x20
[   17.516408]  ? i2c_smbus_xfer+0x125/0x330
[   17.516876]  ? i2c_smbus_xfer+0x125/0x330
[   17.517329]  ? i2cdev_ioctl_smbus+0x1c1/0x2b0
[   17.517824]  ? i2cdev_ioctl+0x75/0x1c0
[   17.518248]  ? do_vfs_ioctl+0x9f/0x600
[   17.518671]  ? vfs_write+0x144/0x190
[   17.519078]  ? SyS_ioctl+0x74/0x80
[   17.519463]  ? entry_SYSCALL_64_fastpath+0x1e/0xad
[   17.519959] ---[ end trace d047c04982f5ac50 ]---

Cc: <stable@vger.kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Till Harbaum <till@harbaum.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-tiny-usb.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c
index 0ed77eeff31e..a2e3dd715380 100644
--- a/drivers/i2c/busses/i2c-tiny-usb.c
+++ b/drivers/i2c/busses/i2c-tiny-usb.c
@@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd,
 		    int value, int index, void *data, int len)
 {
 	struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data;
+	void *dmadata = kmalloc(len, GFP_KERNEL);
+	int ret;
+
+	if (!dmadata)
+		return -ENOMEM;
 
 	/* do control transfer */
-	return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0),
+	ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0),
 			       cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE |
-			       USB_DIR_IN, value, index, data, len, 2000);
+			       USB_DIR_IN, value, index, dmadata, len, 2000);
+
+	memcpy(data, dmadata, len);
+	kfree(dmadata);
+	return ret;
 }
 
 static int usb_write(struct i2c_adapter *adapter, int cmd,
 		     int value, int index, void *data, int len)
 {
 	struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data;
+	void *dmadata = kmemdup(data, len, GFP_KERNEL);
+	int ret;
+
+	if (!dmadata)
+		return -ENOMEM;
 
 	/* do control transfer */
-	return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0),
+	ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0),
 			       cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
-			       value, index, data, len, 2000);
+			       value, index, dmadata, len, 2000);
+
+	kfree(dmadata);
+	return ret;
 }
 
 static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev)
-- 
cgit v1.2.3-59-g8ed1b


From e2c824924cdb41528932c550647406ad81336b18 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 22 May 2017 07:46:55 +0200
Subject: i2c: designware: Fix bogus sda_hold_time due to uninitialized vars

We need to initializes those variables to 0 for platforms that do not
provide ACPI parameters. Otherwise, we set sda_hold_time to random
values, breaking e.g. Galileo and IOT2000 boards.

Fixes: 9d6408433019 ("i2c: designware: don't infer timings described by ACPI from clock rate")
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 6283b99d2b17..d1263b82d646 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -94,9 +94,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[],
 static int dw_i2c_acpi_configure(struct platform_device *pdev)
 {
 	struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
+	u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0;
 	acpi_handle handle = ACPI_HANDLE(&pdev->dev);
 	const struct acpi_device_id *id;
-	u32 ss_ht, fp_ht, hs_ht, fs_ht;
 	struct acpi_device *adev;
 	const char *uid;
 
-- 
cgit v1.2.3-59-g8ed1b


From 63691587f7b0028326ddd1226c378aaaeca4d4e4 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 22 May 2017 16:32:46 +0200
Subject: ALSA: hda - Apply dual-codec quirk for MSI Z270-Gaming mobo

MSI Z270-Gamin mobo has also two ALC1220 codecs like Gigabyte AZ370-
Gaming mobo.  Apply the same quirk to this one.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 9c22ad694534..3fdd5af190a4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2328,6 +2328,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+	SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
 	SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
-- 
cgit v1.2.3-59-g8ed1b


From ba90d6a6b00a84f2a18112145c113e5ef628e561 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 22 May 2017 16:38:47 +0200
Subject: ALSA: hda - Provide dual-codecs model option for a few Realtek codecs

Recently some laptops and mobos are equipped with the dual Realtek
codecs that require special quirks.  For making the debugging easier,
add the model "dual-codecs" to be passed via module option.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3fdd5af190a4..918e45268915 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2343,6 +2343,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
 	{.id = ALC883_FIXUP_ACER_EAPD, .name = "acer-aspire"},
 	{.id = ALC882_FIXUP_INV_DMIC, .name = "inv-dmic"},
 	{.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"},
+	{.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"},
 	{}
 };
 
@@ -6015,6 +6016,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
 	{.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
 	{.id = ALC292_FIXUP_TPT460, .name = "tpt460"},
+	{.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
@@ -7342,6 +7344,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = {
 	{.id = ALC662_FIXUP_ASUS_MODE8, .name = "asus-mode8"},
 	{.id = ALC662_FIXUP_INV_DMIC, .name = "inv-dmic"},
 	{.id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
+	{.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
 	{}
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From a79e7df97592b2326be81d5dae286bdb5c529a01 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 22 May 2017 16:41:24 +0200
Subject: ALSA: hda - Update the list of quirk models

I've forgotten to sync the documentation with the actually available
options for some time.  Now all updated.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 Documentation/sound/hd-audio/models.rst | 114 ++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst
index 5338673c88d9..773d2bfacc6c 100644
--- a/Documentation/sound/hd-audio/models.rst
+++ b/Documentation/sound/hd-audio/models.rst
@@ -16,6 +16,8 @@ ALC880
     6-jack in back, 2-jack in front
 6stack-digout
     6-jack with a SPDIF out
+6stack-automute
+    6-jack with headphone jack detection
 
 ALC260
 ======
@@ -62,6 +64,8 @@ lenovo-dock
     Enables docking station I/O for some Lenovos
 hp-gpio-led
     GPIO LED support on HP laptops
+hp-dock-gpio-mic1-led
+    HP dock with mic LED support
 dell-headset-multi
     Headset jack, which can also be used as mic-in
 dell-headset-dock
@@ -72,6 +76,12 @@ alc283-sense-combo
     Combo jack sensing on ALC283
 tpt440-dock
     Pin configs for Lenovo Thinkpad Dock support
+tpt440
+    Lenovo Thinkpad T440s setup
+tpt460
+    Lenovo Thinkpad T460/560 setup
+dual-codecs
+    Lenovo laptops with dual codecs
 
 ALC66x/67x/892
 ==============
@@ -97,6 +107,8 @@ inv-dmic
     Inverted internal mic workaround
 dell-headset-multi
     Headset jack, which can also be used as mic-in
+dual-codecs
+    Lenovo laptops with dual codecs
 
 ALC680
 ======
@@ -114,6 +126,8 @@ inv-dmic
     Inverted internal mic workaround
 no-primary-hp
     VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC)
+dual-codecs
+    ALC1220 dual codecs for Gaming mobos
 
 ALC861/660
 ==========
@@ -206,65 +220,47 @@ auto
 
 Conexant 5045
 =============
-laptop-hpsense
-    Laptop with HP sense (old model laptop)
-laptop-micsense
-    Laptop with Mic sense (old model fujitsu)
-laptop-hpmicsense
-    Laptop with HP and Mic senses
-benq
-    Benq R55E
-laptop-hp530
-    HP 530 laptop
-test
-    for testing/debugging purpose, almost all controls can be
-    adjusted.  Appearing only when compiled with $CONFIG_SND_DEBUG=y
+cap-mix-amp
+    Fix max input level on mixer widget
+toshiba-p105
+    Toshiba P105 quirk
+hp-530
+    HP 530 quirk
 
 Conexant 5047
 =============
-laptop
-    Basic Laptop config 
-laptop-hp
-    Laptop config for some HP models (subdevice 30A5)
-laptop-eapd
-    Laptop config with EAPD support
-test
-    for testing/debugging purpose, almost all controls can be
-    adjusted.  Appearing only when compiled with $CONFIG_SND_DEBUG=y
+cap-mix-amp
+    Fix max input level on mixer widget
 
 Conexant 5051
 =============
-laptop
-    Basic Laptop config (default)
-hp
-    HP Spartan laptop
-hp-dv6736
-    HP dv6736
-hp-f700
-    HP Compaq Presario F700
-ideapad
-    Lenovo IdeaPad laptop
-toshiba
-    Toshiba Satellite M300
+lenovo-x200
+    Lenovo X200 quirk
 
 Conexant 5066
 =============
-laptop
-    Basic Laptop config (default)
-hp-laptop
-    HP laptops, e g G60
-asus
-    Asus K52JU, Lenovo G560
-dell-laptop
-    Dell laptops
-dell-vostro
-    Dell Vostro
-olpc-xo-1_5
-    OLPC XO 1.5
-ideapad
-    Lenovo IdeaPad U150
+stereo-dmic
+    Workaround for inverted stereo digital mic
+gpio1
+    Enable GPIO1 pin
+headphone-mic-pin
+    Enable headphone mic NID 0x18 without detection
+tp410
+    Thinkpad T400 & co quirks
 thinkpad
-    Lenovo Thinkpad
+    Thinkpad mute/mic LED quirk
+lemote-a1004
+    Lemote A1004 quirk
+lemote-a1205
+    Lemote A1205 quirk
+olpc-xo
+    OLPC XO quirk
+mute-led-eapd
+    Mute LED control via EAPD
+hp-dock
+    HP dock support
+mute-led-gpio
+    Mute LED control via GPIO
 
 STAC9200
 ========
@@ -444,6 +440,8 @@ dell-eq
     Dell desktops/laptops
 alienware
     Alienware M17x
+asus-mobo
+    Pin configs for ASUS mobo with 5.1/SPDIF out
 auto
     BIOS setup (default)
 
@@ -477,6 +475,8 @@ hp-envy-ts-bass
     Pin fixup for HP Envy TS bass speaker (NID 0x10)
 hp-bnb13-eq
     Hardware equalizer setup for HP laptops
+hp-envy-ts-bass
+    HP Envy TS bass support
 auto
     BIOS setup (default)
 
@@ -496,10 +496,22 @@ auto
 
 Cirrus Logic CS4206/4207
 ========================
+mbp53
+    MacBook Pro 5,3
 mbp55
     MacBook Pro 5,5
 imac27
     IMac 27 Inch
+imac27_122
+    iMac 12,2
+apple
+    Generic Apple quirk
+mbp101
+    MacBookPro 10,1
+mbp81
+    MacBookPro 8,1
+mba42
+    MacBookAir 4,2
 auto
     BIOS setup (default)
 
@@ -509,6 +521,10 @@ mba6
     MacBook Air 6,1 and 6,2
 gpio0
     Enable GPIO 0 amp
+mbp11
+    MacBookPro 11,2
+macmini
+    MacMini 7,1
 auto
     BIOS setup (default)
 
-- 
cgit v1.2.3-59-g8ed1b


From 232cd35d0804cc241eb887bb8d4d9b3b9881c64a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 May 2017 14:17:48 -0700
Subject: ipv6: fix out of bound writes in __ip6_append_data()

Andrey Konovalov and idaifish@gmail.com reported crashes caused by
one skb shared_info being overwritten from __ip6_append_data()

Andrey program lead to following state :

copy -4200 datalen 2000 fraglen 2040
maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200

The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen,
fraggap, 0); is overwriting skb->head and skb_shared_info

Since we apparently detect this rare condition too late, move the
code earlier to even avoid allocating skb and risking crashes.

Once again, many thanks to Andrey and syzkaller team.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Reported-by: <idaifish@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d4a31becbd25..bf8a58a1c32d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1466,6 +1466,11 @@ alloc_new_skb:
 			 */
 			alloclen += sizeof(struct frag_hdr);
 
+			copy = datalen - transhdrlen - fraggap;
+			if (copy < 0) {
+				err = -EINVAL;
+				goto error;
+			}
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
 						alloclen + hh_len,
@@ -1515,13 +1520,9 @@ alloc_new_skb:
 				data += fraggap;
 				pskb_trim_unique(skb_prev, maxfraglen);
 			}
-			copy = datalen - transhdrlen - fraggap;
-
-			if (copy < 0) {
-				err = -EINVAL;
-				kfree_skb(skb);
-				goto error;
-			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+			if (copy > 0 &&
+			    getfrag(from, data + transhdrlen, offset,
+				    copy, fraggap, skb) < 0) {
 				err = -EFAULT;
 				kfree_skb(skb);
 				goto error;
-- 
cgit v1.2.3-59-g8ed1b


From 9e7b9a25e170722f15ed54f5b963e9867f79195d Mon Sep 17 00:00:00 2001
From: Jan Glauber <jglauber@cavium.com>
Date: Mon, 22 May 2017 13:09:19 +0200
Subject: mmc: cavium: Prevent crash with incomplete DT

In case the DT specifies neither a regulator nor a gpio
for the shared power the driver will crash accessing the regulator.
Prevent the crash by checking the regulator before use.

Use mmc_regulator_get_supply() instead of open coding the same
logic.

Signed-off-by: Jan Glauber <jglauber@cavium.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cavium.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
index 58b51ba6aabd..b8aaf0fdb77c 100644
--- a/drivers/mmc/host/cavium.c
+++ b/drivers/mmc/host/cavium.c
@@ -839,14 +839,14 @@ static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		cvm_mmc_reset_bus(slot);
 		if (host->global_pwr_gpiod)
 			host->set_shared_power(host, 0);
-		else
+		else if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 		break;
 
 	case MMC_POWER_UP:
 		if (host->global_pwr_gpiod)
 			host->set_shared_power(host, 1);
-		else
+		else if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
 		break;
 	}
@@ -968,20 +968,15 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot)
 		return -EINVAL;
 	}
 
-	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
-	if (IS_ERR(mmc->supply.vmmc)) {
-		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
-		/*
-		 * Legacy Octeon firmware has no regulator entry, fall-back to
-		 * a hard-coded voltage to get a sane OCR.
-		 */
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret == -EPROBE_DEFER)
+		return ret;
+	/*
+	 * Legacy Octeon firmware has no regulator entry, fall-back to
+	 * a hard-coded voltage to get a sane OCR.
+	 */
+	if (IS_ERR(mmc->supply.vmmc))
 		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	} else {
-		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
-		if (ret > 0)
-			mmc->ocr_avail = ret;
-	}
 
 	/* Common MMC bindings */
 	ret = mmc_of_parse(mmc);
-- 
cgit v1.2.3-59-g8ed1b


From c2372c20425bd75a5527b3e2204059762190f6ca Mon Sep 17 00:00:00 2001
From: Jan Glauber <jglauber@cavium.com>
Date: Mon, 22 May 2017 13:09:20 +0200
Subject: of/platform: Make of_platform_device_destroy globally visible

of_platform_device_destroy is the counterpart to
of_platform_device_create which is a non-static function.

After creating a platform device it might be neccessary
to destroy it to deal with -EPROBE_DEFER where a
repeated of_platform_device_create call would fail otherwise.

Therefore also make of_platform_device_destroy globally visible.

Signed-off-by: Jan Glauber <jglauber@cavium.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/of/platform.c       | 3 ++-
 include/linux/of_platform.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 71fecc2debfc..703a42118ffc 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void)
 arch_initcall_sync(of_platform_default_populate_init);
 #endif
 
-static int of_platform_device_destroy(struct device *dev, void *data)
+int of_platform_device_destroy(struct device *dev, void *data)
 {
 	/* Do not touch devices not populated from the device tree */
 	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
@@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(of_platform_device_destroy);
 
 /**
  * of_platform_depopulate() - Remove devices populated from device tree
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index dc8224ae28d5..e0d1946270f3 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np,
 						   const char *bus_id,
 						   struct device *parent);
 
+extern int of_platform_device_destroy(struct device *dev, void *data);
 extern int of_platform_bus_probe(struct device_node *root,
 				 const struct of_device_id *matches,
 				 struct device *parent);
-- 
cgit v1.2.3-59-g8ed1b


From 8fb83b142823cdd1f85f78dcf9e861e9033919f9 Mon Sep 17 00:00:00 2001
From: Jan Glauber <jglauber@cavium.com>
Date: Mon, 22 May 2017 13:09:21 +0200
Subject: mmc: cavium: Fix probing race with regulator

If the regulator probing is not yet finished this driver
might catch a -EPROBE_DEFER. Returning after this condition
did not remove the created platform device. On a repeated
call to the probe function the of_platform_device_create
fails.

Calling of_platform_device_destroy after EPROBE_DEFER resolves
this bug.

Signed-off-by: Jan Glauber <jglauber@cavium.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cavium-octeon.c   | 11 ++++++++++-
 drivers/mmc/host/cavium-thunderx.c |  6 ++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index cbb566377508..951d2cdd7888 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -288,11 +288,20 @@ static int octeon_mmc_probe(struct platform_device *pdev)
 		if (ret) {
 			dev_err(&pdev->dev, "Error populating slots\n");
 			octeon_mmc_set_shared_power(host, 0);
-			return ret;
+			goto error;
 		}
 		i++;
 	}
 	return 0;
+
+error:
+	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+		if (host->slot_pdev[i])
+			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+	}
+	return ret;
 }
 
 static int octeon_mmc_remove(struct platform_device *pdev)
diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
index fe3d77267cd6..b9cc95998799 100644
--- a/drivers/mmc/host/cavium-thunderx.c
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -146,6 +146,12 @@ static int thunder_mmc_probe(struct pci_dev *pdev,
 	return 0;
 
 error:
+	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+		if (host->slot_pdev[i])
+			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+	}
 	clk_disable_unprepare(host->clk);
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From bd703a1524e851b8d6d646be9dafd794d4eb6045 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Sat, 20 May 2017 01:52:11 +0300
Subject: net: atheros: atl2: don't return zero on failure path in atl2_probe()

If dma mask checks fail in atl2_probe(), it breaks off initialization,
deallocates all resources, but returns zero.

The patch adds proper error code return value and
make error code setup unified.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atlx/atl2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 63f2deec2a52..77a1c03255de 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
 		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
 		printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n");
+		err = -EIO;
 		goto err_dma;
 	}
 
@@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * pcibios_set_master to do the needed arch specific settings */
 	pci_set_master(pdev);
 
-	err = -ENOMEM;
 	netdev = alloc_etherdev(sizeof(struct atl2_adapter));
-	if (!netdev)
+	if (!netdev) {
+		err = -ENOMEM;
 		goto err_alloc_etherdev;
+	}
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
@@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_sw_init;
 
-	err = -EIO;
-
 	netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
 	netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 
-- 
cgit v1.2.3-59-g8ed1b


From 751da2a69b7cc82d83dc310ed7606225f2d6e014 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 19 May 2017 19:43:45 -0400
Subject: bonding: fix accounting of active ports in 3ad

As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not
removed from the aggregator when they are down, and the active slave count
is NOT equal to number of ports in the aggregator, but rather the number
of ports in the aggregator that are still enabled. The sysfs spew for
bonding_show_ad_num_ports() has a comment that says "Show number of active
802.3ad ports.", but it's currently showing total number of ports, both
active and inactive. Remedy it by using the same logic introduced in
0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and
netlink all report the number of active ports. Note that this means that
IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of
NUM_PORTS, and thus perhaps should be renamed for clarity.

Lightly tested on a dual i40e lacp bond, simulating link downs with an ip
link set dev <slave2> down, was able to produce the state where I could
see both in the same aggregator, but a number of ports count of 1.

MII Status: up
Active Aggregator Info:
        Aggregator ID: 1
        Number of ports: 2 <---
Slave Interface: ens10
MII Status: up <---
Aggregator ID: 1
Slave Interface: ens11
MII Status: up
Aggregator ID: 1

MII Status: up
Active Aggregator Info:
        Aggregator ID: 1
        Number of ports: 1 <---
Slave Interface: ens10
MII Status: down <---
Aggregator ID: 1
Slave Interface: ens11
MII Status: up
Aggregator ID: 1

CC: Jay Vosburgh <j.vosburgh@gmail.com>
CC: Veaceslav Falico <vfalico@gmail.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: netdev@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c5fd4259da33..b44a6aeb346d 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2577,7 +2577,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
 		return -1;
 
 	ad_info->aggregator_id = aggregator->aggregator_identifier;
-	ad_info->ports = aggregator->num_of_ports;
+	ad_info->ports = __agg_active_ports(aggregator);
 	ad_info->actor_key = aggregator->actor_oper_aggregator_key;
 	ad_info->partner_key = aggregator->partner_oper_aggregator_key;
 	ether_addr_copy(ad_info->partner_system,
-- 
cgit v1.2.3-59-g8ed1b


From f5f968f2371ccdebb8a365487649673c9af68d09 Mon Sep 17 00:00:00 2001
From: Srinath Mannam <srinath.mannam@broadcom.com>
Date: Thu, 18 May 2017 22:27:40 +0530
Subject: mmc: sdhci-iproc: suppress spurious interrupt with Multiblock read

The stingray SDHCI hardware supports ACMD12 and automatically
issues after multi block transfer completed.

If ACMD12 in SDHCI is disabled, spurious tx done interrupts are seen
on multi block read command with below error message:

Got data interrupt 0x00000002 even though no data
operation was in progress.

This patch uses SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 to enable
ACM12 support in SDHCI hardware and suppress spurious interrupt.

Signed-off-by: Srinath Mannam <srinath.mannam@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Fixes: b580c52d58d9 ("mmc: sdhci-iproc: add IPROC SDHCI driver")
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-iproc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 3275d4995812..61666d269771 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -187,7 +187,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = {
 };
 
 static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = {
-	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
+	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+		  SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
 	.quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
 	.ops = &sdhci_iproc_ops,
 };
-- 
cgit v1.2.3-59-g8ed1b


From e4eda884db7930cee434828759064b4711604078 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 22 May 2017 12:27:07 -0400
Subject: net: Make IP alignment calulations clearer.

The assignmnet:

	ip_align = strict ? 2 : NET_IP_ALIGN;

in compare_pkt_ptr_alignment() trips up Coverity because we can only
get to this code when strict is true, therefore ip_align will always
be 2 regardless of NET_IP_ALIGN's value.

So just assign directly to '2' and explain the situation in the
comment above.

Reported-by: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1eddb713b815..c72cd41f5b8b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -808,11 +808,15 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 		reg_off += reg->aux_off;
 	}
 
-	/* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking
-	 * we force this to 2 which is universally what architectures use
-	 * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+	/* For platforms that do not have a Kconfig enabling
+	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
+	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
+	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
+	 * to this code only in strict mode where we want to emulate
+	 * the NET_IP_ALIGN==2 checking.  Therefore use an
+	 * unconditional IP align value of '2'.
 	 */
-	ip_align = strict ? 2 : NET_IP_ALIGN;
+	ip_align = 2;
 	if ((ip_align + reg_off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
 			ip_align, reg_off, off, size);
-- 
cgit v1.2.3-59-g8ed1b


From 72ccc471e13b8266d2ee2104521df5b92ba08e9c Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 19 May 2017 14:46:46 -0400
Subject: bonding: fix randomly populated arp target array

In commit dc9c4d0fe023, the arp_target array moved from a static global
to a local variable. By the nature of static globals, the array used to
be initialized to all 0. At present, it's full of random data, which
that gets interpreted as arp_target values, when none have actually been
specified. Systems end up booting with spew along these lines:

[   32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0
[   32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready
[   32.204892] lacp0: Setting MII monitoring interval to 100
[   32.211071] lacp0: Removing ARP target 216.124.228.17
[   32.216824] lacp0: Removing ARP target 218.160.255.255
[   32.222646] lacp0: Removing ARP target 185.170.136.184
[   32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal
[   32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
[   32.243987] lacp0: Removing ARP target 56.125.228.17
[   32.249625] lacp0: Removing ARP target 218.160.255.255
[   32.255432] lacp0: Removing ARP target 15.157.233.184
[   32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal
[   32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255)
[   32.276632] lacp0: Removing ARP target 16.0.0.0
[   32.281755] lacp0: Removing ARP target 218.160.255.255
[   32.287567] lacp0: Removing ARP target 72.125.228.17
[   32.293165] lacp0: Removing ARP target 218.160.255.255
[   32.298970] lacp0: Removing ARP target 8.125.228.17
[   32.304458] lacp0: Removing ARP target 218.160.255.255

None of these were actually specified as ARP targets, and the driver does
seem to clean up the mess okay, but it's rather noisy and confusing, leaks
values to userspace, and the 255.255.255.255 spew shows up even when debug
prints are disabled.

The fix: just zero out arp_target at init time.

While we're in here, init arp_all_targets_value in the right place.

Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables")
CC: Mahesh Bandewar <maheshb@google.com>
CC: Jay Vosburgh <j.vosburgh@gmail.com>
CC: Veaceslav Falico <vfalico@gmail.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: netdev@vger.kernel.org
CC: stable@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2be78807fd6e..73313318399c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params *params)
 	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
 	struct bond_opt_value newval;
 	const struct bond_opt_value *valptr;
-	int arp_all_targets_value;
+	int arp_all_targets_value = 0;
 	u16 ad_actor_sys_prio = 0;
 	u16 ad_user_port_key = 0;
-	__be32 arp_target[BOND_MAX_ARP_TARGETS];
+	__be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
 	int arp_ip_count;
 	int bond_mode	= BOND_MODE_ROUNDROBIN;
 	int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
@@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params)
 		arp_validate_value = 0;
 	}
 
-	arp_all_targets_value = 0;
 	if (arp_all_targets) {
 		bond_opt_initstr(&newval, arp_all_targets);
 		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
-- 
cgit v1.2.3-59-g8ed1b


From 499fde662f1957e3cb8d192a94a099ebe19c714b Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Fri, 19 May 2017 11:21:59 -0700
Subject: vsock: use new wait API for vsock_stream_sendmsg()

As reported by Michal, vsock_stream_sendmsg() could still
sleep at vsock_stream_has_space() after prepare_to_wait():

  vsock_stream_has_space
    vmci_transport_stream_has_space
      vmci_qpair_produce_free_space
        qp_lock
          qp_acquire_queue_mutex
            mutex_lock

Just switch to the new wait API like we did for commit
d9dc8b0f8b4e ("net: fix sleeping for sk_wait_event()").

Reported-by: Michal Kubecek <mkubecek@suse.cz>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Jorgen Hansen <jhansen@vmware.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 6f7f6757ceef..dfc8c51e4d74 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	long timeout;
 	int err;
 	struct vsock_transport_send_notify_data send_data;
-
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	sk = sock->sk;
 	vsk = vsock_sk(sk);
@@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (err < 0)
 		goto out;
 
-
 	while (total_written < len) {
 		ssize_t written;
 
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		add_wait_queue(sk_sleep(sk), &wait);
 		while (vsock_stream_has_space(vsk) == 0 &&
 		       sk->sk_err == 0 &&
 		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* Don't wait for non-blocking sockets. */
 			if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			err = transport->notify_send_pre_block(vsk, &send_data);
 			if (err < 0) {
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			release_sock(sk);
-			timeout = schedule_timeout(timeout);
+			timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			} else if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
-
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
-		finish_wait(sk_sleep(sk), &wait);
+		remove_wait_queue(sk_sleep(sk), &wait);
 
 		/* These checks occur both as part of and after the loop
 		 * conditional since we need to check before and after
-- 
cgit v1.2.3-59-g8ed1b


From 0544f5494a03b8846db74e02be5685d1f32b06c9 Mon Sep 17 00:00:00 2001
From: Marta Rybczynska <mrybczyn@kalray.eu>
Date: Mon, 10 Apr 2017 17:12:34 +0200
Subject: nvme-rdma: support devices with queue size < 32

In the case of small NVMe-oF queue size (<32) we may enter a deadlock
caused by the fact that the IB completions aren't sent waiting for 32
and the send queue will fill up.

The error is seen as (using mlx5):
[ 2048.693355] mlx5_0:mlx5_ib_post_send:3765:(pid 7273):
[ 2048.693360] nvme nvme1: nvme_rdma_post_send failed with error code -12

This patch changes the way the signaling is done so that it depends on
the queue depth now. The magic define has been removed completely.

Cc: stable@vger.kernel.org
Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu>
Signed-off-by: Samuel Jones <sjones@kalray.eu>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index dd1c6deef82f..e2c18f3d9dcf 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
+static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+{
+	int sig_limit;
+
+	/*
+	 * We signal completion every queue depth/2 and also handle the
+	 * degenerated case of a  device with queue_depth=1, where we
+	 * would need to signal every message.
+	 */
+	sig_limit = max(queue->queue_size / 2, 1);
+	return (++queue->sig_count % sig_limit) == 0;
+}
+
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
 		struct ib_send_wr *first, bool flush)
@@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	 * Would have been way to obvious to handle this in hardware or
 	 * at least the RDMA stack..
 	 *
-	 * This messy and racy code sniplet is copy and pasted from the iSER
-	 * initiator, and the magic '32' comes from there as well.
-	 *
 	 * Always signal the flushes. The magic request used for the flush
 	 * sequencer is not allocated in our driver's tagset and it's
 	 * triggered to be freed by blk_cleanup_queue(). So we need to
@@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	 * embedded in request's payload, is not freed when __ib_process_cq()
 	 * calls wr_cqe->done().
 	 */
-	if ((++queue->sig_count % 32) == 0 || flush)
+	if (nvme_rdma_queue_sig_limit(queue) || flush)
 		wr.send_flags |= IB_SEND_SIGNALED;
 
 	if (first)
-- 
cgit v1.2.3-59-g8ed1b


From 806f026f9b901eaf1a6baeb48b5da18d6a4f818e Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 22 May 2017 23:05:03 +0800
Subject: nvme: use blk_mq_start_hw_queues() in nvme_kill_queues()

Inside nvme_kill_queues(), we have to start hw queues for
draining requests in sw queues, .dispatch list and requeue list,
so use blk_mq_start_hw_queues() instead of blk_mq_start_stopped_hw_queues()
which only run queues if queues are stopped, but the queues may have
been started already, for example nvme_start_queues() is called in reset work
function.

blk_mq_start_hw_queues() run hw queues in current context, instead
of running asynchronously like before. Given nvme_kill_queues() is
run from either remove context or reset worker context, both are fine
to run hw queue directly. And the mutex of namespaces_mutex isn't a
problem too becasue nvme_start_freeze() runs hw queue in this way
already.

Cc: stable@vger.kernel.org
Reported-by: Zhang Yi <yizhan@redhat.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d5e0906262ea..40d5e4a9e8d7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2437,7 +2437,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		revalidate_disk(ns->disk);
 		blk_set_queue_dying(ns->queue);
 		blk_mq_abort_requeue_list(ns->queue);
-		blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+		/*
+		 * Forcibly start all queues to avoid having stuck requests.
+		 * Note that we must ensure the queues are not stopped
+		 * when the final removal happens.
+		 */
+		blk_mq_start_hw_queues(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 986f75c876dbafed98eba7cb516c5118f155db23 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 22 May 2017 23:05:04 +0800
Subject: nvme: avoid to use blk_mq_abort_requeue_list()

NVMe may add request into requeue list simply and not kick off the
requeue if hw queues are stopped. Then blk_mq_abort_requeue_list()
is called in both nvme_kill_queues() and nvme_ns_remove() for
dealing with this issue.

Unfortunately blk_mq_abort_requeue_list() is absolutely a
race maker, for example, one request may be requeued during
the aborting. So this patch just calls blk_mq_kick_requeue_list() in
nvme_kill_queues() to handle this issue like what nvme_start_queues()
does. Now all requests in requeue list when queues are stopped will be
handled by blk_mq_kick_requeue_list() when queues are restarted, either
in nvme_start_queues() or in nvme_kill_queues().

Cc: stable@vger.kernel.org
Reported-by: Zhang Yi <yizhan@redhat.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 40d5e4a9e8d7..04e115834702 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2098,7 +2098,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		if (ns->ndev)
 			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
-		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
 
@@ -2436,7 +2435,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 			continue;
 		revalidate_disk(ns->disk);
 		blk_set_queue_dying(ns->queue);
-		blk_mq_abort_requeue_list(ns->queue);
 
 		/*
 		 * Forcibly start all queues to avoid having stuck requests.
@@ -2444,6 +2442,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		 * when the final removal happens.
 		 */
 		blk_mq_start_hw_queues(ns->queue);
+
+		/* draining requests in requeue list */
+		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7254a50a5db40ca6739ddf37e0a45e6912532b2c Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 22 May 2017 23:05:05 +0800
Subject: blk-mq: remove blk_mq_abort_requeue_list()

No one uses it any more, so remove it.

Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c         | 19 -------------------
 include/linux/blk-mq.h |  1 -
 2 files changed, 20 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a69ad122ed66..f2224ffd225d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-void blk_mq_abort_requeue_list(struct request_queue *q)
-{
-	unsigned long flags;
-	LIST_HEAD(rq_list);
-
-	spin_lock_irqsave(&q->requeue_lock, flags);
-	list_splice_init(&q->requeue_list, &rq_list);
-	spin_unlock_irqrestore(&q->requeue_lock, flags);
-
-	while (!list_empty(&rq_list)) {
-		struct request *rq;
-
-		rq = list_first_entry(&rq_list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		blk_mq_end_request(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(blk_mq_abort_requeue_list);
-
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	if (tag < tags->nr_tags) {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c47aa248c640..fcd641032f8d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
 				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 bool blk_mq_queue_stopped(struct request_queue *q);
-- 
cgit v1.2.3-59-g8ed1b


From 2d76b2f8b54abd16225cd80afca36ed43f113c41 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 22 May 2017 16:46:13 +0200
Subject: net: sched: cls_matchall: fix null pointer dereference

Since the head is guaranteed by the check above to be null, the call_rcu
would explode. Remove the previously logically dead code that was made
logically very much alive and kicking.

Fixes: 985538eee06f ("net/sched: remove redundant null check on head")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_matchall.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index dee469fed967..51859b8edd7e 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 
 	*arg = (unsigned long) head;
 	rcu_assign_pointer(tp->root, new);
-	call_rcu(&head->rcu, mall_destroy_rcu);
 	return 0;
 
 err_replace_hw_filter:
-- 
cgit v1.2.3-59-g8ed1b


From 0ce872bf8b5c4d425a41940a523ff1b8daa0b275 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 17:10:15 -0700
Subject: nvme_fc: get rid of local reconnect_delay

Remove the local copy of reconnect_delay.
Use the value in the controller options directly.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index dca7165fabcf..c3ab1043efbd 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -165,7 +165,6 @@ struct nvme_fc_ctrl {
 	struct work_struct	delete_work;
 	struct work_struct	reset_work;
 	struct delayed_work	connect_work;
-	int			reconnect_delay;
 	int			connect_attempts;
 
 	struct kref		ref;
@@ -2615,9 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 
 		dev_warn(ctrl->ctrl.device,
 			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->reconnect_delay);
+			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
 		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->reconnect_delay * HZ);
+				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
@@ -2695,9 +2694,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 
 		dev_warn(ctrl->ctrl.device,
 			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->reconnect_delay);
+			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
 		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->reconnect_delay * HZ);
+				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reconnect complete\n",
@@ -2755,7 +2754,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
 	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
-	ctrl->reconnect_delay = opts->reconnect_delay;
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
-- 
cgit v1.2.3-59-g8ed1b


From 5bbecdbc8e7ffaaf47ac1f02014bf3bedda3fd11 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 17:10:16 -0700
Subject: nvme_fc: Support ctrl_loss_tmo

Sync with Sagi's recent addition of ctrl_loss_tmo in the core fabrics
layer.

Remove local connect limits and connect_attempts variable.
Use fabrics new nr_connects variable and use of nvmf_should_reconnect()
Refactor duplicate reconnect failure code.

Addresses review comment by Sagi on controller reset support:
http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 116 +++++++++++++++++++++----------------------------
 1 file changed, 49 insertions(+), 67 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index c3ab1043efbd..a0f05d5e966c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags {
 
 #define NVMEFC_QUEUE_DELAY	3		/* ms units */
 
-#define NVME_FC_MAX_CONNECT_ATTEMPTS	1
-
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;
 	struct device		*dev;
@@ -165,7 +163,6 @@ struct nvme_fc_ctrl {
 	struct work_struct	delete_work;
 	struct work_struct	reset_work;
 	struct delayed_work	connect_work;
-	int			connect_attempts;
 
 	struct kref		ref;
 	u32			flags;
@@ -2305,7 +2302,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	int ret;
 	bool changed;
 
-	ctrl->connect_attempts++;
+	++ctrl->ctrl.opts->nr_reconnects;
 
 	/*
 	 * Create the admin queue
@@ -2402,7 +2399,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	ctrl->connect_attempts = 0;
+	ctrl->ctrl.opts->nr_reconnects = 0;
 
 	kref_get(&ctrl->ctrl.kref);
 
@@ -2545,16 +2542,22 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
-static int
-__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
+static bool
+__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-		return -EBUSY;
+		return true;
 
 	if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
-		return -EBUSY;
+		return true;
 
-	return 0;
+	return false;
+}
+
+static int
+__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
+{
+	return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
 }
 
 /*
@@ -2579,6 +2582,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
 	return ret;
 }
 
+static void
+nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
+{
+	/* If we are resetting/deleting then do nothing */
+	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
+			ctrl->ctrl.state == NVME_CTRL_LIVE);
+		return;
+	}
+
+	dev_warn(ctrl->ctrl.device,
+		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+		ctrl->cnum, status);
+
+	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
+		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+				ctrl->ctrl.opts->reconnect_delay * HZ);
+	} else {
+		dev_info(ctrl->ctrl.device,
+				"NVME-FC{%d}: Max reconnect attempts (%d) "
+				"reached. Removing controller\n",
+				ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
+		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
+	}
+}
+
 static void
 nvme_fc_reset_ctrl_work(struct work_struct *work)
 {
@@ -2590,34 +2622,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 	nvme_fc_delete_association(ctrl);
 
 	ret = nvme_fc_create_association(ctrl);
-	if (ret) {
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
-			ctrl->cnum, ret);
-		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
-			dev_warn(ctrl->ctrl.device,
-				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached. Removing controller\n",
-				ctrl->cnum, ctrl->connect_attempts);
-
-			if (!nvme_change_ctrl_state(&ctrl->ctrl,
-				NVME_CTRL_DELETING)) {
-				dev_err(ctrl->ctrl.device,
-					"NVME-FC{%d}: failed to change state "
-					"to DELETING\n", ctrl->cnum);
-				return;
-			}
-
-			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
-			return;
-		}
-
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
-		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->ctrl.opts->reconnect_delay * HZ);
-	} else
+	if (ret)
+		nvme_fc_reconnect_or_delete(ctrl, ret);
+	else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
 }
@@ -2670,34 +2677,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 				struct nvme_fc_ctrl, connect_work);
 
 	ret = nvme_fc_create_association(ctrl);
-	if (ret) {
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt failed (%d)\n",
-			ctrl->cnum, ret);
-		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
-			dev_warn(ctrl->ctrl.device,
-				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached. Removing controller\n",
-				ctrl->cnum, ctrl->connect_attempts);
-
-			if (!nvme_change_ctrl_state(&ctrl->ctrl,
-				NVME_CTRL_DELETING)) {
-				dev_err(ctrl->ctrl.device,
-					"NVME-FC{%d}: failed to change state "
-					"to DELETING\n", ctrl->cnum);
-				return;
-			}
-
-			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
-			return;
-		}
-
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
-		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->ctrl.opts->reconnect_delay * HZ);
-	} else
+	if (ret)
+		nvme_fc_reconnect_or_delete(ctrl, ret);
+	else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reconnect complete\n",
 			ctrl->cnum);
@@ -2969,7 +2951,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 static struct nvmf_transport_ops nvme_fc_transport = {
 	.name		= "fc",
 	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
-	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY,
+	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
 	.create_ctrl	= nvme_fc_create_ctrl,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From a5321aa5efea05ae748dc5b3e8053584213325ca Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 17:10:18 -0700
Subject: nvme_fc: revise comment on teardown

Per the recommendation by Sagi on:
http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html

An extra reference was pointed out.  There's no issue with the
references, but rather a literal interpretation of what the comment
is saying.

Reword the comment to avoid confusion.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index a0f05d5e966c..0b7f7dd2779a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2532,10 +2532,10 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	/*
 	 * tear down the controller
-	 * This will result in the last reference on the nvme ctrl to
-	 * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
-	 * From there, the transport will tear down it's logical queues and
-	 * association.
+	 * After the last reference on the nvme ctrl is removed,
+	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
+	 * invoked. From there, the transport will tear down it's
+	 * logical queues and association.
 	 */
 	nvme_uninit_ctrl(&ctrl->ctrl);
 
-- 
cgit v1.2.3-59-g8ed1b


From 589ff7753bb54edd3ee4a9399ccc3ac48d9b22d7 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 17:10:19 -0700
Subject: nvme_fc: set logging level on resets/deletes

Per the review by Sagi on:
http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html

Looked at existing warn vs info vs err dev_xxx levels for the messages
printed on reconnects and deletes:
- Resets due to error and resets transitioned to deletes are dev_warn
- Other reset/disconnect messages are dev_info
- Removed chatty io queue related messages

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0b7f7dd2779a..e4817f9f4323 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1747,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: transport association error detected: %s\n",
 		ctrl->cnum, errmsg);
-	dev_info(ctrl->ctrl.device,
+	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
 	/* stop the queues on error, cleanup is in reset thread */
@@ -2191,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (!opts->nr_io_queues)
 		return 0;
 
-	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-			opts->nr_io_queues);
-
 	nvme_fc_init_io_queues(ctrl);
 
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
@@ -2264,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ctrl->queue_count == 1)
 		return 0;
 
-	dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
-			opts->nr_io_queues);
-
 	nvme_fc_init_io_queues(ctrl);
 
 	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -2592,7 +2586,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 		return;
 	}
 
-	dev_warn(ctrl->ctrl.device,
+	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
 		ctrl->cnum, status);
 
@@ -2603,7 +2597,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
 				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else {
-		dev_info(ctrl->ctrl.device,
+		dev_warn(ctrl->ctrl.device,
 				"NVME-FC{%d}: Max reconnect attempts (%d) "
 				"reached. Removing controller\n",
 				ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
@@ -2638,7 +2632,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
-	dev_warn(ctrl->ctrl.device,
+	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-- 
cgit v1.2.3-59-g8ed1b


From e392e1f1f408fe8baf1046c970d05cbf1f0ec945 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 17:10:24 -0700
Subject: nvme_fc: correct nvme status set on abort

correct nvme status set on abort. Patch that changed status to being actual
nvme status crossed in the night with the patch that added abort values.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e4817f9f4323..775869c69df6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1372,9 +1372,9 @@ done:
 	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
 	if (!complete_rq) {
 		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-			status = cpu_to_le16(NVME_SC_ABORT_REQ);
+			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 			if (blk_queue_dying(rq->q))
-				status |= cpu_to_le16(NVME_SC_DNR);
+				status |= cpu_to_le16(NVME_SC_DNR << 1);
 		}
 		nvme_end_request(rq, status, result);
 	} else
-- 
cgit v1.2.3-59-g8ed1b


From 2cb657bc0242dfdca20869685bf179774ef1a6fb Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 15 May 2017 17:10:22 -0700
Subject: nvme_fc: remove extra controller reference taken on reconnect

fix extra controller reference taken on reconnect by moving
reference to initial controller create

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 775869c69df6..14a009e43aa5 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2395,8 +2395,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.opts->nr_reconnects = 0;
 
-	kref_get(&ctrl->ctrl.kref);
-
 	if (ctrl->queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
@@ -2793,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		ctrl->ctrl.opts = NULL;
 		/* initiate nvme ctrl ref counting teardown */
 		nvme_uninit_ctrl(&ctrl->ctrl);
-		nvme_put_ctrl(&ctrl->ctrl);
 
 		/* as we're past the point where we transition to the ref
 		 * counting teardown path, if we return a bad pointer here,
@@ -2809,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		return ERR_PTR(ret);
 	}
 
+	kref_get(&ctrl->ctrl.kref);
+
 	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
 		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
-- 
cgit v1.2.3-59-g8ed1b


From aace34c0bb8ea3c8bdcec865b6a4be4db0a68e33 Mon Sep 17 00:00:00 2001
From: Tin Huynh <tnhuynh@apm.com>
Date: Mon, 22 May 2017 16:19:20 +0700
Subject: leds: pca955x: Correct I2C Functionality

The driver checks an incorrect flag of functionality of adapter.
When a driver requires i2c_smbus_read_byte_data and
i2c_smbus_write_byte_data, it should check I2C_FUNC_SMBUS_BYTE_DATA
instead I2C_FUNC_I2C.
This patch fixes the problem.

Signed-off-by: Tin Huynh <tnhuynh@apm.com>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/leds-pca955x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 78a7ce816a47..9a873118ea5f 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -285,7 +285,7 @@ static int pca955x_probe(struct i2c_client *client,
 			"slave address 0x%02x\n",
 			client->name, chip->bits, client->addr);
 
-	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -EIO;
 
 	if (pdata) {
-- 
cgit v1.2.3-59-g8ed1b


From 5b81fc3cc625e857275573cb4240bbab553f919c Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 17 May 2017 13:07:24 -0700
Subject: blk-throttle: add hierarchy support for latency target and idle time

For idle time, children's setting should not be bigger than parent's.
For latency target, children's setting should not be smaller than
parent's. The leaf nodes will adjust their settings according to the
hierarchy and compare their IO with the settings and do
upgrade/downgrade. parents nodes don't need to track their IO
latency/idle time.

Signed-off-by: Shaohua Li <shli@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-throttle.c | 50 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b78db2e5fdff..16174f8cb0a1 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -157,6 +157,7 @@ struct throtl_grp {
 	unsigned long last_check_time;
 
 	unsigned long latency_target; /* us */
+	unsigned long latency_target_conf; /* us */
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	unsigned long slice_end[2];
@@ -165,6 +166,7 @@ struct throtl_grp {
 	unsigned long checked_last_finish_time; /* ns / 1024 */
 	unsigned long avg_idletime; /* ns / 1024 */
 	unsigned long idletime_threshold; /* us */
+	unsigned long idletime_threshold_conf; /* us */
 
 	unsigned int bio_cnt; /* total bios */
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
@@ -482,6 +484,7 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	/* LIMIT_LOW will have default value 0 */
 
 	tg->latency_target = DFL_LATENCY_TARGET;
+	tg->latency_target_conf = DFL_LATENCY_TARGET;
 
 	return &tg->pd;
 }
@@ -512,6 +515,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	tg->td = td;
 
 	tg->idletime_threshold = td->dft_idletime_threshold;
+	tg->idletime_threshold_conf = td->dft_idletime_threshold;
 }
 
 /*
@@ -1367,8 +1371,25 @@ static void tg_conf_updated(struct throtl_grp *tg)
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
-		tg_update_has_rules(blkg_to_tg(blkg));
+	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) {
+		struct throtl_grp *this_tg = blkg_to_tg(blkg);
+		struct throtl_grp *parent_tg;
+
+		tg_update_has_rules(this_tg);
+		/* ignore root/second level */
+		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
+		    !blkg->parent->parent)
+			continue;
+		parent_tg = blkg_to_tg(blkg->parent);
+		/*
+		 * make sure all children has lower idle time threshold and
+		 * higher latency target
+		 */
+		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
+				parent_tg->idletime_threshold);
+		this_tg->latency_target = max(this_tg->latency_target,
+				parent_tg->latency_target);
+	}
 
 	/*
 	 * We're already holding queue_lock and know @tg is valid.  Let's
@@ -1497,8 +1518,8 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
 	    (off != LIMIT_LOW ||
-	     (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
-	      tg->latency_target == DFL_LATENCY_TARGET)))
+	     (tg->idletime_threshold_conf == tg->td->dft_idletime_threshold &&
+	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
 	if (tg->bps_conf[READ][off] != bps_dft)
@@ -1514,17 +1535,17 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 		snprintf(bufs[3], sizeof(bufs[3]), "%u",
 			tg->iops_conf[WRITE][off]);
 	if (off == LIMIT_LOW) {
-		if (tg->idletime_threshold == ULONG_MAX)
+		if (tg->idletime_threshold_conf == ULONG_MAX)
 			strcpy(idle_time, " idle=max");
 		else
 			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
-				tg->idletime_threshold);
+				tg->idletime_threshold_conf);
 
-		if (tg->latency_target == ULONG_MAX)
+		if (tg->latency_target_conf == ULONG_MAX)
 			strcpy(latency_time, " latency=max");
 		else
 			snprintf(latency_time, sizeof(latency_time),
-				" latency=%lu", tg->latency_target);
+				" latency=%lu", tg->latency_target_conf);
 	}
 
 	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
@@ -1563,8 +1584,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	v[2] = tg->iops_conf[READ][index];
 	v[3] = tg->iops_conf[WRITE][index];
 
-	idle_time = tg->idletime_threshold;
-	latency_time = tg->latency_target;
+	idle_time = tg->idletime_threshold_conf;
+	latency_time = tg->latency_target_conf;
 	while (true) {
 		char tok[27];	/* wiops=18446744073709551616 */
 		char *p;
@@ -1628,10 +1649,10 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 		blk_throtl_update_limit_valid(tg->td);
 		if (tg->td->limit_valid[LIMIT_LOW])
 			tg->td->limit_index = LIMIT_LOW;
-		tg->idletime_threshold = (idle_time == ULONG_MAX) ?
-			ULONG_MAX : idle_time;
-		tg->latency_target = (latency_time == ULONG_MAX) ?
-			ULONG_MAX : latency_time;
+		tg->idletime_threshold_conf = idle_time;
+		tg->idletime_threshold = tg->idletime_threshold_conf;
+		tg->latency_target_conf = latency_time;
+		tg->latency_target = tg->latency_target_conf;
 	}
 	tg_conf_updated(tg);
 	ret = 0;
@@ -2385,6 +2406,7 @@ void blk_throtl_register_queue(struct request_queue *q)
 		struct throtl_grp *tg = blkg_to_tg(blkg);
 
 		tg->idletime_threshold = td->dft_idletime_threshold;
+		tg->idletime_threshold_conf = td->dft_idletime_threshold;
 	}
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3-59-g8ed1b


From 4cff729f62d1bd433178f1ffe09db5718835e925 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 17 May 2017 13:07:25 -0700
Subject: blk-throttle: output some debug info in trace

These info are important to understand what's happening and help debug.

Signed-off-by: Shaohua Li <shli@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-throttle.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 16174f8cb0a1..1f8d62f5e808 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1748,12 +1748,18 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
 	 * - IO latency is largely below threshold
 	 */
 	unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
+	bool ret;
 
 	time = min_t(unsigned long, MAX_IDLE_TIME, time);
-	return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+	ret = (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
 	       tg->avg_idletime > tg->idletime_threshold ||
 	       (tg->latency_target && tg->bio_cnt &&
 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
+	throtl_log(&tg->service_queue,
+		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
+		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
+		tg->bio_cnt, ret, tg->td->scale);
+	return ret;
 }
 
 static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
@@ -1849,6 +1855,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
 	struct cgroup_subsys_state *pos_css;
 	struct blkcg_gq *blkg;
 
+	throtl_log(&td->service_queue, "upgrade to max");
 	td->limit_index = LIMIT_MAX;
 	td->low_upgrade_time = jiffies;
 	td->scale = 0;
@@ -1871,6 +1878,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
 {
 	td->scale /= 2;
 
+	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
 	if (td->scale) {
 		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
 		return;
@@ -2044,6 +2052,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
 		td->avg_buckets[i].valid = true;
 		last_latency = td->avg_buckets[i].latency;
 	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+		throtl_log(&td->service_queue,
+			"Latency bucket %d: latency=%ld, valid=%d", i,
+			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
 }
 #else
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
-- 
cgit v1.2.3-59-g8ed1b


From 9bb67aeb96784527dbc784c7a1b234461299363c Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 17 May 2017 13:07:26 -0700
Subject: blk-throttle: respect 0 bps/iops settings for io.low

If a cgroup with low limit 0 for both bps/iops, the cgroup's low limit
is ignored and we throttle the cgroup with its max limit. In this way,
other cgroups with a low limit will not get protected. To fix this, we
don't do the exception any more. cgroup will be throttled to a limit 0
if it uese default setting. To avoid completed stall, we give such
cgroup tiny IO resources.

Signed-off-by: Shaohua Li <shli@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-throttle.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 1f8d62f5e808..f6a9f42a0ad7 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -27,6 +27,8 @@ static int throtl_quantum = 32;
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
 /* default latency target is 0, eg, guarantee IO latency by default */
 #define DFL_LATENCY_TARGET (0)
+#define MIN_THROTL_BPS (320 * 1024)
+#define MIN_THROTL_IOPS (10)
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
@@ -296,8 +298,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
 
 	td = tg->td;
 	ret = tg->bps[rw][td->limit_index];
-	if (ret == 0 && td->limit_index == LIMIT_LOW)
-		return tg->bps[rw][LIMIT_MAX];
+	if (ret == 0 && td->limit_index == LIMIT_LOW) {
+		/* intermediate node or iops isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->iops[rw][td->limit_index])
+			return U64_MAX;
+		else
+			return MIN_THROTL_BPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
 	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
@@ -317,10 +325,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
 		return UINT_MAX;
+
 	td = tg->td;
 	ret = tg->iops[rw][td->limit_index];
-	if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
-		return tg->iops[rw][LIMIT_MAX];
+	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
+		/* intermediate node or bps isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->bps[rw][td->limit_index])
+			return UINT_MAX;
+		else
+			return MIN_THROTL_IOPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
 	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
@@ -1353,7 +1368,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static void tg_conf_updated(struct throtl_grp *tg)
+static void tg_conf_updated(struct throtl_grp *tg, bool global)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
 	struct cgroup_subsys_state *pos_css;
@@ -1371,7 +1386,8 @@ static void tg_conf_updated(struct throtl_grp *tg)
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) {
+	blkg_for_each_descendant_pre(blkg, pos_css,
+			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
 		struct throtl_grp *this_tg = blkg_to_tg(blkg);
 		struct throtl_grp *parent_tg;
 
@@ -1434,7 +1450,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
 
-	tg_conf_updated(tg);
+	tg_conf_updated(tg, false);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1522,16 +1538,16 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
-	if (tg->bps_conf[READ][off] != bps_dft)
+	if (tg->bps_conf[READ][off] != U64_MAX)
 		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
 			tg->bps_conf[READ][off]);
-	if (tg->bps_conf[WRITE][off] != bps_dft)
+	if (tg->bps_conf[WRITE][off] != U64_MAX)
 		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
 			tg->bps_conf[WRITE][off]);
-	if (tg->iops_conf[READ][off] != iops_dft)
+	if (tg->iops_conf[READ][off] != UINT_MAX)
 		snprintf(bufs[2], sizeof(bufs[2]), "%u",
 			tg->iops_conf[READ][off]);
-	if (tg->iops_conf[WRITE][off] != iops_dft)
+	if (tg->iops_conf[WRITE][off] != UINT_MAX)
 		snprintf(bufs[3], sizeof(bufs[3]), "%u",
 			tg->iops_conf[WRITE][off]);
 	if (off == LIMIT_LOW) {
@@ -1654,7 +1670,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 		tg->latency_target_conf = latency_time;
 		tg->latency_target = tg->latency_target_conf;
 	}
-	tg_conf_updated(tg);
+	tg_conf_updated(tg, index == LIMIT_LOW &&
+		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
-- 
cgit v1.2.3-59-g8ed1b


From b4f428ef2844e9fa8154f2faaca249aa74e222a7 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 17 May 2017 13:07:27 -0700
Subject: blk-throttle: force user to configure all settings for io.low

Default value of io.low limit is 0. If user doesn't configure the limit,
last patch makes cgroup be throttled to very tiny bps/iops, which could
stall the system. A cgroup with default settings of io.low limit really
means nothing, so we force user to configure all settings, otherwise
io.low limit doesn't take effect. With this stragety, default setting of
latency/idle isn't important, so just set them to very conservative and
safe value.

Signed-off-by: Shaohua Li <shli@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-throttle.c | 80 ++++++++++++++++++++++++----------------------------
 1 file changed, 37 insertions(+), 43 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a9f42a0ad7..fc13dd0c6e39 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -22,13 +22,11 @@ static int throtl_quantum = 32;
 #define DFL_THROTL_SLICE_HD (HZ / 10)
 #define DFL_THROTL_SLICE_SSD (HZ / 50)
 #define MAX_THROTL_SLICE (HZ)
-#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
-#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
-/* default latency target is 0, eg, guarantee IO latency by default */
-#define DFL_LATENCY_TARGET (0)
 #define MIN_THROTL_BPS (320 * 1024)
 #define MIN_THROTL_IOPS (10)
+#define DFL_LATENCY_TARGET (-1L)
+#define DFL_IDLE_THRESHOLD (0)
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
@@ -205,8 +203,6 @@ struct throtl_data
 	unsigned int limit_index;
 	bool limit_valid[LIMIT_CNT];
 
-	unsigned long dft_idletime_threshold; /* us */
-
 	unsigned long low_upgrade_time;
 	unsigned long low_downgrade_time;
 
@@ -500,6 +496,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 
 	tg->latency_target = DFL_LATENCY_TARGET;
 	tg->latency_target_conf = DFL_LATENCY_TARGET;
+	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
 	return &tg->pd;
 }
@@ -528,9 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
 	tg->td = td;
-
-	tg->idletime_threshold = td->dft_idletime_threshold;
-	tg->idletime_threshold_conf = td->dft_idletime_threshold;
 }
 
 /*
@@ -1534,7 +1529,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
 	    (off != LIMIT_LOW ||
-	     (tg->idletime_threshold_conf == tg->td->dft_idletime_threshold &&
+	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
 	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
@@ -1660,16 +1655,31 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 		tg->iops_conf[READ][LIMIT_MAX]);
 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
 		tg->iops_conf[WRITE][LIMIT_MAX]);
-
-	if (index == LIMIT_LOW) {
-		blk_throtl_update_limit_valid(tg->td);
-		if (tg->td->limit_valid[LIMIT_LOW])
-			tg->td->limit_index = LIMIT_LOW;
-		tg->idletime_threshold_conf = idle_time;
+	tg->idletime_threshold_conf = idle_time;
+	tg->latency_target_conf = latency_time;
+
+	/* force user to configure all settings for low limit  */
+	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
+	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
+	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
+	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
+		tg->bps[READ][LIMIT_LOW] = 0;
+		tg->bps[WRITE][LIMIT_LOW] = 0;
+		tg->iops[READ][LIMIT_LOW] = 0;
+		tg->iops[WRITE][LIMIT_LOW] = 0;
+		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+		tg->latency_target = DFL_LATENCY_TARGET;
+	} else if (index == LIMIT_LOW) {
 		tg->idletime_threshold = tg->idletime_threshold_conf;
-		tg->latency_target_conf = latency_time;
 		tg->latency_target = tg->latency_target_conf;
 	}
+
+	blk_throtl_update_limit_valid(tg->td);
+	if (tg->td->limit_valid[LIMIT_LOW]) {
+		if (index == LIMIT_LOW)
+			tg->td->limit_index = LIMIT_LOW;
+	} else
+		tg->td->limit_index = LIMIT_MAX;
 	tg_conf_updated(tg, index == LIMIT_LOW &&
 		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
@@ -1760,17 +1770,19 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
 	/*
 	 * cgroup is idle if:
 	 * - single idle is too long, longer than a fixed value (in case user
-	 *   configure a too big threshold) or 4 times of slice
+	 *   configure a too big threshold) or 4 times of idletime threshold
 	 * - average think time is more than threshold
 	 * - IO latency is largely below threshold
 	 */
-	unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
+	unsigned long time;
 	bool ret;
 
-	time = min_t(unsigned long, MAX_IDLE_TIME, time);
-	ret = (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
-	       tg->avg_idletime > tg->idletime_threshold ||
-	       (tg->latency_target && tg->bio_cnt &&
+	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
+	ret = tg->latency_target == DFL_LATENCY_TARGET ||
+	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
+	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+	      tg->avg_idletime > tg->idletime_threshold ||
+	      (tg->latency_target && tg->bio_cnt &&
 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
 	throtl_log(&tg->service_queue,
 		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
@@ -2405,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q)
 void blk_throtl_register_queue(struct request_queue *q)
 {
 	struct throtl_data *td;
-	struct cgroup_subsys_state *pos_css;
-	struct blkcg_gq *blkg;
 
 	td = q->td;
 	BUG_ON(!td);
 
-	if (blk_queue_nonrot(q)) {
+	if (blk_queue_nonrot(q))
 		td->throtl_slice = DFL_THROTL_SLICE_SSD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
-	} else {
+	else
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
-	}
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
 	/* if no low limit, use previous default */
 	td->throtl_slice = DFL_THROTL_SLICE_HD;
@@ -2426,19 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q)
 	td->track_bio_latency = !q->mq_ops && !q->request_fn;
 	if (!td->track_bio_latency)
 		blk_stat_enable_accounting(q);
-
-	/*
-	 * some tg are created before queue is fully initialized, eg, nonrot
-	 * isn't initialized yet
-	 */
-	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-		struct throtl_grp *tg = blkg_to_tg(blkg);
-
-		tg->idletime_threshold = td->dft_idletime_threshold;
-		tg->idletime_threshold_conf = td->dft_idletime_threshold;
-	}
-	rcu_read_unlock();
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-- 
cgit v1.2.3-59-g8ed1b


From c849e55178f559c4bbed43efb113cb7602aade89 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 16 May 2017 19:21:08 +0200
Subject: PCI: endpoint: Make PCI_ENDPOINT depend on HAS_DMA

If NO_DMA=y:

    drivers/built-in.o: In function `__pci_epc_create':
    (.text+0xef4e): undefined reference to `bad_dma_ops'
    drivers/built-in.o: In function `pci_epc_add_epf':
    (.text+0xf676): undefined reference to `bad_dma_ops'
    drivers/built-in.o: In function `pci_epf_alloc_space':
    (.text+0xfa32): undefined reference to `bad_dma_ops'
    drivers/built-in.o: In function `pci_epf_free_space':
    (.text+0xfac4): undefined reference to `bad_dma_ops'

Add a dependency on HAS_DMA to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/endpoint/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig
index c23f146fb5a6..c09623ca8c3b 100644
--- a/drivers/pci/endpoint/Kconfig
+++ b/drivers/pci/endpoint/Kconfig
@@ -6,6 +6,7 @@ menu "PCI Endpoint"
 
 config PCI_ENDPOINT
 	bool "PCI Endpoint Support"
+	depends on HAS_DMA
 	help
 	   Enable this configuration option to support configurable PCI
 	   endpoint. This should be enabled if the platform has a PCI
-- 
cgit v1.2.3-59-g8ed1b


From e40cf640b8f632091a30ef0b030c83546f07c902 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 22 May 2017 16:52:24 -0500
Subject: switchtec: Use new cdev_device_add() helper function

Convert from "cdev_add() + device_add()" to cdev_device_add(), and from
"device_del() + cdev_del()" to cdev_device_del().

[bhelgaas: changelog]
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/switch/switchtec.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index cc6e085008fb..abaa227a5f34 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1291,7 +1291,6 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
 	cdev = &stdev->cdev;
 	cdev_init(cdev, &switchtec_fops);
 	cdev->owner = THIS_MODULE;
-	cdev->kobj.parent = &dev->kobj;
 
 	return stdev;
 
@@ -1479,11 +1478,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 		  SWITCHTEC_EVENT_EN_IRQ,
 		  &stdev->mmio_part_cfg->mrpc_comp_hdr);
 
-	rc = cdev_add(&stdev->cdev, stdev->dev.devt, 1);
-	if (rc)
-		goto err_put;
-
-	rc = device_add(&stdev->dev);
+	rc = cdev_device_add(&stdev->cdev, &stdev->dev);
 	if (rc)
 		goto err_devadd;
 
@@ -1492,7 +1487,6 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 	return 0;
 
 err_devadd:
-	cdev_del(&stdev->cdev);
 	stdev_kill(stdev);
 err_put:
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
@@ -1506,8 +1500,7 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 
-	device_del(&stdev->dev);
-	cdev_del(&stdev->cdev);
+	cdev_device_del(&stdev->cdev, &stdev->dev);
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
 	dev_info(&stdev->dev, "unregistered.\n");
 
-- 
cgit v1.2.3-59-g8ed1b


From 9871e9bb5cf6ff0b51457ca74c270c5c5230b224 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 22 May 2017 16:52:30 -0500
Subject: switchtec: Fix minor bug with partition ID register

When a switch endpoint is configured without NTB, the mmio_ntb registers
will read all zeros.  However, in corner case configurations where the
partition ID is not zero and NTB is not enabled, the code will have the
wrong partition ID and this causes the driver to use the wrong set of
drivers.  To fix this we simply take the partition ID from the system info
region.

Reported-by: Dingbao Chen <dingbao.chen@microsemi.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/switch/switchtec.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index abaa227a5f34..f6a63406c76e 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1441,12 +1441,15 @@ static int switchtec_init_pci(struct switchtec_dev *stdev,
 	stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET;
 	stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET;
 	stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET;
-	stdev->partition = ioread8(&stdev->mmio_ntb->partition_id);
+	stdev->partition = ioread8(&stdev->mmio_sys_info->partition_id);
 	stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count);
 	stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET;
 	stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition];
 	stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET;
 
+	if (stdev->partition_count < 1)
+		stdev->partition_count = 1;
+
 	init_pff(stdev);
 
 	pci_set_drvdata(pdev, stdev);
-- 
cgit v1.2.3-59-g8ed1b


From 415b6185c541dc0a21457ff307cdb61950a6eb9f Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 22 May 2017 17:06:30 -0500
Subject: PCI: imx6: Fix config read timeout handling

Commit cc7b0d495589 ("PCI: designware: Update PCI config space remap
function") made PCI configuration requests non-posted, which means we now
get a synchronous abort when the CFG space read to probe for downstream
devices times out.

Synchronous aborts need to be handled differently from the async aborts we
were getting before, in particular the PC needs to be advanced when
resolving the abort.  This is mostly a copy of what other PCI drivers do on
ARM to handle those aborts.

[bhelgaas: changelog, "Fixes"]
Fixes: cc7b0d495589 ("PCI: designware: Update PCI config space remap function")
Tested-by: Fabio Estevam <fabio.estevam@nxp.com>
Tested-by: Peter Senna Tschudin <peter.senna@collabora.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Richard Zhu <hongxing.zhu@nxp.com>
---
 drivers/pci/dwc/pci-imx6.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c
index a98cba55c7f0..19a289b8cc94 100644
--- a/drivers/pci/dwc/pci-imx6.c
+++ b/drivers/pci/dwc/pci-imx6.c
@@ -252,7 +252,34 @@ static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie)
 static int imx6q_pcie_abort_handler(unsigned long addr,
 		unsigned int fsr, struct pt_regs *regs)
 {
-	return 0;
+	unsigned long pc = instruction_pointer(regs);
+	unsigned long instr = *(unsigned long *)pc;
+	int reg = (instr >> 12) & 15;
+
+	/*
+	 * If the instruction being executed was a read,
+	 * make it look like it read all-ones.
+	 */
+	if ((instr & 0x0c100000) == 0x04100000) {
+		unsigned long val;
+
+		if (instr & 0x00400000)
+			val = 255;
+		else
+			val = -1;
+
+		regs->uregs[reg] = val;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	if ((instr & 0x0e100090) == 0x00100090) {
+		regs->uregs[reg] = -1;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	return 1;
 }
 
 static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
@@ -819,8 +846,8 @@ static int __init imx6_pcie_init(void)
 	 * we can install the handler here without risking it
 	 * accessing some uninitialized driver state.
 	 */
-	hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0,
-			"imprecise external abort");
+	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
+			"external abort on non-linefetch");
 
 	return platform_driver_register(&imx6_pcie_driver);
 }
-- 
cgit v1.2.3-59-g8ed1b


From c10e8031d5b34cde06b039ca2e8af87a33d5ba11 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 18 May 2017 13:07:49 -0700
Subject: efi-pstore: Fix write/erase id tracking

Prior to the pstore interface refactoring, the "id" generated during
a backend pstore_write() was only retained by the internal pstore
inode tracking list. Additionally the "part" was ignored, so EFI
would encode this in the id. This corrects the misunderstandings
and correctly sets "id" during pstore_write(), and uses "part"
directly during pstore_erase().

Reported-by: Marta Lofstedt <marta.lofstedt@intel.com>
Fixes: 76cc9580e3fb ("pstore: Replace arguments for write() API")
Fixes: a61072aae693 ("pstore: Replace arguments for erase() API")
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 drivers/firmware/efi/efi-pstore.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 44148fd4c9f2..dda2e96328c0 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -53,6 +53,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 	if (sscanf(name, "dump-type%u-%u-%d-%lu-%c",
 		   &record->type, &part, &cnt, &time, &data_type) == 5) {
 		record->id = generic_id(time, part, cnt);
+		record->part = part;
 		record->count = cnt;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -64,6 +65,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 	} else if (sscanf(name, "dump-type%u-%u-%d-%lu",
 		   &record->type, &part, &cnt, &time) == 4) {
 		record->id = generic_id(time, part, cnt);
+		record->part = part;
 		record->count = cnt;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -77,6 +79,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 		 * multiple logs, remains.
 		 */
 		record->id = generic_id(time, part, 0);
+		record->part = part;
 		record->count = 0;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -241,9 +244,15 @@ static int efi_pstore_write(struct pstore_record *record)
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	int i, ret = 0;
 
+	record->time.tv_sec = get_seconds();
+	record->time.tv_nsec = 0;
+
+	record->id = generic_id(record->time.tv_sec, record->part,
+				record->count);
+
 	snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu-%c",
 		 record->type, record->part, record->count,
-		 get_seconds(), record->compressed ? 'C' : 'D');
+		 record->time.tv_sec, record->compressed ? 'C' : 'D');
 
 	for (i = 0; i < DUMP_NAME_LEN; i++)
 		efi_name[i] = name[i];
@@ -255,7 +264,6 @@ static int efi_pstore_write(struct pstore_record *record)
 	if (record->reason == KMSG_DUMP_OOPS)
 		efivar_run_worker();
 
-	record->id = record->part;
 	return ret;
 };
 
@@ -287,7 +295,7 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
 		 * holding multiple logs, remains.
 		 */
 		snprintf(name_old, sizeof(name_old), "dump-type%u-%u-%lu",
-			ed->record->type, (unsigned int)ed->record->id,
+			ed->record->type, ed->record->part,
 			ed->record->time.tv_sec);
 
 		for (i = 0; i < DUMP_NAME_LEN; i++)
@@ -320,10 +328,7 @@ static int efi_pstore_erase(struct pstore_record *record)
 	char name[DUMP_NAME_LEN];
 	efi_char16_t efi_name[DUMP_NAME_LEN];
 	int found, i;
-	unsigned int part;
 
-	do_div(record->id, 1000);
-	part = do_div(record->id, 100);
 	snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu",
 		 record->type, record->part, record->count,
 		 record->time.tv_sec);
-- 
cgit v1.2.3-59-g8ed1b


From aa3d4409b664813ceb86a24bd09458cdd29cbb8a Mon Sep 17 00:00:00 2001
From: Martin Kepplinger <martin.kepplinger@ginzinger.com>
Date: Mon, 22 May 2017 17:19:45 -0700
Subject: Input: edt-ft5x06 - increase allowed data range for threshold
 parameter

The datasheet and application note does not mention an allowed range for
the M09_REGISTER_THRESHOLD parameter. One of our customers needs to set
lower values than 20 and they seem to work just fine on EDT EP0xx0M09 with
T5x06 touch.

So, lacking a known lower limit, we increase the range for thresholds,
and set the lower limit to 0. The documentation is updated accordingly.

Signed-off-by: Schoefegger Stefan <stefan.schoefegger@ginzinger.com>
Signed-off-by: Manfred Schlaegl <manfred.schlaegl@ginzinger.com>
Signed-off-by: Martin Kepplinger <martin.kepplinger@ginzinger.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt | 2 +-
 Documentation/input/devices/edt-ft5x06.rst                         | 2 +-
 drivers/input/touchscreen/edt-ft5x06.c                             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
index 6db22103e2dd..025cf8c9324a 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
@@ -36,7 +36,7 @@ Optional properties:
                 control gpios
 
  - threshold:   allows setting the "click"-threshold in the range
-                from 20 to 80.
+                from 0 to 80.
 
  - gain:        allows setting the sensitivity in the range from 0 to
                 31. Note that lower values indicate higher
diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst
index 2032f0b7a8fa..1ccc94b192b7 100644
--- a/Documentation/input/devices/edt-ft5x06.rst
+++ b/Documentation/input/devices/edt-ft5x06.rst
@@ -15,7 +15,7 @@ It has been tested with the following devices:
 The driver allows configuration of the touch screen via a set of sysfs files:
 
 /sys/class/input/eventX/device/device/threshold:
-    allows setting the "click"-threshold in the range from 20 to 80.
+    allows setting the "click"-threshold in the range from 0 to 80.
 
 /sys/class/input/eventX/device/device/gain:
     allows setting the sensitivity in the range from 0 to 31. Note that
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 8cf8d8d5d4ef..f872817e81e4 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -471,7 +471,7 @@ static EDT_ATTR(gain, S_IWUSR | S_IRUGO, WORK_REGISTER_GAIN,
 static EDT_ATTR(offset, S_IWUSR | S_IRUGO, WORK_REGISTER_OFFSET,
 		M09_REGISTER_OFFSET, 0, 31);
 static EDT_ATTR(threshold, S_IWUSR | S_IRUGO, WORK_REGISTER_THRESHOLD,
-		M09_REGISTER_THRESHOLD, 20, 80);
+		M09_REGISTER_THRESHOLD, 0, 80);
 static EDT_ATTR(report_rate, S_IWUSR | S_IRUGO, WORK_REGISTER_REPORT_RATE,
 		NO_REGISTER, 3, 14);
 
-- 
cgit v1.2.3-59-g8ed1b


From 089b50d95948f691589cca4d81f1f8761747dbaa Mon Sep 17 00:00:00 2001
From: Maxime Roussin-Bélanger <maxime.roussinbelanger@gmail.com>
Date: Fri, 19 May 2017 14:59:03 -0700
Subject: Input: atmel_mxt_ts - add T100 as a readable object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using the 'object' sysfs attribute, T100 is not displayed in
the output.

Signed-off-by: Maxime Roussin-Bélanger <maxime.roussinbelanger@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2302aef2b2d4..dd042a9b0aaa 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -350,6 +350,7 @@ static bool mxt_object_readable(unsigned int type)
 	case MXT_TOUCH_KEYARRAY_T15:
 	case MXT_TOUCH_PROXIMITY_T23:
 	case MXT_TOUCH_PROXKEY_T52:
+	case MXT_TOUCH_MULTITOUCHSCREEN_T100:
 	case MXT_PROCI_GRIPFACE_T20:
 	case MXT_PROCG_NOISE_T22:
 	case MXT_PROCI_ONETOUCH_T24:
-- 
cgit v1.2.3-59-g8ed1b


From ad258fb918dae8b1ec79a85b4c7f518e4f902869 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 22 May 2017 07:46:55 +0200
Subject: i2c: designware: Fix bogus sda_hold_time due to uninitialized vars

We need to initializes those variables to 0 for platforms that do not
provide ACPI parameters. Otherwise, we set sda_hold_time to random
values, breaking e.g. Galileo and IOT2000 boards.

Reported-and-tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
Fixes: 9d6408433019 ("i2c: designware: don't infer timings described by ACPI from clock rate")
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 6283b99d2b17..d1263b82d646 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -94,9 +94,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[],
 static int dw_i2c_acpi_configure(struct platform_device *pdev)
 {
 	struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
+	u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0;
 	acpi_handle handle = ACPI_HANDLE(&pdev->dev);
 	const struct acpi_device_id *id;
-	u32 ss_ht, fp_ht, hs_ht, fs_ht;
 	struct acpi_device *adev;
 	const char *uid;
 
-- 
cgit v1.2.3-59-g8ed1b


From 1fc2e41f7af4572b07190f9dec28396b418e9a36 Mon Sep 17 00:00:00 2001
From: Alexander Tsoy <alexander@tsoy.me>
Date: Mon, 22 May 2017 20:58:11 +0300
Subject: ALSA: hda - apply STAC_9200_DELL_M22 quirk for Dell Latitude D430

This model is actually called 92XXM2-8 in Windows driver. But since pin
configs for M22 and M28 are identical, just reuse M22 quirk.

Fixes external microphone (tested) and probably docking station ports
(not tested).

Signed-off-by: Alexander Tsoy <alexander@tsoy.me>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_sigmatel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index faa3d38bac0b..6cefdf6c0b75 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = {
 		      "Dell Inspiron 1501", STAC_9200_DELL_M26),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6,
 		      "unknown Dell", STAC_9200_DELL_M26),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201,
+		      "Dell Latitude D430", STAC_9200_DELL_M22),
 	/* Panasonic */
 	SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC),
 	/* Gateway machines needs EAPD to be set on resume */
-- 
cgit v1.2.3-59-g8ed1b


From 429030bc944ee9a8bbe5d9bb23dcda0ae2205450 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Date: Fri, 19 May 2017 14:58:19 -0300
Subject: drm: qxl: Delay entering atomic context during cursor update

qxl_release_map will enter an atomic context, but since we still need to
alloc memory for BOs, we better delay that until we have everything we
need, in case we need to sleep inside the allocation.  This avoids the
Sleep in atomic state below, which was reported by Mike.

 [   43.910362] BUG: sleeping function called from invalid context at mm/slab.h:432
 [   43.910955] in_atomic(): 1, irqs_disabled(): 0, pid: 2077, name: Xorg
 [   43.911472] Preemption disabled at:
 [   43.911478] [<ffffffffa02b1c45>] qxl_bo_kmap_atomic_page+0xa5/0x100 [qxl]
 [   43.912103] CPU: 0 PID: 2077 Comm: Xorg Tainted: G            E   4.12.0-master #38
 [ 43.912550] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
 rel-1.8.1-0-g4adadbd-20161202_174313-build11a 04/01/2014
 [   43.913202] Call Trace:
 [   43.913371]  dump_stack+0x65/0x89
 [   43.913581]  ? qxl_bo_kmap_atomic_page+0xa5/0x100 [qxl]
 [   43.913876]  ___might_sleep+0x11a/0x190
 [   43.914095]  __might_sleep+0x4a/0x80
 [   43.914319]  ? qxl_bo_create+0x50/0x190 [qxl]
 [   43.914565]  kmem_cache_alloc_trace+0x46/0x180
 [   43.914836]  qxl_bo_create+0x50/0x190 [qxl]
 [   43.915082]  ? refcount_dec_and_test+0x11/0x20
 [   43.915332]  ? ttm_mem_io_reserve+0x41/0xe0 [ttm]
 [   43.915595]  qxl_alloc_bo_reserved+0x37/0xb0 [qxl]
 [   43.915884]  qxl_cursor_atomic_update+0x8f/0x260 [qxl]
 [   43.916172]  ? drm_atomic_helper_update_legacy_modeset_state+0x1d6/0x210 [drm_kms_helper]
 [   43.916623]  drm_atomic_helper_commit_planes+0xec/0x230 [drm_kms_helper]
 [   43.916995]  drm_atomic_helper_commit_tail+0x2b/0x60 [drm_kms_helper]
 [   43.917398]  commit_tail+0x65/0x70 [drm_kms_helper]
 [   43.917693]  drm_atomic_helper_commit+0xa9/0x100 [drm_kms_helper]
 [   43.918039]  drm_atomic_commit+0x4b/0x50 [drm]
 [   43.918334]  drm_atomic_helper_update_plane+0xf1/0x110 [drm_kms_helper]
 [   43.918902]  __setplane_internal+0x19f/0x280 [drm]
 [   43.919240]  drm_mode_cursor_universal+0x101/0x1c0 [drm]
 [   43.919541]  drm_mode_cursor_common+0x15b/0x1d0 [drm]
 [   43.919858]  drm_mode_cursor2_ioctl+0xe/0x10 [drm]
 [   43.920157]  drm_ioctl+0x211/0x460 [drm]
 [   43.920383]  ? drm_mode_cursor_ioctl+0x50/0x50 [drm]
 [   43.920664]  ? handle_mm_fault+0x93/0x160
 [   43.920893]  do_vfs_ioctl+0x96/0x6e0
 [   43.921117]  ? __fget+0x73/0xa0
 [   43.921322]  SyS_ioctl+0x41/0x70
 [   43.921545]  entry_SYSCALL_64_fastpath+0x1a/0xa5
 [   43.922188] RIP: 0033:0x7f1145804bc7
 [   43.922526] RSP: 002b:00007ffcd3e50508 EFLAGS: 00003246 ORIG_RAX: 0000000000000010
 [   43.923367] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007f1145804bc7
 [   43.923852] RDX: 00007ffcd3e50540 RSI: 00000000c02464bb RDI: 000000000000000b
 [   43.924299] RBP: 0000000000000040 R08: 0000000000000040 R09: 000000000000000c
 [   43.924694] R10: 00007ffcd3e50340 R11: 0000000000003246 R12: 0000000000000018
 [   43.925128] R13: 00000000022bc390 R14: 0000000000000040 R15: 00007ffcd3e5062c

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170519175819.15682-1-krisman@collabora.co.uk
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/qxl/qxl_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 058340a002c2..4a340efd8ba6 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -575,8 +575,6 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	if (ret)
 		return;
 
-	cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
-
 	if (fb != old_state->fb) {
 		obj = to_qxl_framebuffer(fb)->obj;
 		user_bo = gem_to_qxl_bo(obj);
@@ -614,6 +612,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		qxl_bo_kunmap(cursor_bo);
 		qxl_bo_kunmap(user_bo);
 
+		cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
 		cmd->u.set.visible = 1;
 		cmd->u.set.shape = qxl_bo_physical_address(qdev,
 							   cursor_bo, 0);
@@ -624,6 +623,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		if (ret)
 			goto out_free_release;
 
+		cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
 		cmd->type = QXL_CURSOR_MOVE;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From f928543404bdf6bb4e8d6a6c3ced5edebd0d6f38 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 22 May 2017 15:59:45 +0200
Subject: drm: Fix deadlock retry loop in page_flip_ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I failed to properly onion-wrap the unwind code: We acquire the vblank
reference before we start with the wait-wound locking dance, hence we
must make sure we retry before we drop the reference. Oops.

v2: The vblank_put must be after the frambuffer_put (Michel). I suck at
unwrapping code that doesn't use separate labels for each stage, but
checks each pointer first ... While re-reading everything I also
realized that we must clean up the fb refcounts, and specifically
plane->old_fb before we drop the locks, either in the final unlocking,
or in the w/w retry path. Hence the correct fix is to drop the
vblank_put to the very bottom.

Fixes: 29dc0d1de182 ("drm: Roll out acquire context for the page_flip ioctl")
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: David Airlie <airlied@linux.ie>
Cc: dri-devel@lists.freedesktop.org
Reported-by: Tommi Rantala <tt.rantala@gmail.com>
Cc: Tommi Rantala <tt.rantala@gmail.com>
Cc: Michel Dänzer <michel@daenzer.net>
Tested-by: Tommi Rantala <tt.rantala@gmail.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170522135945.28831-1-daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/drm_plane.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index fedd4d60d9cd..5dc8c4350602 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -948,8 +948,6 @@ retry:
 	}
 
 out:
-	if (ret && crtc->funcs->page_flip_target)
-		drm_crtc_vblank_put(crtc);
 	if (fb)
 		drm_framebuffer_put(fb);
 	if (crtc->primary->old_fb)
@@ -964,5 +962,8 @@ out:
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
+	if (ret && crtc->funcs->page_flip_target)
+		drm_crtc_vblank_put(crtc);
+
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From c477ebe21fabe0010a2ed324ce3a1762c757d867 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Sat, 6 May 2017 11:41:30 +0200
Subject: mmc: dt: pwrseq-simple: Invent power-off-delay-us

During power off, after the GPIO pin has been asserted, some devices like
the Wifi chip from TI, Wl18xx, needs a delay before the host continues with
clock gating and turning off regulators as to follow a graceful shutdown
sequence.

Therefore invent an optional power-off-delay-us DT binding for
mmc-pwrseq-simple, to allow us to support this constraint.

Cc: devicetree@vger.kernel.org
Cc: Rob Herring <robh+dt@kernel.org>
Cc: linux-mmc@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
index e25436861867..9029b45b8a22 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
@@ -18,6 +18,8 @@ Optional properties:
   "ext_clock" (External clock provided to the card).
 - post-power-on-delay-ms : Delay in ms after powering the card and
 	de-asserting the reset-gpios (if any)
+- power-off-delay-us : Delay in us after asserting the reset-gpios (if any)
+	during power off of the card.
 
 Example:
 
-- 
cgit v1.2.3-59-g8ed1b


From e9256e142f597edf90c68cec22db4c4aebaa27de Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Sat, 6 May 2017 11:43:05 +0200
Subject: mmc: pwrseq_simple: Parse DTS for the power-off-delay-us property

If the optional power-off-delay-us property is found, insert the
corresponding delay after asserting the GPIO during power off. This enables
a graceful shutdown sequence for some devices.

Cc: linux-mmc@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/mmc/core/pwrseq_simple.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 1304160de168..13ef162cf066 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -27,6 +27,7 @@ struct mmc_pwrseq_simple {
 	struct mmc_pwrseq pwrseq;
 	bool clk_enabled;
 	u32 post_power_on_delay_ms;
+	u32 power_off_delay_us;
 	struct clk *ext_clk;
 	struct gpio_descs *reset_gpios;
 };
@@ -78,6 +79,10 @@ static void mmc_pwrseq_simple_power_off(struct mmc_host *host)
 
 	mmc_pwrseq_simple_set_gpios_value(pwrseq, 1);
 
+	if (pwrseq->power_off_delay_us)
+		usleep_range(pwrseq->power_off_delay_us,
+			2 * pwrseq->power_off_delay_us);
+
 	if (!IS_ERR(pwrseq->ext_clk) && pwrseq->clk_enabled) {
 		clk_disable_unprepare(pwrseq->ext_clk);
 		pwrseq->clk_enabled = false;
@@ -119,6 +124,8 @@ static int mmc_pwrseq_simple_probe(struct platform_device *pdev)
 
 	device_property_read_u32(dev, "post-power-on-delay-ms",
 				 &pwrseq->post_power_on_delay_ms);
+	device_property_read_u32(dev, "power-off-delay-us",
+				 &pwrseq->power_off_delay_us);
 
 	pwrseq->pwrseq.dev = dev;
 	pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
-- 
cgit v1.2.3-59-g8ed1b


From f74ac688c981138c914f9afba50b646146e35585 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 24 Apr 2017 22:40:22 +0200
Subject: mfd: dts: hi655x: Add clock binding for the pmic

The hi655x PMIC provides the regulators but also a clock. The latter is
missing in the definition, so extend the documentation to include this as
well.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
[Ulf: Split patch and updated changelog]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
index 05485699d70e..9630ac0e4b56 100644
--- a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
+++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
@@ -16,6 +16,11 @@ Required properties:
 - reg:                  Base address of PMIC on Hi6220 SoC.
 - interrupt-controller: Hi655x has internal IRQs (has own IRQ domain).
 - pmic-gpios:           The GPIO used by PMIC IRQ.
+- #clock-cells:		From common clock binding; shall be set to 0
+
+Optional properties:
+- clock-output-names: From common clock binding to override the
+  default output clock name
 
 Example:
 	pmic: pmic@f8000000 {
@@ -24,4 +29,5 @@ Example:
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+		#clock-cells = <0>;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 307ded8968868e55343e063fbe96cff1efd77eb6 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 24 Apr 2017 22:40:22 +0200
Subject: arm64: dts: hikey: Add clock for the pmic mfd

The hi655x PMIC provides the regulators but also a clock. The latter is
missing so let's add it. This clock is used by WiFi/Bluetooth chip, but
that connection is done in a separate change on top of this one.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
[Ulf: Split patch and updated changelog]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 75bce2d0b1a8..d22eb3a646c4 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -330,6 +330,7 @@
 	pmic: pmic@f8000000 {
 		compatible = "hisilicon,hi655x-pmic";
 		reg = <0x0 0xf8000000 0x0 0x1000>;
+		#clock-cells = <0>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
-- 
cgit v1.2.3-59-g8ed1b


From 1b32a5ff98fbb271d2235ddcfe3b58f514f8260a Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 May 2017 12:46:55 +0200
Subject: arm64: dts: hi6220: Move the fixed_5v_hub regulator to the hikey dts

The regulator is a part of the hikey board, therefore let's move it from
the hi6220 SoC dtsi file into the hikey dts file . Let's also rename the
regulator according to the datasheet (5V_HUB) to better reflect the HW.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 10 ++++++++++
 arch/arm64/boot/dts/hisilicon/hi6220.dtsi      | 12 +-----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index d22eb3a646c4..0f6cba77fc76 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -81,6 +81,16 @@
 		};
 	};
 
+	reg_5v_hub: regulator@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "5V_HUB";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		gpio = <&gpio0 7 0>;
+		regulator-always-on;
+	};
+
 	soc {
 		spi0: spi@f7106000 {
 			status = "ok";
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 1e5129b19280..951152d44c02 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -725,20 +725,10 @@
 			status = "disabled";
 		};
 
-		fixed_5v_hub: regulator@0 {
-			compatible = "regulator-fixed";
-			regulator-name = "fixed_5v_hub";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			regulator-boot-on;
-			gpio = <&gpio0 7 0>;
-			regulator-always-on;
-		};
-
 		usb_phy: usbphy {
 			compatible = "hisilicon,hi6220-usb-phy";
 			#phy-cells = <0>;
-			phy-supply = <&fixed_5v_hub>;
+			phy-supply = <&reg_5v_hub>;
 			hisilicon,peripheral-syscon = <&sys_ctrl>;
 		};
 
-- 
cgit v1.2.3-59-g8ed1b


From 84f7c60b31f10e3a438153bc7408ad536f585641 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 May 2017 13:51:27 +0200
Subject: arm64: dts: hikey: Add the SYS_5V and the VDD_3V3 regulators

Add these regulators to better describe the HW, but also because those is
needed in following changes.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 0f6cba77fc76..802f4a4bed30 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -81,7 +81,26 @@
 		};
 	};
 
-	reg_5v_hub: regulator@0 {
+	reg_sys_5v: regulator@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "SYS_5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_vdd_3v3: regulator@1 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDD_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+		vin-supply = <&reg_sys_5v>;
+	};
+
+	reg_5v_hub: regulator@2 {
 		compatible = "regulator-fixed";
 		regulator-name = "5V_HUB";
 		regulator-min-microvolt = <5000000>;
@@ -89,6 +108,7 @@
 		regulator-boot-on;
 		gpio = <&gpio0 7 0>;
 		regulator-always-on;
+		vin-supply = <&reg_sys_5v>;
 	};
 
 	soc {
-- 
cgit v1.2.3-59-g8ed1b


From 76f1dfb687150e852aa74573962cfc158a9570cc Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 May 2017 14:18:26 +0200
Subject: arm64: dts: hi6220: Move board data from the dwmmc nodes to hikey dts

Move the board specific descriptions for the dwmmc nodes in the hi6220 SoC
dtsi, into the hikey dts as it's there these belongs.

While changing this, let's take the opportunity to drop the use of the
"ti,non-removable" binding for one of the dwmmc device nodes, as it's not a
valid binding and not used. Drop also the unnecessary use of "num-slots =
<0x1>" for all of the dwmmc nodes, as there is no need to set this since
when default number of slots is one.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 23 ++++++++++++++++++++++-
 arch/arm64/boot/dts/hisilicon/hi6220.dtsi      | 19 -------------------
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 802f4a4bed30..5132d8ed4664 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -286,8 +286,29 @@
 
 		/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
 
+		dwmmc_0: dwmmc0@f723d000 {
+			cap-mmc-highspeed;
+			non-removable;
+			bus-width = <0x8>;
+			vmmc-supply = <&ldo19>;
+		};
+
+		dwmmc_1: dwmmc1@f723e000 {
+			card-detect-delay = <200>;
+			cap-sd-highspeed;
+			sd-uhs-sdr12;
+			sd-uhs-sdr25;
+			sd-uhs-sdr50;
+			vqmmc-supply = <&ldo7>;
+			vmmc-supply = <&ldo10>;
+			bus-width = <0x4>;
+			disable-wp;
+			cd-gpios = <&gpio1 0 1>;
+		};
+
 		dwmmc_2: dwmmc2@f723f000 {
-			ti,non-removable;
+			broken-cd;
+			bus-width = <0x4>;
 			non-removable;
 			/* WL_EN */
 			vmmc-supply = <&wlan_en_reg>;
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 951152d44c02..5013e4b2ea71 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -756,17 +756,12 @@
 
 		dwmmc_0: dwmmc0@f723d000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
-			cap-mmc-highspeed;
-			non-removable;
 			reg = <0x0 0xf723d000 0x0 0x1000>;
 			interrupts = <0x0 0x48 0x4>;
 			clocks = <&sys_ctrl 2>, <&sys_ctrl 1>;
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>;
 			reset-names = "reset";
-			bus-width = <0x8>;
-			vmmc-supply = <&ldo19>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func
 				     &emmc_cfg_func &emmc_rst_cfg_func>;
@@ -774,13 +769,7 @@
 
 		dwmmc_1: dwmmc1@f723e000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
-			card-detect-delay = <200>;
 			hisilicon,peripheral-syscon = <&ao_ctrl>;
-			cap-sd-highspeed;
-			sd-uhs-sdr12;
-			sd-uhs-sdr25;
-			sd-uhs-sdr50;
 			reg = <0x0 0xf723e000 0x0 0x1000>;
 			interrupts = <0x0 0x49 0x4>;
 			#address-cells = <0x1>;
@@ -789,11 +778,6 @@
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>;
 			reset-names = "reset";
-			vqmmc-supply = <&ldo7>;
-			vmmc-supply = <&ldo10>;
-			bus-width = <0x4>;
-			disable-wp;
-			cd-gpios = <&gpio1 0 1>;
 			pinctrl-names = "default", "idle";
 			pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>;
 			pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>;
@@ -801,15 +785,12 @@
 
 		dwmmc_2: dwmmc2@f723f000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
 			reg = <0x0 0xf723f000 0x0 0x1000>;
 			interrupts = <0x0 0x4a 0x4>;
 			clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>;
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>;
 			reset-names = "reset";
-			bus-width = <0x4>;
-			broken-cd;
 			pinctrl-names = "default", "idle";
 			pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>;
 			pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>;
-- 
cgit v1.2.3-59-g8ed1b


From ea452678734eb782126f999bf5c4fb3e71d3b196 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 May 2017 16:11:33 +0200
Subject: arm64: dts: hikey: Fix WiFi support

The description of the connection between the dwmmc (SDIO) controller and
the Wifi chip, which is attached to the SDIO bus is wrong. Currently the
SDIO card can't be detected and thus the Wifi doesn't work.

Let's fix this by assigning the correct vmmc supply, which is the always on
regulator VDD_3V3 and remove the WLAN enable regulator altogether. Then to
properly deal with the power on/off sequence, add a mmc-pwrseq node to
describe the resources needed to detect the SDIO card.

Except for the WLAN enable GPIO and its corresponding assert/de-assert
delays, the mmc-pwrseq node also contains a handle to a clock provided by
the hi655x pmic. This clock is also needed to be able to turn on the WiFi
chip.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 5132d8ed4664..49f6a6242cf9 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -111,6 +111,15 @@
 		vin-supply = <&reg_sys_5v>;
 	};
 
+	wl1835_pwrseq: wl1835-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		/* WLAN_EN GPIO */
+		reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
+		clocks = <&pmic>;
+		clock-names = "ext_clock";
+		power-off-delay-us = <10>;
+	};
+
 	soc {
 		spi0: spi@f7106000 {
 			status = "ok";
@@ -307,11 +316,10 @@
 		};
 
 		dwmmc_2: dwmmc2@f723f000 {
-			broken-cd;
 			bus-width = <0x4>;
 			non-removable;
-			/* WL_EN */
-			vmmc-supply = <&wlan_en_reg>;
+			vmmc-supply = <&reg_vdd_3v3>;
+			mmc-pwrseq = <&wl1835_pwrseq>;
 
 			#address-cells = <0x1>;
 			#size-cells = <0x0>;
@@ -323,18 +331,6 @@
 				interrupts = <3 IRQ_TYPE_EDGE_RISING>;
 			};
 		};
-
-		wlan_en_reg: regulator@1 {
-			compatible = "regulator-fixed";
-			regulator-name = "wlan-en-regulator";
-			regulator-min-microvolt = <1800000>;
-			regulator-max-microvolt = <1800000>;
-			/* WLAN_EN GPIO */
-			gpio = <&gpio0 5 0>;
-			/* WLAN card specific delay */
-			startup-delay-us = <70000>;
-			enable-active-high;
-		};
 	};
 
 	leds {
-- 
cgit v1.2.3-59-g8ed1b


From 1b57b6210f4e52904393be97c62122aae69bc8aa Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 23 May 2017 09:58:07 +0100
Subject: cfg80211: make cfg80211_sched_scan_results() work from atomic context

Drivers should be able to call cfg80211_sched_scan_results() from atomic
context. However, with the introduction of multiple scheduled scan feature
this requirement was not taken into account resulting in regression shown
below.

[  119.021594] BUG: scheduling while atomic: irq/47-iwlwifi/517/0x00000200
[  119.021604] Modules linked in: [...]
[  119.021759] CPU: 1 PID: 517 Comm: irq/47-iwlwifi Not tainted 4.12.0-rc2-t440s-20170522+ #1
[  119.021763] Hardware name: LENOVO 20AQS03H00/20AQS03H00, BIOS GJET91WW (2.41 ) 09/21/2016
[  119.021766] Call Trace:
[  119.021778]  ? dump_stack+0x5c/0x84
[  119.021784]  ? __schedule_bug+0x4c/0x70
[  119.021792]  ? __schedule+0x496/0x5c0
[  119.021798]  ? schedule+0x2d/0x80
[  119.021804]  ? schedule_preempt_disabled+0x5/0x10
[  119.021810]  ? __mutex_lock.isra.0+0x18e/0x4c0
[  119.021817]  ? __wake_up+0x2f/0x50
[  119.021833]  ? cfg80211_sched_scan_results+0x19/0x60 [cfg80211]
[  119.021844]  ? cfg80211_sched_scan_results+0x19/0x60 [cfg80211]
[  119.021859]  ? iwl_mvm_rx_lmac_scan_iter_complete_notif+0x17/0x30 [iwlmvm]
[  119.021869]  ? iwl_pcie_rx_handle+0x2a9/0x7e0 [iwlwifi]
[  119.021878]  ? iwl_pcie_irq_handler+0x17c/0x730 [iwlwifi]
[  119.021884]  ? irq_forced_thread_fn+0x60/0x60
[  119.021887]  ? irq_thread_fn+0x16/0x40
[  119.021892]  ? irq_thread+0x109/0x180
[  119.021896]  ? wake_threads_waitq+0x30/0x30
[  119.021901]  ? kthread+0xf2/0x130
[  119.021905]  ? irq_thread_dtor+0x90/0x90
[  119.021910]  ? kthread_create_on_node+0x40/0x40
[  119.021915]  ? ret_from_fork+0x26/0x40

Fixes: b34939b98369 ("cfg80211: add request id to cfg80211_sched_scan_*() api")
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 14d5f0c8c45f..9f0901f3e42b 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -322,9 +322,9 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid)
 {
 	struct cfg80211_sched_scan_request *pos;
 
-	ASSERT_RTNL();
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
 
-	list_for_each_entry(pos, &rdev->sched_scan_req_list, list) {
+	list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) {
 		if (pos->reqid == reqid)
 			return pos;
 	}
@@ -398,13 +398,13 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid)
 	trace_cfg80211_sched_scan_results(wiphy, reqid);
 	/* ignore if we're not scanning */
 
-	rtnl_lock();
+	rcu_read_lock();
 	request = cfg80211_find_sched_scan_req(rdev, reqid);
 	if (request) {
 		request->report_results = true;
 		queue_work(cfg80211_wq, &rdev->sched_scan_res_wk);
 	}
-	rtnl_unlock();
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_results);
 
-- 
cgit v1.2.3-59-g8ed1b


From c70d9d809fdeecedb96972457ee45c49a232d97f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 May 2017 15:40:12 -0500
Subject: ptrace: Properly initialize ptracer_cred on fork

When I introduced ptracer_cred I failed to consider the weirdness of
fork where the task_struct copies the old value by default.  This
winds up leaving ptracer_cred set even when a process forks and
the child process does not wind up being ptraced.

Because ptracer_cred is not set on non-ptraced processes whose
parents were ptraced this has broken the ability of the enlightenment
window manager to start setuid children.

Fix this by properly initializing ptracer_cred in ptrace_init_task

This must be done with a little bit of care to preserve the current value
of ptracer_cred when ptrace carries through fork.  Re-reading the
ptracer_cred from the ptracing process at this point is inconsistent
with how PT_PTRACE_CAP has been maintained all of these years.

Tested-by: Takashi Iwai <tiwai@suse.de>
Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ptrace.h |  7 +++++--
 kernel/ptrace.c        | 20 +++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 422bc2e4cb6a..ef3eb8bbfee4 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
-			  struct task_struct *new_parent);
+			  struct task_struct *new_parent,
+			  const struct cred *ptracer_cred);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
@@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 	if (unlikely(ptrace) && current->ptrace) {
 		child->ptrace = current->ptrace;
-		__ptrace_link(child, current->parent);
+		__ptrace_link(child, current->parent, current->ptracer_cred);
 
 		if (child->ptrace & PT_SEIZED)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
@@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
+	else
+		child->ptracer_cred = NULL;
 }
 
 /**
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 266ddcc1d8bb..60f356d91060 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 }
 
 
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+		   const struct cred *ptracer_cred)
+{
+	BUG_ON(!list_empty(&child->ptrace_entry));
+	list_add(&child->ptrace_entry, &new_parent->ptraced);
+	child->parent = new_parent;
+	child->ptracer_cred = get_cred(ptracer_cred);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-	BUG_ON(!list_empty(&child->ptrace_entry));
-	list_add(&child->ptrace_entry, &new_parent->ptraced);
-	child->parent = new_parent;
 	rcu_read_lock();
-	child->ptracer_cred = get_cred(__task_cred(new_parent));
+	__ptrace_link(child, new_parent, __task_cred(new_parent));
 	rcu_read_unlock();
 }
 
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 		flags |= PT_SEIZED;
 	task->ptrace = flags;
 
-	__ptrace_link(task, current);
+	ptrace_link(task, current);
 
 	/* SEIZE doesn't trap tracee on attach */
 	if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
 		 */
 		if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 			current->ptrace = PT_PTRACED;
-			__ptrace_link(current, current->real_parent);
+			ptrace_link(current, current->real_parent);
 		}
 	}
 	write_unlock_irq(&tasklist_lock);
-- 
cgit v1.2.3-59-g8ed1b


From cdc5a7f363be34287ac6c2345e5d1d3b37cf4a23 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 9 May 2017 20:45:06 +0300
Subject: net/mlx5e: Use the correct delete call on offloaded TC encap entry
 detach

We wrongly direcly invoke hlist_del_rcu() and not hash_del_rcu() which
does a slightly different call now and may change later, fix that.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 11c27e4fadf6..a90dd26ea51c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -384,7 +384,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 			mlx5_encap_dealloc(priv->mdev, e->encap_id);
 
-		hlist_del_rcu(&e->encap_hlist);
+		hash_del_rcu(&e->encap_hlist);
 		kfree(e->encap_header);
 		kfree(e);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 3aa4266405a6c2e03eb0ff12d7c573d3d903da4c Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 10 May 2017 13:48:41 +0300
Subject: net/sched: act_csum: Add accessors for offloading drivers

Add the accessors for realizing if this is a csum action,
and for which fields checksum is needed.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/tc_act/tc_csum.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index f31fb6331a53..3248beaf16b0 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <net/act_api.h>
+#include <linux/tc_act/tc_csum.h>
 
 struct tcf_csum {
 	struct tc_action common;
@@ -11,4 +12,18 @@ struct tcf_csum {
 };
 #define to_tcf_csum(a) ((struct tcf_csum *)a)
 
+static inline bool is_tcf_csum(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->type == TCA_ACT_CSUM)
+		return true;
+#endif
+	return false;
+}
+
+static inline u32 tcf_csum_update_flags(const struct tc_action *a)
+{
+	return to_tcf_csum(a)->update_flags;
+}
+
 #endif /* __NET_TC_CSUM_H */
-- 
cgit v1.2.3-59-g8ed1b


From 26c02749936f064abf771a0f5f49b280fcfd8b66 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 10 May 2017 13:59:54 +0300
Subject: net/mlx5e: Allow TC csum offload if applied together with pedit
 action

When offloading header re-writes, the HW re-calculates the relevant L3/L4
checksums. Hence, when upper layers (as done by OVS) ask for TC checksum action
offload together with pedit offload, don't err. This command now works:

tc filter add dev ens1f0 protocol ip parent ffff: prio 20 flower skip_sw
	ip_proto tcp dst_port 9001
	action pedit ex munge tcp dport set 0x1234 pipe
        action csum tcp

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 39 +++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a90dd26ea51c..9dd83c7e4c51 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -43,6 +43,7 @@
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_csum.h>
 #include <net/vxlan.h>
 #include <net/arp.h>
 #include "en.h"
@@ -1109,6 +1110,28 @@ out_err:
 	return err;
 }
 
+static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+{
+	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+			 TCA_CSUM_UPDATE_FLAG_UDP;
+
+	/*  The HW recalcs checksums only if re-writing headers */
+	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+		netdev_warn(priv->netdev,
+			    "TC csum action is only offloaded with pedit\n");
+		return false;
+	}
+
+	if (update_flags & ~prot_flags) {
+		netdev_warn(priv->netdev,
+			    "can't offload TC csum action for some header/s - flags %#x\n",
+			    update_flags);
+		return false;
+	}
+
+	return true;
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				struct mlx5e_tc_flow_parse_attr *parse_attr,
 				struct mlx5e_tc_flow *flow)
@@ -1149,6 +1172,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_csum(a)) {
+			if (csum_offload_supported(priv, attr->action,
+						   tcf_csum_update_flags(a)))
+				continue;
+
+			return -EOPNOTSUPP;
+		}
+
 		if (is_tcf_skbedit_mark(a)) {
 			u32 mark = tcf_skbedit_mark(a);
 
@@ -1651,6 +1682,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_csum(a)) {
+			if (csum_offload_supported(priv, attr->action,
+						   tcf_csum_update_flags(a)))
+				continue;
+
+			return -EOPNOTSUPP;
+		}
+
 		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev, *encap_dev = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From d824bf3fe2d352fc2c52b7ede05b1a0e95d946be Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 9 May 2017 19:02:42 +0300
Subject: net/mlx5e: Properly enforce disallowing of partial field re-write
 offload

Currently we don't support partial header re-writes through TC pedit
action offloading. However, the code that enforces that wasn't err-ing
on cases where the first and last bits of the mask are set but there is
some zero bit between them, such as in the below example, fix that!

tc filter add dev enp1s0 protocol ip parent ffff: prio 10 flower
	ip_proto udp dst_port 2001 skip_sw
	action pedit munge ip src set 1.0.0.1 retain 0xff0000ff

Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9dd83c7e4c51..0387c321f0a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -926,7 +926,7 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 				struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
-	int i, action_size, nactions, max_actions, first, last;
+	int i, action_size, nactions, max_actions, first, last, first_z;
 	void *s_masks_p, *a_masks_p, *vals_p;
 	u32 s_mask, a_mask, val;
 	struct mlx5_fields *f;
@@ -985,9 +985,10 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 		memcpy(&val, vals_p, f->size);
 
 		field_bsize = f->size * BITS_PER_BYTE;
+		first_z = find_first_zero_bit(&mask, field_bsize);
 		first = find_first_bit(&mask, field_bsize);
 		last  = find_last_bit(&mask, field_bsize);
-		if (first > 0 || last != (field_bsize - 1)) {
+		if (first > 0 || last != (field_bsize - 1) || first_z < last) {
 			printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
 			       mask);
 			return -EOPNOTSUPP;
-- 
cgit v1.2.3-59-g8ed1b


From e3ca4e0583a02a04503d9c827fb5c5d50abc4ff5 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 9 May 2017 13:37:26 +0300
Subject: net/mlx5e: Fix warnings around parsing of TC pedit actions

The sparse tool emits these correct complaints:

drivers/net/ethernet/mellanox/mlx5/core//en_tc.c:1005:25: warning: cast to restricted __be32
drivers/net/ethernet/mellanox/mlx5/core//en_tc.c:1007:25: warning: cast to restricted __be16

The value is provided from user-space in network order, but there's
no way for them to realize that, avoid the warnings by casting to the
appropriate type.

Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0387c321f0a2..ec63158ab643 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -928,9 +928,9 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
 	int i, action_size, nactions, max_actions, first, last, first_z;
 	void *s_masks_p, *a_masks_p, *vals_p;
-	u32 s_mask, a_mask, val;
 	struct mlx5_fields *f;
 	u8 cmd, field_bsize;
+	u32 s_mask, a_mask;
 	unsigned long mask;
 	void *action;
 
@@ -947,7 +947,8 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
 		f = &fields[i];
 		/* avoid seeing bits set from previous iterations */
-		s_mask = a_mask = mask = val = 0;
+		s_mask = 0;
+		a_mask = 0;
 
 		s_masks_p = (void *)set_masks + f->offset;
 		a_masks_p = (void *)add_masks + f->offset;
@@ -982,9 +983,8 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 			memset(a_masks_p, 0, f->size);
 		}
 
-		memcpy(&val, vals_p, f->size);
-
 		field_bsize = f->size * BITS_PER_BYTE;
+
 		first_z = find_first_zero_bit(&mask, field_bsize);
 		first = find_first_bit(&mask, field_bsize);
 		last  = find_last_bit(&mask, field_bsize);
@@ -1004,11 +1004,11 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 		}
 
 		if (field_bsize == 32)
-			MLX5_SET(set_action_in, action, data, ntohl(val));
+			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p));
 		else if (field_bsize == 16)
-			MLX5_SET(set_action_in, action, data, ntohs(val));
+			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p));
 		else if (field_bsize == 8)
-			MLX5_SET(set_action_in, action, data, val);
+			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p);
 
 		action += action_size;
 		nactions++;
-- 
cgit v1.2.3-59-g8ed1b


From b57fe691961cc8f00541f9a435c70df45d41e514 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Thu, 27 Apr 2017 17:59:00 +0300
Subject: net/mlx5e: IPoIB, handle RX packet correctly

IPoIB packet contains the pseudo header area, we need to pull it prior
to reset_mac_header in order to let the GRO work well.

In more details:
GRO checks the mac address of the new coming packet, it does that by
comparing the hard_header_len size of the current packet to the previous
one in that session, the comparison is over hard_header_len size.
Now, the driver prepares that area in the skb by allocating area from the
reserved part and resetting the correct mac header to it.

Fixes: 9d6bd752c63c ("net/mlx5e: IPoIB, RX handler")
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 7b1566f0ae58..66b5fec15313 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1041,6 +1041,8 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 #define MLX5_IB_GRH_BYTES       40
 #define MLX5_IPOIB_ENCAP_LEN    4
 #define MLX5_GID_SIZE           16
+#define MLX5_IPOIB_PSEUDO_LEN   20
+#define MLX5_IPOIB_HARD_LEN     (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)
 
 static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 struct mlx5_cqe64 *cqe,
@@ -1048,6 +1050,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 struct sk_buff *skb)
 {
 	struct net_device *netdev = rq->netdev;
+	char *pseudo_header;
 	u8 *dgid;
 	u8 g;
 
@@ -1076,8 +1079,11 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	if (likely(netdev->features & NETIF_F_RXHASH))
 		mlx5e_skb_set_hash(cqe, skb);
 
+	/* 20 bytes of ipoib header and 4 for encap existing */
+	pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
+	memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
 	skb_reset_mac_header(skb);
-	skb_pull(skb, MLX5_IPOIB_ENCAP_LEN);
+	skb_pull(skb, MLX5_IPOIB_HARD_LEN);
 
 	skb->dev = netdev;
 
-- 
cgit v1.2.3-59-g8ed1b


From 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 23 Feb 2017 11:19:36 +0200
Subject: net/mlx5: Avoid using pending command interface slots

Currently when firmware command gets stuck or it takes long time to
complete, the driver command will get timeout and the command slot is
freed and can be used for new commands, and if the firmware receive new
command on the old busy slot its behavior is unexpected and this could
be harmful.
To fix this when the driver command gets timeout we return failure,
but we don't free the command slot and we wait for the firmware to
explicitly respond to that command.
Once all the entries are busy we will stop processing new firmware
commands.

Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c    | 41 +++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c     |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/health.c |  2 +-
 include/linux/mlx5/driver.h                      |  7 +++-
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 5bdaf3d545b2..10d282841f5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work)
 	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
 		       mlx5_command_str(msg_to_opcode(ent->in)),
 		       msg_to_opcode(ent->in));
-	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 }
 
 static void cmd_work_handler(struct work_struct *work)
@@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work)
 	}
 
 	cmd->ent_arr[ent->idx] = ent;
+	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 	lay = get_inst(cmd, ent->idx);
 	ent->lay = lay;
 	memset(lay, 0, sizeof(*lay));
@@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work)
 	if (ent->callback)
 		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
 
+	/* Skip sending command to fw if internal error */
+	if (pci_channel_offline(dev->pdev) ||
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		u8 status = 0;
+		u32 drv_synd;
+
+		ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
+		MLX5_SET(mbox_out, ent->out, status, status);
+		MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
+
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+		return;
+	}
+
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work)
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
 	}
 }
 
@@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 		wait_for_completion(&ent->done);
 	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
 		ent->ret = -ETIMEDOUT;
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 	}
 
 	err = ent->ret;
@@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 	}
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 			struct semaphore *sem;
 
 			ent = cmd->ent_arr[i];
+
+			/* if we already completed the command, ignore it */
+			if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+						&ent->state)) {
+				/* only real completion can free the cmd slot */
+				if (!forced) {
+					mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+						      ent->idx);
+					free_ent(cmd, ent->idx);
+				}
+				continue;
+			}
+
 			if (ent->callback)
 				cancel_delayed_work(&ent->cb_timeout_work);
 			if (ent->page_queue)
@@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 				mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
 					      ent->ret, deliv_status_to_str(ent->status), ent->status);
 			}
-			free_ent(cmd, ent->idx);
+
+			/* only real completion will free the entry slot */
+			if (!forced)
+				free_ent(cmd, ent->idx);
 
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ea5d8d37a75c..33eae5ad2fb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 
 		case MLX5_EVENT_TYPE_CMD:
-			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
+			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
 			break;
 
 		case MLX5_EVENT_TYPE_PORT_CHANGE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d0515391d33b..44f59b1d6f0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
 	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 
 	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-	mlx5_cmd_comp_handler(dev, vector);
+	mlx5_cmd_comp_handler(dev, vector, true);
 	return;
 
 no_trig:
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bcdf739ee41a..93273d9ea4d1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -787,7 +787,12 @@ enum {
 
 typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 
+enum {
+	MLX5_CMD_ENT_STATE_PENDING_COMP,
+};
+
 struct mlx5_cmd_work_ent {
+	unsigned long		state;
 	struct mlx5_cmd_msg    *in;
 	struct mlx5_cmd_msg    *out;
 	void		       *uout;
@@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
-- 
cgit v1.2.3-59-g8ed1b


From b665d98edc9ab295169be2fc5bb4e89a46de0a1a Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 18 May 2017 13:34:43 +0300
Subject: net/mlx5: Tolerate irq_set_affinity_hint() failures

Add tolerance to failures of irq_set_affinity_hint().
Its role is to give hints that optimizes performance,
and should not block the driver load.

In non-SMP systems, functionality is not available as
there is a single core, and all these calls definitely
fail.  Hence, do not call the function and avoid the
warning prints.

Fixes: db058a186f98 ("net/mlx5_core: Set irq affinity hints")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0c123d571b4c..fe5546bb4153 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -612,7 +612,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	struct mlx5_priv *priv  = &mdev->priv;
 	struct msix_entry *msix = priv->msix_arr;
 	int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
-	int err;
 
 	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
 		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
@@ -622,18 +621,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
 			priv->irq_info[i].mask);
 
-	err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
-	if (err) {
-		mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x",
-			       irq);
-		goto err_clear_mask;
-	}
+#ifdef CONFIG_SMP
+	if (irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+#endif
 
 	return 0;
-
-err_clear_mask:
-	free_cpumask_var(priv->irq_info[i].mask);
-	return err;
 }
 
 static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-- 
cgit v1.2.3-59-g8ed1b


From 7bd897cfce1eb373892d35d7f73201b0f9b221c4 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 23 May 2017 17:28:36 +0300
Subject: block: fix an error code in add_partition()

We don't set an error code on this path.  It means that we return NULL
instead of an error pointer and the caller does a NULL dereference.

Fixes: 6d1d8050b4bc ("block, partition: add partition_meta_info to hd_struct")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/partition-generic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/partition-generic.c b/block/partition-generic.c
index ff07b9143ca4..c5ec8246e25e 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
 	if (info) {
 		struct partition_meta_info *pinfo = alloc_part_info(disk);
-		if (!pinfo)
+		if (!pinfo) {
+			err = -ENOMEM;
 			goto out_free_stats;
+		}
 		memcpy(pinfo, info, sizeof(*info));
 		p->info = pinfo;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 7f65b1f5adc5f8496ca8bec4947de66fefe36220 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 22 May 2017 14:50:30 +0200
Subject: cdc-ether: divorce initialisation with a filter reset and a generic
 method

Some devices need their multicast filter reset but others are crashed by that.
So the methods need to be separated.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Reported-by: "Ridgway, Keith" <kridgway@harris.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 31 ++++++++++++++++++++++++-------
 include/linux/usb/usbnet.h  |  1 +
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index f3ae88fdf332..8ab281b478f2 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -310,6 +310,26 @@ skip:
 		return -ENODEV;
 	}
 
+	return 0;
+
+bad_desc:
+	dev_info(&dev->udev->dev, "bad CDC descriptors\n");
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
+
+
+/* like usbnet_generic_cdc_bind() but handles filter initialization
+ * correctly
+ */
+int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+	int rv;
+
+	rv = usbnet_generic_cdc_bind(dev, intf);
+	if (rv < 0)
+		goto bail_out;
+
 	/* Some devices don't initialise properly. In particular
 	 * the packet filter is not reset. There are devices that
 	 * don't do reset all the way. So the packet filter should
@@ -317,13 +337,10 @@ skip:
 	 */
 	usbnet_cdc_update_filter(dev);
 
-	return 0;
-
-bad_desc:
-	dev_info(&dev->udev->dev, "bad CDC descriptors\n");
-	return -ENODEV;
+bail_out:
+	return rv;
 }
-EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
+EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind);
 
 void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
@@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data)
 			< sizeof(struct cdc_state)));
 
-	status = usbnet_generic_cdc_bind(dev, intf);
+	status = usbnet_ether_cdc_bind(dev, intf);
 	if (status < 0)
 		return status;
 
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7dffa5624ea6..97116379db5f 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -206,6 +206,7 @@ struct cdc_state {
 };
 
 extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *);
+extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf);
 extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_status(struct usbnet *, struct urb *);
-- 
cgit v1.2.3-59-g8ed1b


From 12e8b570e732eaa5eae3a2895ba3fbcf91bde2b4 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Mon, 22 May 2017 20:13:07 +0200
Subject: mlx5: fix bug reading rss_hash_type from CQE

Masks for extracting part of the Completion Queue Entry (CQE)
field rss_hash_type was swapped, namely CQE_RSS_HTYPE_IP and
CQE_RSS_HTYPE_L4.

The bug resulted in setting skb->l4_hash, even-though the
rss_hash_type indicated that hash was NOT computed over the
L4 (UDP or TCP) part of the packet.

Added comments from the datasheet, to make it more clear what
these masks are selecting.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dd9a263ed368..a940ec6a046c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -787,8 +787,14 @@ enum {
 };
 
 enum {
-	CQE_RSS_HTYPE_IP	= 0x3 << 6,
-	CQE_RSS_HTYPE_L4	= 0x3 << 2,
+	CQE_RSS_HTYPE_IP	= 0x3 << 2,
+	/* cqe->rss_hash_type[3:2] - IP destination selected for hash
+	 * (00 = none,  01 = IPv4, 10 = IPv6, 11 = Reserved)
+	 */
+	CQE_RSS_HTYPE_L4	= 0x3 << 6,
+	/* cqe->rss_hash_type[7:6] - L4 destination selected for hash
+	 * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI
+	 */
 };
 
 enum {
-- 
cgit v1.2.3-59-g8ed1b


From 223220356d5ebc05ead9a8d697abb0c0a906fc81 Mon Sep 17 00:00:00 2001
From: Richard <richard@aaazen.com>
Date: Sun, 21 May 2017 12:27:00 -0700
Subject: partitions/msdos: FreeBSD UFS2 file systems are not recognized

The code in block/partitions/msdos.c recognizes FreeBSD, OpenBSD
and NetBSD partitions and does a reasonable job picking out OpenBSD
and NetBSD UFS subpartitions.

But for FreeBSD the subpartitions are always "bad".

    Kernel: <bsd:bad subpartition - ignored

Though all 3 of these BSD systems use UFS as a file system, only
FreeBSD uses relative start addresses in the subpartition
declarations.

The following patch fixes this for FreeBSD partitions and leaves
the code for OpenBSD and NetBSD intact:

Signed-off-by: Richard Narron <comet.berkeley@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/partitions/msdos.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 93e7c1b32edd..5610cd537da7 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state,
 			continue;
 		bsd_start = le32_to_cpu(p->p_offset);
 		bsd_size = le32_to_cpu(p->p_size);
+		if (memcmp(flavour, "bsd\0", 4) == 0)
+			bsd_start += offset;
 		if (offset == bsd_start && size == bsd_size)
 			/* full parent partition, we have it already */
 			continue;
-- 
cgit v1.2.3-59-g8ed1b


From 6f4dbd149d2a151b89d1a5bbf7530ee5546c7908 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 19 May 2017 11:33:16 +0200
Subject: libceph: use kbasename() and kill ceph_file_part()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/ceph_debug.h |  6 +++---
 net/ceph/ceph_common.c          | 13 -------------
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index aa2e19182d99..51c5bd64bd00 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -3,6 +3,8 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/string.h>
+
 #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 /*
@@ -12,12 +14,10 @@
  */
 
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
-extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)						\
 	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
-		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\
-		 __LINE__, ##__VA_ARGS__)
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
 # else
 /* faux printk call just to see any compiler warnings. */
 #  define dout(fmt, ...)	do {				\
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4fd02831beed..47e94b560ba0 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -56,19 +56,6 @@ static const struct kernel_param_ops param_ops_supported_features = {
 module_param_cb(supported_features, &param_ops_supported_features, NULL,
 		S_IRUGO);
 
-/*
- * find filename portion of a path (/foo/bar/baz -> baz)
- */
-const char *ceph_file_part(const char *s, int len)
-{
-	const char *e = s + len;
-
-	while (e != s && *(e-1) != '/')
-		e--;
-	return e;
-}
-EXPORT_SYMBOL(ceph_file_part);
-
 const char *ceph_msg_type_name(int type)
 {
 	switch (type) {
-- 
cgit v1.2.3-59-g8ed1b


From 1759f7b0e3fab1d1882d7c680af9d12c5c111b0e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 19 May 2017 11:38:17 +0200
Subject: libceph: make ceph_msg_data_advance() return void

Both callers ignore the returned bool.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 net/ceph/messenger.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5766a6c896c4..d7ab481b2508 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1174,8 +1174,8 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
  * Returns true if the result moves the cursor on to the next piece
  * of the data item.
  */
-static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
-				size_t bytes)
+static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+				  size_t bytes)
 {
 	bool new_piece;
 
@@ -1207,8 +1207,6 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 		new_piece = true;
 	}
 	cursor->need_crc = new_piece;
-
-	return new_piece;
 }
 
 static size_t sizeof_footer(struct ceph_connection *con)
@@ -1577,7 +1575,6 @@ static int write_partial_message_data(struct ceph_connection *con)
 		size_t page_offset;
 		size_t length;
 		bool last_piece;
-		bool need_crc;
 		int ret;
 
 		page = ceph_msg_data_next(cursor, &page_offset, &length,
@@ -1592,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con)
 		}
 		if (do_datacrc && cursor->need_crc)
 			crc = ceph_crc32c_page(crc, page, page_offset, length);
-		need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
+		ceph_msg_data_advance(cursor, (size_t)ret);
 	}
 
 	dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2299,7 +2296,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
 
 		if (do_datacrc)
 			crc = ceph_crc32c_page(crc, page, page_offset, ret);
-		(void) ceph_msg_data_advance(cursor, (size_t)ret);
+		ceph_msg_data_advance(cursor, (size_t)ret);
 	}
 	if (do_datacrc)
 		con->in_data_crc = crc;
-- 
cgit v1.2.3-59-g8ed1b


From f3b4e55ded9b3c52831a7d2ab9e511293c99fc11 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 19 May 2017 11:59:22 +0200
Subject: libceph: drop version variable from ceph_monmap_decode()

It's set but not used: CEPH_FEATURE_MONNAMES feature bit isn't
advertised, which guarantees a v1 MonMap.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 net/ceph/mon_client.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 29a0ef351c5e..250f11f78609 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -43,15 +43,13 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	int i, err = -EINVAL;
 	struct ceph_fsid fsid;
 	u32 epoch, num_mon;
-	u16 version;
 	u32 len;
 
 	ceph_decode_32_safe(&p, end, len, bad);
 	ceph_decode_need(&p, end, len, bad);
 
 	dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
-
-	ceph_decode_16_safe(&p, end, version, bad);
+	p += sizeof(u16);  /* skip version */
 
 	ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
 	ceph_decode_copy(&p, &fsid, sizeof(fsid));
-- 
cgit v1.2.3-59-g8ed1b


From d18a1247c4070390fc0c2d83d89a72afe921882e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 19 May 2017 12:21:56 +0200
Subject: libceph: validate blob_struct_v in process_one_ticket()

None of these are validated in userspace, but since we do validate
reply_struct_v in ceph_x_proc_ticket_reply(), tkt_struct_v (first) and
CephXServiceTicket struct_v (second) in process_one_ticket(), validate
CephXTicketBlob struct_v as well.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 net/ceph/auth_x.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 2034fb926670..d0126df33f1f 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -215,6 +215,9 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	dout(" ticket blob is %d bytes\n", dlen);
 	ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad);
 	blob_struct_v = ceph_decode_8(ptp);
+	if (blob_struct_v != 1)
+		goto bad;
+
 	new_secret_id = ceph_decode_64(ptp);
 	ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend);
 	if (ret)
-- 
cgit v1.2.3-59-g8ed1b


From b51456a6096ebf9f4ceb2cc7e176b471d4b70af0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 19 May 2017 14:24:36 +0200
Subject: libceph: fix error handling in process_one_ticket()

Don't leak key internals after new_session_key is populated.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 net/ceph/auth_x.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index d0126df33f1f..8757fb87dab8 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -151,7 +151,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	struct timespec validity;
 	void *tp, *tpend;
 	void **ptp;
-	struct ceph_crypto_key new_session_key;
+	struct ceph_crypto_key new_session_key = { 0 };
 	struct ceph_buffer *new_ticket_blob;
 	unsigned long new_expires, new_renew_after;
 	u64 new_secret_id;
@@ -237,13 +237,13 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	     type, ceph_entity_type_name(type), th->secret_id,
 	     (int)th->ticket_blob->vec.iov_len);
 	xi->have_keys |= th->service;
-
-out:
-	return ret;
+	return 0;
 
 bad:
 	ret = -EINVAL;
-	goto out;
+out:
+	ceph_crypto_key_destroy(&new_session_key);
+	return ret;
 }
 
 static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
-- 
cgit v1.2.3-59-g8ed1b


From 293dffaad8d500e1a5336eeb90d544cf40d4fbd8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 23 May 2017 17:25:10 +0300
Subject: libceph: NULL deref on crush_decode() error path

If there is not enough space then ceph_decode_32_safe() does a goto bad.
We need to return an error code in that situation.  The current code
returns ERR_PTR(0) which is NULL.  The callers are not expecting that
and it results in a NULL dereference.

Fixes: f24e9980eb86 ("ceph: OSD client")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index ffe9e904d4d1..55e3a477f92d 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -317,6 +317,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		u32 yes;
 		struct crush_rule *r;
 
+		err = -EINVAL;
 		ceph_decode_32_safe(p, end, yes, bad);
 		if (!yes) {
 			dout("crush_decode NO rule %d off %x %p to %p\n",
-- 
cgit v1.2.3-59-g8ed1b


From 4d071c3238987325b9e50e33051a40d1cce311cc Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 23 May 2017 14:18:17 -0500
Subject: PCI/PM: Add needs_resume flag to avoid suspend complete optimization

Some drivers - like i915 - may not support the system suspend direct
complete optimization due to differences in their runtime and system
suspend sequence.  Add a flag that when set resumes the device before
calling the driver's system suspend handlers which effectively disables
the optimization.

Needed by a future patch fixing suspend/resume on i915.

Suggested by Rafael.

Signed-off-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/pci.c   | 3 ++-
 include/linux/pci.h | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5bba8e6..563901cd9c06 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev)
 
 	if (!pm_runtime_suspended(dev)
 	    || pci_target_state(pci_dev) != pci_dev->current_state
-	    || platform_pci_need_resume(pci_dev))
+	    || platform_pci_need_resume(pci_dev)
+	    || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME))
 		return false;
 
 	/*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..df7dd9021646 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,11 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
 	/* Do not use FLR even if device advertises PCI_AF_CAP */
 	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+	/*
+	 * Resume before calling the driver's system suspend hooks, disabling
+	 * the direct_complete optimization.
+	 */
+	PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit v1.2.3-59-g8ed1b


From 82bc9a42cf854fdf63155759c0aa790bd1f361b0 Mon Sep 17 00:00:00 2001
From: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
Date: Tue, 18 Apr 2017 13:43:32 +0200
Subject: drm/gma500/psb: Actually use VBT mode when it is found

With LVDS we were incorrectly picking the pre-programmed mode instead of
the prefered mode provided by VBT. Make sure we pick the VBT mode if
one is provided. It is likely that the mode read-out code is still wrong
but this patch fixes the immediate problem on most machines.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78562
Cc: <stable@vger.kernel.org>
Signed-off-by: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170418114332.12183-1-patrik.r.jakobsson@gmail.com
---
 drivers/gpu/drm/gma500/psb_intel_lvds.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 0066fe7e622e..be3eefec5152 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -759,20 +759,23 @@ void psb_intel_lvds_init(struct drm_device *dev,
 		if (scan->type & DRM_MODE_TYPE_PREFERRED) {
 			mode_dev->panel_fixed_mode =
 			    drm_mode_duplicate(dev, scan);
+			DRM_DEBUG_KMS("Using mode from DDC\n");
 			goto out;	/* FIXME: check for quirks */
 		}
 	}
 
 	/* Failed to get EDID, what about VBT? do we need this? */
-	if (mode_dev->vbt_mode)
+	if (dev_priv->lfp_lvds_vbt_mode) {
 		mode_dev->panel_fixed_mode =
-		    drm_mode_duplicate(dev, mode_dev->vbt_mode);
+			drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
 
-	if (!mode_dev->panel_fixed_mode)
-		if (dev_priv->lfp_lvds_vbt_mode)
-			mode_dev->panel_fixed_mode =
-				drm_mode_duplicate(dev,
-					dev_priv->lfp_lvds_vbt_mode);
+		if (mode_dev->panel_fixed_mode) {
+			mode_dev->panel_fixed_mode->type |=
+				DRM_MODE_TYPE_PREFERRED;
+			DRM_DEBUG_KMS("Using mode from VBT\n");
+			goto out;
+		}
+	}
 
 	/*
 	 * If we didn't get EDID, try checking if the panel is already turned
@@ -789,6 +792,7 @@ void psb_intel_lvds_init(struct drm_device *dev,
 		if (mode_dev->panel_fixed_mode) {
 			mode_dev->panel_fixed_mode->type |=
 			    DRM_MODE_TYPE_PREFERRED;
+			DRM_DEBUG_KMS("Using pre-programmed mode\n");
 			goto out;	/* FIXME: check for quirks */
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 0a2ad541071f99eaf4589c3551176fca191c1ee2 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Fri, 5 May 2017 18:47:37 +0800
Subject: libceph: cleanup old messages according to reconnect seq

when reopen a connection, use 'reconnect seq' to clean up
messages that have already been received by peer.

Link: http://tracker.ceph.com/issues/18690
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/messenger.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index d7ab481b2508..588a91930051 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2228,10 +2228,18 @@ static void process_ack(struct ceph_connection *con)
 	struct ceph_msg *m;
 	u64 ack = le64_to_cpu(con->in_temp_ack);
 	u64 seq;
+	bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ);
+	struct list_head *list = reconnect ? &con->out_queue : &con->out_sent;
 
-	while (!list_empty(&con->out_sent)) {
-		m = list_first_entry(&con->out_sent, struct ceph_msg,
-				     list_head);
+	/*
+	 * In the reconnect case, con_fault() has requeued messages
+	 * in out_sent. We should cleanup old messages according to
+	 * the reconnect seq.
+	 */
+	while (!list_empty(list)) {
+		m = list_first_entry(list, struct ceph_msg, list_head);
+		if (reconnect && m->needs_out_seq)
+			break;
 		seq = le64_to_cpu(m->hdr.seq);
 		if (seq > ack)
 			break;
@@ -2240,6 +2248,7 @@ static void process_ack(struct ceph_connection *con)
 		m->ack_stamp = jiffies;
 		ceph_msg_remove(m);
 	}
+
 	prepare_read_tag(con);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 42c99fc4c7069371da7b04b9099319dd1c633ee2 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Fri, 5 May 2017 18:28:44 +0100
Subject: ceph: check that the new inode size is within limits in
 ceph_fallocate()

Currently the ceph client doesn't respect the rlimit in fallocate.  This
means that a user can allocate a file with size > RLIMIT_FSIZE.  This
patch adds the call to inode_newsize_ok() to verify filesystem limits and
ulimits.  This should make ceph successfully run xfstest generic/228.

Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3fdde0b283c9..29308a80d66f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1671,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode,
 	}
 
 	size = i_size_read(inode);
-	if (!(mode & FALLOC_FL_KEEP_SIZE))
+	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 		endoff = offset + length;
+		ret = inode_newsize_ok(inode, endoff);
+		if (ret)
+			goto unlock;
+	}
 
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
-- 
cgit v1.2.3-59-g8ed1b


From 3ab2137915aea0ce7b3ec02e0f260ecc0f1c289d Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 23 May 2017 13:28:54 +0800
Subject: sctp: fix stream update when processing dupcookie

Since commit 3dbcc105d556 ("sctp: alloc stream info when initializing
asoc"), stream and stream.out info are always alloced when creating
an asoc.

So it's not correct to check !asoc->stream before updating stream
info when processing dupcookie, but would be better to check asoc
state instead.

Fixes: 3dbcc105d556 ("sctp: alloc stream info when initializing asoc")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a9708da28eb5..95238284c422 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1176,7 +1176,9 @@ void sctp_assoc_update(struct sctp_association *asoc,
 
 		asoc->ctsn_ack_point = asoc->next_tsn - 1;
 		asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
-		if (!asoc->stream) {
+
+		if (sctp_state(asoc, COOKIE_WAIT)) {
+			sctp_stream_free(asoc->stream);
 			asoc->stream = new->stream;
 			new->stream = NULL;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 7e06297768886337707f5833942b3bd524a6d3d5 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 23 May 2017 13:28:55 +0800
Subject: sctp: set new_asoc temp when processing dupcookie

After sctp changed to use transport hashtable, a transport would be
added into global hashtable when adding the peer to an asoc, then
the asoc can be got by searching the transport in the hashtbale.

The problem is when processing dupcookie in sctp_sf_do_5_2_4_dupcook,
a new asoc would be created. A peer with the same addr and port as
the one in the old asoc might be added into the new asoc, but fail
to be added into the hashtable, as they also belong to the same sk.

It causes that sctp's dupcookie processing can not really work.

Since the new asoc will be freed after copying it's information to
the old asoc, it's more like a temp asoc. So this patch is to fix
it by setting it as a temp asoc to avoid adding it's any transport
into the hashtable and also avoid allocing assoc_id.

An extra thing it has to do is to also alloc stream info for any
temp asoc, as sctp dupcookie process needs it to update old asoc.
But I don't think it would hurt something, as a temp asoc would
always be freed after finishing processing cookie echo packet.

Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 13 ++++---------
 net/sctp/sm_statefuns.c  |  3 +++
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8a08f13469c4..92e332e17391 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2454,16 +2454,11 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 	 * stream sequence number shall be set to 0.
 	 */
 
-	/* Allocate storage for the negotiated streams if it is not a temporary
-	 * association.
-	 */
-	if (!asoc->temp) {
-		if (sctp_stream_init(asoc, gfp))
-			goto clean_up;
+	if (sctp_stream_init(asoc, gfp))
+		goto clean_up;
 
-		if (sctp_assoc_set_id(asoc, gfp))
-			goto clean_up;
-	}
+	if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
+		goto clean_up;
 
 	/* ADDIP Section 4.1 ASCONF Chunk Procedures
 	 *
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4f5e6cfc7f60..f863b5573e42 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2088,6 +2088,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 		}
 	}
 
+	/* Set temp so that it won't be added into hashtable */
+	new_asoc->temp = 1;
+
 	/* Compare the tie_tag in cookie with the verification tag of
 	 * current association.
 	 */
-- 
cgit v1.2.3-59-g8ed1b


From 159a07604a99bd01e7db112de08d53dc4fcad109 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Tue, 23 May 2017 11:48:08 +0200
Subject: net: fec: add post PHY reset delay DT property

Some PHY require to wait for a bit after the reset GPIO has been
toggled. This adds support for the DT property `phy-reset-post-delay`
which gives the delay in milliseconds to wait after reset.

If the DT property is not given, no delay is observed. Post reset delay
greater than 1000ms are invalid.

Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/fsl-fec.txt |  4 ++++
 drivers/net/ethernet/freescale/fec_main.c         | 16 +++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index a1e3693cca16..6f55bdd52f8a 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -15,6 +15,10 @@ Optional properties:
 - phy-reset-active-high : If present then the reset sequence using the GPIO
   specified in the "phy-reset-gpios" property is reversed (H=reset state,
   L=operation state).
+- phy-reset-post-delay : Post reset delay in milliseconds. If present then
+  a delay of phy-reset-post-delay milliseconds will be observed after the
+  phy-reset-gpios has been toggled. Can be omitted thus no delay is
+  observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
 - phy-supply : regulator that powers the Ethernet PHY.
 - phy-handle : phandle to the PHY device connected to this device.
 - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 56a563f90b0b..f7c8649fd28f 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3192,7 +3192,7 @@ static int fec_reset_phy(struct platform_device *pdev)
 {
 	int err, phy_reset;
 	bool active_high = false;
-	int msec = 1;
+	int msec = 1, phy_post_delay = 0;
 	struct device_node *np = pdev->dev.of_node;
 
 	if (!np)
@@ -3209,6 +3209,11 @@ static int fec_reset_phy(struct platform_device *pdev)
 	else if (!gpio_is_valid(phy_reset))
 		return 0;
 
+	err = of_property_read_u32(np, "phy-reset-post-delay", &phy_post_delay);
+	/* valid reset duration should be less than 1s */
+	if (!err && phy_post_delay > 1000)
+		return -EINVAL;
+
 	active_high = of_property_read_bool(np, "phy-reset-active-high");
 
 	err = devm_gpio_request_one(&pdev->dev, phy_reset,
@@ -3226,6 +3231,15 @@ static int fec_reset_phy(struct platform_device *pdev)
 
 	gpio_set_value_cansleep(phy_reset, !active_high);
 
+	if (!phy_post_delay)
+		return 0;
+
+	if (phy_post_delay > 20)
+		msleep(phy_post_delay);
+	else
+		usleep_range(phy_post_delay * 1000,
+			     phy_post_delay * 1000 + 1000);
+
 	return 0;
 }
 #else /* CONFIG_OF */
-- 
cgit v1.2.3-59-g8ed1b


From 0ff50e83b5122e836ca492fefb11656b225ac29c Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Tue, 23 May 2017 13:20:28 +0200
Subject: net: rtnetlink: bail out from rtnl_fdb_dump() on parse error

rtnl_fdb_dump() failed to check the result of nlmsg_parse(), which led
to contents of |ifm| being uninitialized because nlh->nlmsglen was too
small to accommodate |ifm|. The uninitialized data may affect some
branches and result in unwanted effects, although kernel data doesn't
seem to leak to the userspace directly.

The bug has been detected with KMSAN and syzkaller.

For the record, here is the KMSAN report:

==================================================================
BUG: KMSAN: use of unitialized memory in rtnl_fdb_dump+0x5dc/0x1000
CPU: 0 PID: 1039 Comm: probe Not tainted 4.11.0-rc5+ #2727
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16
 dump_stack+0x143/0x1b0 lib/dump_stack.c:52
 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:1007
 __kmsan_warning_32+0x66/0xb0 mm/kmsan/kmsan_instr.c:491
 rtnl_fdb_dump+0x5dc/0x1000 net/core/rtnetlink.c:3230
 netlink_dump+0x84f/0x1190 net/netlink/af_netlink.c:2168
 __netlink_dump_start+0xc97/0xe50 net/netlink/af_netlink.c:2258
 netlink_dump_start ./include/linux/netlink.h:165
 rtnetlink_rcv_msg+0xae9/0xb40 net/core/rtnetlink.c:4094
 netlink_rcv_skb+0x339/0x5a0 net/netlink/af_netlink.c:2339
 rtnetlink_rcv+0x83/0xa0 net/core/rtnetlink.c:4110
 netlink_unicast_kernel net/netlink/af_netlink.c:1272
 netlink_unicast+0x13b7/0x1480 net/netlink/af_netlink.c:1298
 netlink_sendmsg+0x10b8/0x10f0 net/netlink/af_netlink.c:1844
 sock_sendmsg_nosec net/socket.c:633
 sock_sendmsg net/socket.c:643
 ___sys_sendmsg+0xd4b/0x10f0 net/socket.c:1997
 __sys_sendmsg net/socket.c:2031
 SYSC_sendmsg+0x2c6/0x3f0 net/socket.c:2042
 SyS_sendmsg+0x87/0xb0 net/socket.c:2038
 do_syscall_64+0x102/0x150 arch/x86/entry/common.c:285
 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246
RIP: 0033:0x401300
RSP: 002b:00007ffc3b0e6d58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401300
RDX: 0000000000000000 RSI: 00007ffc3b0e6d80 RDI: 0000000000000003
RBP: 00007ffc3b0e6e00 R08: 000000000000000b R09: 0000000000000004
R10: 000000000000000d R11: 0000000000000246 R12: 0000000000000000
R13: 00000000004065a0 R14: 0000000000406630 R15: 0000000000000000
origin: 000000008fe00056
 save_stack_trace+0x59/0x60 arch/x86/kernel/stacktrace.c:59
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:352
 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:247
 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:260
 slab_alloc_node mm/slub.c:2743
 __kmalloc_node_track_caller+0x1f4/0x390 mm/slub.c:4349
 __kmalloc_reserve net/core/skbuff.c:138
 __alloc_skb+0x2cd/0x740 net/core/skbuff.c:231
 alloc_skb ./include/linux/skbuff.h:933
 netlink_alloc_large_skb net/netlink/af_netlink.c:1144
 netlink_sendmsg+0x934/0x10f0 net/netlink/af_netlink.c:1819
 sock_sendmsg_nosec net/socket.c:633
 sock_sendmsg net/socket.c:643
 ___sys_sendmsg+0xd4b/0x10f0 net/socket.c:1997
 __sys_sendmsg net/socket.c:2031
 SYSC_sendmsg+0x2c6/0x3f0 net/socket.c:2042
 SyS_sendmsg+0x87/0xb0 net/socket.c:2038
 do_syscall_64+0x102/0x150 arch/x86/entry/common.c:285
 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246
==================================================================

and the reproducer:

==================================================================
  #include <sys/socket.h>
  #include <net/if_arp.h>
  #include <linux/netlink.h>
  #include <stdint.h>

  int main()
  {
    int sock = socket(PF_NETLINK, SOCK_DGRAM | SOCK_NONBLOCK, 0);
    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    char nlmsg_buf[32];
    memset(nlmsg_buf, 0, sizeof(nlmsg_buf));
    struct nlmsghdr *nlmsg = nlmsg_buf;
    nlmsg->nlmsg_len = 0x11;
    nlmsg->nlmsg_type = 0x1e; // RTM_NEWROUTE = RTM_BASE + 0x0e
    // type = 0x0e = 1110b
    // kind = 2
    nlmsg->nlmsg_flags = 0x101; // NLM_F_ROOT | NLM_F_REQUEST
    nlmsg->nlmsg_seq = 0;
    nlmsg->nlmsg_pid = 0;
    nlmsg_buf[16] = (char)7;
    struct iovec iov;
    iov.iov_base = nlmsg_buf;
    iov.iov_len = 17;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    sendmsg(sock, &msg, 0);
    return 0;
  }
==================================================================

Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 49a279a7cc15..9e2c0a7cb325 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3231,8 +3231,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int err = 0;
 	int fidx = 0;
 
-	if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
-			IFLA_MAX, ifla_policy, NULL) == 0) {
+	err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+			  IFLA_MAX, ifla_policy, NULL);
+	if (err < 0) {
+		return -EINVAL;
+	} else if (err == 0) {
 		if (tb[IFLA_MASTER])
 			br_idx = nla_get_u32(tb[IFLA_MASTER]);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From cd47512e51190efc34a6b90d5c6b54de036ea421 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 23 May 2017 08:19:49 -0700
Subject: net/phy: fix mdio-octeon dependency and build

Fix build errors by making this driver depend on OF_MDIO, like
several other similar drivers do.

drivers/built-in.o: In function `octeon_mdiobus_remove':
mdio-octeon.c:(.text+0x196ee0): undefined reference to `mdiobus_unregister'
mdio-octeon.c:(.text+0x196ee8): undefined reference to `mdiobus_free'
drivers/built-in.o: In function `octeon_mdiobus_probe':
mdio-octeon.c:(.text+0x196f1d): undefined reference to `devm_mdiobus_alloc_size'
mdio-octeon.c:(.text+0x196ffe): undefined reference to `of_mdiobus_register'
mdio-octeon.c:(.text+0x197010): undefined reference to `mdiobus_free'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc:	Andrew Lunn <andrew@lunn.ch>
Cc:	Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 60ffc9da6a28..c360dd6ead22 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -108,7 +108,7 @@ config MDIO_MOXART
 config MDIO_OCTEON
 	tristate "Octeon and some ThunderX SOCs MDIO buses"
 	depends on 64BIT
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && OF_MDIO
 	select MDIO_CAVIUM
 	help
 	  This module provides a driver for the Octeon and ThunderX MDIO
-- 
cgit v1.2.3-59-g8ed1b


From f2899788353c13891412b273fdff5f02d49aa40f Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 23 May 2017 17:49:13 +0200
Subject: net: phy: marvell: Limit errata to 88m1101

The 88m1101 has an errata when configuring autoneg. However, it was
being applied to many other Marvell PHYs as well. Limit its scope to
just the 88m1101.

Fixes: 76884679c644 ("phylib: Add support for Marvell 88e1111S and 88e1145")
Reported-by: Daniel Walker <danielwa@cisco.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Harini Katakam <harinik@xilinx.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 66 ++++++++++++++++++++++++++---------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 272b051a0199..9097e42bec2e 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -255,34 +255,6 @@ static int marvell_config_aneg(struct phy_device *phydev)
 {
 	int err;
 
-	/* The Marvell PHY has an errata which requires
-	 * that certain registers get written in order
-	 * to restart autonegotiation */
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1d, 0x1f);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0x200c);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1d, 0x5);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0x100);
-	if (err < 0)
-		return err;
-
 	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
 		return err;
@@ -316,6 +288,42 @@ static int marvell_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int m88e1101_config_aneg(struct phy_device *phydev)
+{
+	int err;
+
+	/* This Marvell PHY has an errata which requires
+	 * that certain registers get written in order
+	 * to restart autonegotiation
+	 */
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1d, 0x1f);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0x200c);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1d, 0x5);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0x100);
+	if (err < 0)
+		return err;
+
+	return marvell_config_aneg(phydev);
+}
+
 static int m88e1111_config_aneg(struct phy_device *phydev)
 {
 	int err;
@@ -1892,7 +1900,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &marvell_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1101_config_aneg,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
-- 
cgit v1.2.3-59-g8ed1b


From b3c85a0fb2c79f2c945fa1305b39974d0acf3105 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 10 May 2017 20:06:58 +0200
Subject: drm/amdgpu: fix fundamental suspend/resume issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reinitializing the VM manager during suspend/resume is a very very bad
idea since all the VMs are still active and kicking.

This can lead to random VM faults after resume when new processes
become the same client ID assigned.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  | 15 ++-------------
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  | 15 ++-------------
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  | 15 ++-------------
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 16 ++--------------
 6 files changed, 30 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 07ff3b1514f1..1bf36c3542c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -672,6 +672,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
 
+	atomic64_set(&id->owner, 0);
 	id->gds_base = 0;
 	id->gds_size = 0;
 	id->gws_base = 0;
@@ -680,6 +681,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
 	id->oa_size = 0;
 }
 
+/**
+ * amdgpu_vm_reset_all_id - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset VMID to force flush on next use
+ */
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
+{
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
+
+		for (j = 1; j < id_mgr->num_ids; ++j)
+			amdgpu_vm_reset_id(adev, i, j);
+	}
+}
+
 /**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
@@ -2270,7 +2291,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
-
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d97e28b4bdc4..e1d951ece433 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -204,6 +204,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
 			unsigned vmid);
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index a572979f186c..d860939152df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v6_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v6_0_hw_fini(adev);
 
 	return 0;
@@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v6_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v6_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index a9083a16a250..2750e5c23813 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v7_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v7_0_hw_fini(adev);
 
 	return 0;
@@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v7_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v7_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 4ac99784160a..f56b4089ee9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v8_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v8_0_hw_fini(adev);
 
 	return 0;
@@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v8_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v8_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc1e1c1d6b24..f936332a069d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -791,10 +791,6 @@ static int gmc_v9_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v9_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v9_0_hw_fini(adev);
 
 	return 0;
@@ -809,17 +805,9 @@ static int gmc_v9_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v9_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev,
-				"vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v9_0_is_idle(void *handle)
-- 
cgit v1.2.3-59-g8ed1b


From 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 23 May 2017 13:38:41 -0400
Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans

It appears that TCP checksum offloading has been broken for
Q-in-Q vlans.  The behavior was execerbated by the
series
    commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'")
that that enabled accleleration features on stacked vlans.

However, event without that series, it is possible to trigger
this issue.  It just requires a lot more specialized configuration.

The root cause is the interaction between how
netdev_intersect_features() works, the features actually set on
the vlan devices and HW having the ability to run checksum with
longer headers.

The issue starts when netdev_interesect_features() replaces
NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM,
if the HW advertises IP|IPV6 specific checksums.  This happens
for tagged and multi-tagged packets.   However, HW that enables
IP|IPV6 checksum offloading doesn't gurantee that packets with
arbitrarily long headers can be checksummed.

This patch disables IP|IPV6 checksums on the packet for multi-tagged
packets.

CC: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
CC: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8d5fcd6284ce..283dc2f5364d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
 static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 						    netdev_features_t features)
 {
-	if (skb_vlan_tagged_multi(skb))
-		features = netdev_intersect_features(features,
-						     NETIF_F_SG |
-						     NETIF_F_HIGHDMA |
-						     NETIF_F_FRAGLIST |
-						     NETIF_F_HW_CSUM |
-						     NETIF_F_HW_VLAN_CTAG_TX |
-						     NETIF_F_HW_VLAN_STAG_TX);
+	if (skb_vlan_tagged_multi(skb)) {
+		/* In the case of multi-tagged packets, use a direct mask
+		 * instead of using netdev_interesect_features(), to make
+		 * sure that only devices supporting NETIF_F_HW_CSUM will
+		 * have checksum offloading support.
+		 */
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
+			    NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX |
+			    NETIF_F_HW_VLAN_STAG_TX;
+	}
 
 	return features;
 }
-- 
cgit v1.2.3-59-g8ed1b


From cc6e9de62a7f84c9293a2ea41bc412b55bb46e85 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 23 May 2017 13:38:42 -0400
Subject: be2net: Fix offload features for Q-in-Q packets

At least some of the be2net cards do not seem to be capabled
of performing checksum offload computions on Q-in-Q packets.
In these case, the recevied checksum on the remote is invalid
and TCP syn packets are dropped.

This patch adds a call to check disbled acceleration features
on Q-in-Q tagged traffic.

CC: Sathya Perla <sathya.perla@broadcom.com>
CC: Ajit Khaparde <ajit.khaparde@broadcom.com>
CC: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
CC: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index f3a09ab55900..4eee18ce9be4 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5078,9 +5078,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb,
 	struct be_adapter *adapter = netdev_priv(dev);
 	u8 l4_hdr = 0;
 
-	/* The code below restricts offload features for some tunneled packets.
+	/* The code below restricts offload features for some tunneled and
+	 * Q-in-Q packets.
 	 * Offload features for normal (non tunnel) packets are unchanged.
 	 */
+	features = vlan_features_check(skb, features);
 	if (!skb->encapsulation ||
 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
 		return features;
-- 
cgit v1.2.3-59-g8ed1b


From 2836b4f224d4fd7d1a2b23c3eecaf0f0ae199a74 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 23 May 2017 13:38:43 -0400
Subject: virtio-net: enable TSO/checksum offloads for Q-in-Q vlans

Since virtio does not provide it's own ndo_features_check handler,
TSO, and now checksum offload, are disabled for stacked vlans.
Re-enable the support and let the host take care of it.  This
restores/improves Guest-to-Guest performance over Q-in-Q vlans.

Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9320d96a1632..3e9246cc49c3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1989,6 +1989,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
 	.ndo_xdp		= virtnet_xdp,
+	.ndo_features_check	= passthru_features_check,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
-- 
cgit v1.2.3-59-g8ed1b


From 0a646f331db0eb9efc8d3a95a44872036d441d58 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 11 May 2017 13:10:02 -0400
Subject: drm/amdgpu/ci: disable mclk switching for high refresh rates (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even if the vblank period would allow it, it still seems to
be problematic on some cards.

v2: fix logic inversion (Nils)

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Cc: stable@vger.kernel.org
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 6dc1410b380f..ec93714e4524 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev)
 	u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
 	u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
 
+	/* disable mclk switching if the refresh is >120Hz, even if the
+	 * blanking period would allow it
+	 */
+	if (amdgpu_dpm_get_vrefresh(adev) > 120)
+		return true;
+
 	if (vblank_time < switch_limit)
 		return true;
 	else
-- 
cgit v1.2.3-59-g8ed1b


From 58d7e3e427db1bd68f33025519a9468140280a75 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 11 May 2017 13:14:14 -0400
Subject: drm/radeon/ci: disable mclk switching for high refresh rates (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even if the vblank period would allow it, it still seems to
be problematic on some cards.

v2: fix logic inversion (Nils)

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Cc: stable@vger.kernel.org
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/ci_dpm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 7ba450832e6b..ea36dc4dd5d2 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev)
 	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
 	u32 switch_limit = pi->mem_gddr5 ? 450 : 300;
 
+	/* disable mclk switching if the refresh is >120Hz, even if the
+        * blanking period would allow it
+        */
+	if (r600_dpm_get_vrefresh(rdev) > 120)
+		return true;
+
 	if (vblank_time < switch_limit)
 		return true;
 	else
-- 
cgit v1.2.3-59-g8ed1b


From 09be4a5219610a6fae3215d4f51f948d6f5d2609 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 11 May 2017 13:46:12 -0400
Subject: drm/amd/powerplay/smu7: add vblank check for mclk switching (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check to make sure the vblank period is long enough to support
mclk switching.

v2: drop needless initial assignment (Nils)

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Cc: stable@vger.kernel.org
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 31 +++++++++++++++++++++---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index a74a3db3056c..1445c51b6d05 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2655,6 +2655,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr *hwmgr)
 	return sizeof(struct smu7_power_state);
 }
 
+static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
+				 uint32_t vblank_time_us)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t switch_limit_us;
+
+	switch (hwmgr->chip_id) {
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+		switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+		break;
+	default:
+		switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
+		break;
+	}
+
+	if (vblank_time_us < switch_limit_us)
+		return true;
+	else
+		return false;
+}
 
 static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				struct pp_power_state *request_ps,
@@ -2669,6 +2691,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	bool disable_mclk_switching;
 	bool disable_mclk_switching_for_frame_lock;
 	struct cgs_display_info info = {0};
+	struct cgs_mode_info mode_info = {0};
 	const struct phm_clock_and_voltage_limits *max_limits;
 	uint32_t i;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -2677,6 +2700,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	int32_t count;
 	int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;
 
+	info.mode_info = &mode_info;
 	data->battery_state = (PP_StateUILabel_Battery ==
 			request_ps->classification.ui_label);
 
@@ -2703,8 +2727,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	cgs_get_active_displays_info(hwmgr->device, &info);
 
-	/*TO DO result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/
-
 	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
 	minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
 
@@ -2769,8 +2791,9 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-	disable_mclk_switching = (1 < info.display_count) ||
-				    disable_mclk_switching_for_frame_lock;
+	disable_mclk_switching = ((1 < info.display_count) ||
+				  disable_mclk_switching_for_frame_lock ||
+				  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us));
 
 	sclk = smu7_ps->performance_levels[0].engine_clock;
 	mclk = smu7_ps->performance_levels[0].memory_clock;
-- 
cgit v1.2.3-59-g8ed1b


From 2275a3a2fe9914ba6d76c8ea490da3c08342bd19 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 11 May 2017 13:57:41 -0400
Subject: drm/amd/powerplay/smu7: disable mclk switching for high refresh rates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even if the vblank period would allow it, it still seems to
be problematic on some cards.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868

Cc: stable@vger.kernel.org
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 1445c51b6d05..102eb6d029fa 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2793,7 +2793,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	disable_mclk_switching = ((1 < info.display_count) ||
 				  disable_mclk_switching_for_frame_lock ||
-				  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us));
+				  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+				  (mode_info.refresh_rate > 120));
 
 	sclk = smu7_ps->performance_levels[0].engine_clock;
 	mclk = smu7_ps->performance_levels[0].memory_clock;
-- 
cgit v1.2.3-59-g8ed1b


From 3d18e33735a02b1a90aecf14410bf3edbfd4d3dc Mon Sep 17 00:00:00 2001
From: Lyude <lyude@redhat.com>
Date: Thu, 11 May 2017 19:31:12 -0400
Subject: drm/radeon: Unbreak HPD handling for r600+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We end up reading the interrupt register for HPD5, and then writing it
to HPD6 which on systems without anything using HPD5 results in
permanently disabling hotplug on one of the display outputs after the
first time we acknowledge a hotplug interrupt from the GPU.

This code is really bad. But for now, let's just fix this. I will
hopefully have a large patch series to refactor all of this soon.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Lyude <lyude@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/cik.c       | 4 ++--
 drivers/gpu/drm/radeon/evergreen.c | 4 ++--
 drivers/gpu/drm/radeon/r600.c      | 2 +-
 drivers/gpu/drm/radeon/si.c        | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ccebe0f8d2e1..008c145b7f29 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -7401,7 +7401,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -7431,7 +7431,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f130ec41ee4b..0bf103536404 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4927,7 +4927,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -4958,7 +4958,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0a085176e79b..e06e2d8feab3 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3988,7 +3988,7 @@ static void r600_irq_ack(struct radeon_device *rdev)
 			WREG32(DC_HPD5_INT_CONTROL, tmp);
 		}
 		if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) {
-			tmp = RREG32(DC_HPD5_INT_CONTROL);
+			tmp = RREG32(DC_HPD6_INT_CONTROL);
 			tmp |= DC_HPDx_INT_ACK;
 			WREG32(DC_HPD6_INT_CONTROL, tmp);
 		}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ceee87f029d9..76d1888528e6 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -6317,7 +6317,7 @@ static inline void si_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -6348,7 +6348,7 @@ static inline void si_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 7c4378f4523d4af05b5941ea906e7032631eb753 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Thu, 11 May 2017 18:22:17 +0800
Subject: drm/amdgpu: fix NULL pointer panic of emit_gds_switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[  338.384770] BUG: unable to handle kernel NULL pointer dereference at           (null)
[  338.384817] IP: [<          (null)>]           (null)
[  338.385505] RIP: 0010:[<0000000000000000>]  [<          (null)>]           (null)
[  338.385950] Call Trace:
[  338.385993]  [<ffffffffa05d2313>] ? amdgpu_vm_flush+0x283/0x400 [amdgpu]
[  338.386025]  [<ffffffff811818d3>] ? printk+0x4d/0x4f
[  338.386074]  [<ffffffffa05d4906>] amdgpu_ib_schedule+0x4a6/0x4d0 [amdgpu]
[  338.386140]  [<ffffffffa0673e54>] amdgpu_job_run+0x64/0x180 [amdgpu]
[  338.386203]  [<ffffffffa0672e09>] amd_sched_main+0x2e9/0x4a0 [amdgpu]
[  338.386232]  [<ffffffff810bfce0>] ? prepare_to_wait_event+0x110/0x110
[  338.386295]  [<ffffffffa0672b20>] ? amd_sched_select_entity+0xe0/0xe0 [amdgpu]
[  338.386327]  [<ffffffff8109b423>] kthread+0xd3/0xf0
[  338.386349]  [<ffffffff8109b350>] ? kthread_park+0x60/0x60
[  338.386376]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30
[  338.386401] Code:  Bad RIP value.
[  338.386420] RIP  [<          (null)>]           (null)
[  338.386443]  RSP <ffffc90001bd7d40>
[  338.386458] CR2: 0000000000000000
[  338.398508] ---[ end trace 4c66fcdc74b9a0a2 ]---

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1bf36c3542c1..8ecf82c5fe74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -634,7 +634,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		mutex_unlock(&id_mgr->lock);
 	}
 
-	if (gds_switch_needed) {
+	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 		id->gds_base = job->gds_base;
 		id->gds_size = job->gds_size;
 		id->gws_base = job->gws_base;
-- 
cgit v1.2.3-59-g8ed1b


From 3083696a1ee68f4845f8e9a21b91e343ff25eff3 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 23 May 2017 13:13:45 +0800
Subject: drm/amd/powerplay: fix a signedness bugs

Smatch complains about a signedness bug here:

        vega10_hwmgr.c:4202 vega10_force_clock_level()
        warn: always true condition '(i >= 0) => (0-u32max >= 0)'

Fixes: 7b52db39a4c2 ("drm/amd/powerplay: fix bug sclk/mclk
                     level can't be set on vega10.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Eric Huang <JinHuiEric.Huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ad30f5d3a10d..2614af2f553f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4186,7 +4186,7 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
-	uint32_t i;
+	int i;
 
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
 		return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From a4d768e702de224cc85e0c8eac9311763403b368 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Mon, 22 May 2017 19:54:10 -0700
Subject: xfs: fix unaligned access in xfs_btree_visit_blocks

This structure copy was throwing unaligned access warnings on sparc64:

Kernel unaligned access at TPC[1043c088] xfs_btree_visit_blocks+0x88/0xe0 [xfs]

xfs_btree_copy_ptrs does a memcpy, which avoids it.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_btree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5392674bf893..3a673ba201aa 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks(
 			xfs_btree_readahead_ptr(cur, ptr, 1);
 
 			/* save for the next iteration of the loop */
-			lptr = *ptr;
+			xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
 		}
 
 		/* for each buffer in the level */
-- 
cgit v1.2.3-59-g8ed1b


From 8affebe16d79ebefb1d9d6d56a46dc89716f9453 Mon Sep 17 00:00:00 2001
From: Eryu Guan <eguan@redhat.com>
Date: Tue, 23 May 2017 08:30:46 -0700
Subject: xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff()

xfs_find_get_desired_pgoff() is used to search for offset of hole or
data in page range [index, end] (both inclusive), and the max number
of pages to search should be at least one, if end == index.
Otherwise the only page is missed and no hole or data is found,
which is not correct.

When block size is smaller than page size, this can be demonstrated
by preallocating a file with size smaller than page size and writing
data to the last block. E.g. run this xfs_io command on a 1k block
size XFS on x86_64 host.

  # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \
  	    -c "seek -d 0" /mnt/xfs/testfile
  wrote 1024/1024 bytes at offset 2048
  1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec)
  Whence  Result
  DATA    EOF

Data at offset 2k was missed, and lseek(2) returned ENXIO.

This is uncovered by generic/285 subtest 07 and 08 on ppc64 host,
where pagesize is 64k. Because a recent change to generic/285
reduced the preallocated file size to smaller than 64k.

Cc: stable@vger.kernel.org # v3.7+
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 35703a801372..aefa2134a8cb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1049,7 +1049,7 @@ xfs_find_get_desired_pgoff(
 		unsigned	nr_pages;
 		unsigned int	i;
 
-		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  want);
 		/*
-- 
cgit v1.2.3-59-g8ed1b


From 5375023ae1266553a7baa0845e82917d8803f48c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 18 May 2017 16:36:22 -0700
Subject: xfs: Fix missed holes in SEEK_HOLE implementation

XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as
can be seen by the following command:

xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k"
       -c "seek -h 0" file
wrote 57344/57344 bytes at offset 0
56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec)
wrote 8192/8192 bytes at offset 131072
8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec)
Whence	Result
HOLE	139264

Where we can see that hole at offset 56k was just ignored by SEEK_HOLE
implementation. The bug is in xfs_find_get_desired_pgoff() which does
not properly detect the case when pages are not contiguous.

Fix the problem by properly detecting when found page has larger offset
than expected.

CC: stable@vger.kernel.org
Fixes: d126d43f631f996daeee5006714fed914be32368
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index aefa2134a8cb..f1517e9928c7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1076,17 +1076,6 @@ xfs_find_get_desired_pgoff(
 			break;
 		}
 
-		/*
-		 * At lease we found one page.  If this is the first time we
-		 * step into the loop, and if the first page index offset is
-		 * greater than the given search offset, a hole was found.
-		 */
-		if (type == HOLE_OFF && lastoff == startoff &&
-		    lastoff < page_offset(pvec.pages[0])) {
-			found = true;
-			break;
-		}
-
 		for (i = 0; i < nr_pages; i++) {
 			struct page	*page = pvec.pages[i];
 			loff_t		b_offset;
@@ -1098,18 +1087,18 @@ xfs_find_get_desired_pgoff(
 			 * file mapping. However, page->index will not change
 			 * because we have a reference on the page.
 			 *
-			 * Searching done if the page index is out of range.
-			 * If the current offset is not reaches the end of
-			 * the specified search range, there should be a hole
-			 * between them.
+			 * If current page offset is beyond where we've ended,
+			 * we've found a hole.
 			 */
-			if (page->index > end) {
-				if (type == HOLE_OFF && lastoff < endoff) {
-					*offset = lastoff;
-					found = true;
-				}
+			if (type == HOLE_OFF && lastoff < endoff &&
+			    lastoff < page_offset(pvec.pages[i])) {
+				found = true;
+				*offset = lastoff;
 				goto out;
 			}
+			/* Searching done if the page index is out of range. */
+			if (page->index > end)
+				goto out;
 
 			lock_page(page);
 			/*
-- 
cgit v1.2.3-59-g8ed1b


From d7fd24257aa60316bf81093f7f909dc9475ae974 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 18 May 2017 16:36:23 -0700
Subject: xfs: Fix off-by-in in loop termination in
 xfs_find_get_desired_pgoff()

There is an off-by-one error in loop termination conditions in
xfs_find_get_desired_pgoff() since 'end' may index a page beyond end of
desired range if 'endoff' is page aligned. It doesn't have any visible
effects but still it is good to fix it.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f1517e9928c7..dc0e4cb7029b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1043,7 +1043,7 @@ xfs_find_get_desired_pgoff(
 
 	index = startoff >> PAGE_SHIFT;
 	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
-	end = endoff >> PAGE_SHIFT;
+	end = (endoff - 1) >> PAGE_SHIFT;
 	do {
 		int		want;
 		unsigned	nr_pages;
-- 
cgit v1.2.3-59-g8ed1b


From a54fba8f5a0dc36161cacdf2aa90f007f702ec1a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 18 May 2017 16:36:24 -0700
Subject: xfs: Move handling of missing page into one place in
 xfs_find_get_desired_pgoff()

Currently several places in xfs_find_get_desired_pgoff() handle the case
of a missing page. Make them all handled in one place after the loop has
terminated.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 38 ++++++++------------------------------
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index dc0e4cb7029b..5fb5a0958a14 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1052,29 +1052,8 @@ xfs_find_get_desired_pgoff(
 		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  want);
-		/*
-		 * No page mapped into given range.  If we are searching holes
-		 * and if this is the first time we got into the loop, it means
-		 * that the given offset is landed in a hole, return it.
-		 *
-		 * If we have already stepped through some block buffers to find
-		 * holes but they all contains data.  In this case, the last
-		 * offset is already updated and pointed to the end of the last
-		 * mapped page, if it does not reach the endpoint to search,
-		 * that means there should be a hole between them.
-		 */
-		if (nr_pages == 0) {
-			/* Data search found nothing */
-			if (type == DATA_OFF)
-				break;
-
-			ASSERT(type == HOLE_OFF);
-			if (lastoff == startoff || lastoff < endoff) {
-				found = true;
-				*offset = lastoff;
-			}
+		if (nr_pages == 0)
 			break;
-		}
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page	*page = pvec.pages[i];
@@ -1140,21 +1119,20 @@ xfs_find_get_desired_pgoff(
 
 		/*
 		 * The number of returned pages less than our desired, search
-		 * done.  In this case, nothing was found for searching data,
-		 * but we found a hole behind the last offset.
+		 * done.
 		 */
-		if (nr_pages < want) {
-			if (type == HOLE_OFF) {
-				*offset = lastoff;
-				found = true;
-			}
+		if (nr_pages < want)
 			break;
-		}
 
 		index = pvec.pages[i - 1]->index + 1;
 		pagevec_release(&pvec);
 	} while (index <= end);
 
+	/* No page at lastoff and we are not done - we found a hole. */
+	if (type == HOLE_OFF && lastoff < endoff) {
+		*offset = lastoff;
+		found = true;
+	}
 out:
 	pagevec_release(&pvec);
 	return found;
-- 
cgit v1.2.3-59-g8ed1b


From 11387fe4a98f75d1f4cdb3efe3b42b19205c9df5 Mon Sep 17 00:00:00 2001
From: Eric Garver <e@erig.me>
Date: Tue, 23 May 2017 18:37:27 -0400
Subject: geneve: fix fill_info when using collect_metadata

Since 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") fill_info
does not return UDP_ZERO_CSUM6_RX when using COLLECT_METADATA. This is
because it uses ip_tunnel_info_af() with the device level info, which is
not valid for COLLECT_METADATA.

Fix by checking for the presence of the actual sockets.

Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.")
Signed-off-by: Eric Garver <e@erig.me>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index dec5d563ab19..959fd12d2e67 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
 		goto nla_put_failure;
 
-	if (ip_tunnel_info_af(info) == AF_INET) {
+	if (rtnl_dereference(geneve->sock4)) {
 		if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
 				    info->key.u.ipv4.dst))
 			goto nla_put_failure;
@@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			       !!(info->key.tun_flags & TUNNEL_CSUM)))
 			goto nla_put_failure;
 
+	}
+
 #if IS_ENABLED(CONFIG_IPV6)
-	} else {
+	if (rtnl_dereference(geneve->sock6)) {
 		if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
 				     &info->key.u.ipv6.dst))
 			goto nla_put_failure;
@@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 			       !geneve->use_udp6_rx_checksums))
 			goto nla_put_failure;
-#endif
 	}
+#endif
 
 	if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
 	    nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
-- 
cgit v1.2.3-59-g8ed1b


From b62ce397675502325d4282924bf70cfb6a005c3a Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 22 May 2017 13:11:41 +0800
Subject: drm/amdgpu: fix null point error when rmmod amdgpu.

this bug happened when amdgpu load failed.

[   75.740951] BUG: unable to handle kernel paging request at 00000000000031c0
[   75.748167] IP: [<ffffffffa064a0e0>] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu]
[   75.755774] PGD 0

[   75.759185] Oops: 0000 [#1] SMP
[   75.762408] Modules linked in: amdgpu(OE-) ttm(OE) drm_kms_helper(OE) drm(OE) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) eeepc_wmi(E) asus_wmi(E) sparse_keymap(E) intel_rapl(E) snd_hda_codec_hdmi(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_intel(E) snd_hda_codec(E) snd_hda_core(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) snd_hwdep(E) snd_pcm(E) snd_seq_midi(E) coretemp(E) kvm_intel(E) snd_seq_midi_event(E) snd_rawmidi(E) kvm(E) snd_seq(E) joydev(E) snd_seq_device(E) snd_timer(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) mei_me(E) ghash_clmulni_intel(E) snd(E) aesni_intel(E) mei(E) soundcore(E) aes_x86_64(E) shpchp(E) serio_raw(E) lrw(E) acpi_pad(E) gf128mul(E) glue_helper(E) ablk_helper(E) mac_hid(E)
[   75.835574]  cryptd(E) parport_pc(E) ppdev(E) lp(E) nfsd(E) parport(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) mxm_wmi(E) psmouse(E) e1000e(E) ptp(E) pps_core(E) ahci(E) libahci(E) wmi(E) video(E) i2c_hid(E) hid(E)
[   75.858489] CPU: 5 PID: 1603 Comm: rmmod Tainted: G           OE   4.9.0-custom #2
[   75.866183] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 0901 08/31/2015
[   75.875050] task: ffff88045d1bbb80 task.stack: ffffc90002de4000
[   75.881094] RIP: 0010:[<ffffffffa064a0e0>]  [<ffffffffa064a0e0>] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu]
[   75.891238] RSP: 0018:ffffc90002de7d48  EFLAGS: 00010286
[   75.896648] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001
[   75.903933] RDX: 0000000000000000 RSI: ffff88045d1bbb80 RDI: 0000000000000286
[   75.911183] RBP: ffffc90002de7d50 R08: 0000000000000502 R09: 0000000000000004
[   75.918449] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880464bf0000
[   75.925675] R13: ffffffffa0853000 R14: 0000000000000000 R15: 0000564e44f88210
[   75.932980] FS:  00007f13d5400700(0000) GS:ffff880476540000(0000) knlGS:0000000000000000
[   75.941238] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   75.947088] CR2: 00000000000031c0 CR3: 000000045fd0b000 CR4: 00000000003406e0
[   75.954332] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   75.961566] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   75.968834] Stack:
[   75.970881]  ffff880464bf0000 ffffc90002de7d60 ffffffffa0636592 ffffc90002de7d80
[   75.978454]  ffffffffa059015f ffff880464bf0000 ffff880464bf0000 ffffc90002de7da8
[   75.986076]  ffffffffa0595216 ffff880464bf0000 ffff880460f4d000 ffffffffa0853000
[   75.993692] Call Trace:
[   75.996177]  [<ffffffffa0636592>] amdgpu_driver_lastclose_kms+0x12/0x20 [amdgpu]
[   76.003700]  [<ffffffffa059015f>] drm_lastclose+0x2f/0xd0 [drm]
[   76.009777]  [<ffffffffa0595216>] drm_dev_unregister+0x16/0xd0 [drm]
[   76.016255]  [<ffffffffa0595944>] drm_put_dev+0x34/0x70 [drm]
[   76.022139]  [<ffffffffa062f365>] amdgpu_pci_remove+0x15/0x20 [amdgpu]
[   76.028800]  [<ffffffff81416499>] pci_device_remove+0x39/0xc0
[   76.034661]  [<ffffffff81531caa>] __device_release_driver+0x9a/0x140
[   76.041121]  [<ffffffff81531e58>] driver_detach+0xb8/0xc0
[   76.046575]  [<ffffffff81530c95>] bus_remove_driver+0x55/0xd0
[   76.052401]  [<ffffffff815325fc>] driver_unregister+0x2c/0x50
[   76.058244]  [<ffffffff81416289>] pci_unregister_driver+0x29/0x90
[   76.064466]  [<ffffffffa0596c5e>] drm_pci_exit+0x9e/0xb0 [drm]
[   76.070507]  [<ffffffffa0796d71>] amdgpu_exit+0x1c/0x32 [amdgpu]
[   76.076609]  [<ffffffff81104810>] SyS_delete_module+0x1a0/0x200
[   76.082627]  [<ffffffff810e2b1a>] ? rcu_eqs_enter.isra.36+0x4a/0x50
[   76.089001]  [<ffffffff8100392e>] do_syscall_64+0x6e/0x180
[   76.094583]  [<ffffffff817e1d2f>] entry_SYSCALL64_slow_path+0x25/0x25
[   76.101114] Code: 94 c0 c3 31 c0 5d c3 0f 1f 40 00 0f 1f 44 00 00 55 31 c0 48 89 e5 53 48 89 fb 48 c7 c7 1d 21 84 a0 e8 ab 77 b3 e0 e8 fc 8b d7 e0 <48> 8b bb c0 31 00 00 48 85 ff 74 09 e8 ff eb fc ff 85 c0 75 03
[   76.121432] RIP  [<ffffffffa064a0e0>] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu]

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 236d9950221b..c0d8c6ff6380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
 
 void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev)
 {
-	struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev;
+	struct amdgpu_fbdev *afbdev;
 	struct drm_fb_helper *fb_helper;
 	int ret;
 
+	if (!adev)
+		return;
+
+	afbdev = adev->mode_info.rfbdev;
+
 	if (!afbdev)
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From 65d786c21bf8140dac83563306f46fe0b13a9aaa Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Tue, 23 May 2017 18:18:37 -0500
Subject: net: fix potential null pointer dereference

Add null check to avoid a potential null pointer dereference.

Addresses-Coverity-ID: 1408831
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 4fea1b3dfbb4..7b652bb7ebe4 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -873,7 +873,7 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
 
 	/* Check if there's an existing gtpX device to configure */
 	dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK]));
-	if (dev->netdev_ops == &gtp_netdev_ops)
+	if (dev && dev->netdev_ops == &gtp_netdev_ops)
 		gtp = netdev_priv(dev);
 
 	put_net(net);
-- 
cgit v1.2.3-59-g8ed1b


From 4b3c7dbbfff0673e8a89575414b864d8b001d3bb Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Thu, 25 May 2017 10:06:21 -0700
Subject: Input: elan_i2c - clear INT before resetting controller

Some old touchpad FWs need to have interrupt cleared before issuing reset
command after updating firmware. We clear interrupt by attempting to read
full report from the controller, and discarding any data read.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_i2c.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
index a679e56c44cd..765879dcaf85 100644
--- a/drivers/input/mouse/elan_i2c_i2c.c
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -557,7 +557,14 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client,
 	long ret;
 	int error;
 	int len;
-	u8 buffer[ETP_I2C_INF_LENGTH];
+	u8 buffer[ETP_I2C_REPORT_LEN];
+
+	len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN);
+	if (len != ETP_I2C_REPORT_LEN) {
+		error = len < 0 ? len : -EIO;
+		dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n",
+			error, len);
+	}
 
 	reinit_completion(completion);
 	enable_irq(client->irq);
-- 
cgit v1.2.3-59-g8ed1b


From a04f144059ac09f2c3da50b5707df589044aad66 Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Tue, 23 May 2017 13:41:47 -0700
Subject: Input: elan_i2c - ignore signals when finishing updating firmware

Use wait_for_completion_timeout() instead of
wait_for_completion_interruptible_timeout() to avoid stray signals ruining
firmware update. Our timeout is only 300 msec so we are fine simply letting
it expire in case device misbehaves.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_i2c.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
index 765879dcaf85..f431da07f861 100644
--- a/drivers/input/mouse/elan_i2c_i2c.c
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -554,7 +554,6 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client,
 				     struct completion *completion)
 {
 	struct device *dev = &client->dev;
-	long ret;
 	int error;
 	int len;
 	u8 buffer[ETP_I2C_REPORT_LEN];
@@ -570,23 +569,19 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client,
 	enable_irq(client->irq);
 
 	error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
-	if (!error)
-		ret = wait_for_completion_interruptible_timeout(completion,
-							msecs_to_jiffies(300));
-	disable_irq(client->irq);
-
 	if (error) {
 		dev_err(dev, "device reset failed: %d\n", error);
-		return error;
-	} else if (ret == 0) {
+	} else if (!wait_for_completion_timeout(completion,
+						msecs_to_jiffies(300))) {
 		dev_err(dev, "timeout waiting for device reset\n");
-		return -ETIMEDOUT;
-	} else if (ret < 0) {
-		error = ret;
-		dev_err(dev, "error waiting for device reset: %d\n", error);
-		return error;
+		error = -ETIMEDOUT;
 	}
 
+	disable_irq(client->irq);
+
+	if (error)
+		return error;
+
 	len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH);
 	if (len != ETP_I2C_INF_LENGTH) {
 		error = len < 0 ? len : -EIO;
-- 
cgit v1.2.3-59-g8ed1b


From 7c3f1875c66fbc19762760097cabc91849ea0bbb Mon Sep 17 00:00:00 2001
From: Roman Kapl <roman.kapl@sysgo.com>
Date: Wed, 24 May 2017 10:22:22 +0200
Subject: net: move somaxconn init from sysctl code

The default value for somaxconn is set in sysctl_core_net_init(), but this
function is not called when kernel is configured without CONFIG_SYSCTL.

This results in the kernel not being able to accept TCP connections,
because the backlog has zero size. Usually, the user ends up with:
"TCP: request_sock_TCP: Possible SYN flooding on port 7. Dropping request.  Check SNMP counters."
If SYN cookies are not enabled the connection is rejected.

Before ef547f2ac16 (tcp: remove max_qlen_log), the effects were less
severe, because the backlog was always at least eight slots long.

Signed-off-by: Roman Kapl <roman.kapl@sysgo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c   | 19 +++++++++++++++++++
 net/core/sysctl_net_core.c |  2 --
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1934efd4a9d4..26bbfababff2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -315,6 +315,25 @@ out_undo:
 	goto out;
 }
 
+static int __net_init net_defaults_init_net(struct net *net)
+{
+	net->core.sysctl_somaxconn = SOMAXCONN;
+	return 0;
+}
+
+static struct pernet_operations net_defaults_ops = {
+	.init = net_defaults_init_net,
+};
+
+static __init int net_defaults_init(void)
+{
+	if (register_pernet_subsys(&net_defaults_ops))
+		panic("Cannot initialize net default settings");
+
+	return 0;
+}
+
+core_initcall(net_defaults_init);
 
 #ifdef CONFIG_NET_NS
 static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index ea23254b2457..b7cd9aafe99e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
 {
 	struct ctl_table *tbl;
 
-	net->core.sysctl_somaxconn = SOMAXCONN;
-
 	tbl = netns_core_table;
 	if (!net_eq(net, &init_net)) {
 		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From ba615f675281d76fd19aa03558777f81fb6b6084 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Wed, 24 May 2017 09:59:31 -0700
Subject: tcp: avoid fastopen API to be used on AF_UNSPEC

Fastopen API should be used to perform fastopen operations on the TCP
socket. It does not make sense to use fastopen API to perform disconnect
by calling it with AF_UNSPEC. The fastopen data path is also prone to
race conditions and bugs when using with AF_UNSPEC.

One issue reported and analyzed by Vegard Nossum is as follows:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Thread A:                            Thread B:
------------------------------------------------------------------------
sendto()
 - tcp_sendmsg()
     - sk_stream_memory_free() = 0
         - goto wait_for_sndbuf
	     - sk_stream_wait_memory()
	        - sk_wait_event() // sleep
          |                          sendto(flags=MSG_FASTOPEN, dest_addr=AF_UNSPEC)
	  |                           - tcp_sendmsg()
	  |                              - tcp_sendmsg_fastopen()
	  |                                 - __inet_stream_connect()
	  |                                    - tcp_disconnect() //because of AF_UNSPEC
	  |                                       - tcp_transmit_skb()// send RST
	  |                                    - return 0; // no reconnect!
	  |                           - sk_stream_wait_connect()
	  |                                 - sock_error()
	  |                                    - xchg(&sk->sk_err, 0)
	  |                                    - return -ECONNRESET
	- ... // wake up, see sk->sk_err == 0
    - skb_entail() on TCP_CLOSE socket

If the connection is reopened then we will send a brand new SYN packet
after thread A has already queued a buffer. At this point I think the
socket internal state (sequence numbers etc.) becomes messed up.

When the new connection is closed, the FIN-ACK is rejected because the
sequence number is outside the window. The other side tries to
retransmit,
but __tcp_retransmit_skb() calls tcp_trim_head() on an empty skb which
corrupts the skb data length and hits a BUG() in copy_and_csum_bits().
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Hence, this patch adds a check for AF_UNSPEC in the fastopen data path
and return EOPNOTSUPP to user if such case happens.

Fixes: cf60af03ca4e7 ("tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 842b575f8fdd..59792d283ff8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
 	if (tp->fastopen_req)
 		return -EALREADY; /* Another Fast Open is in progress */
@@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 		}
 	}
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
-	err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+	err = __inet_stream_connect(sk->sk_socket, uaddr,
 				    msg->msg_namelen, flags, 1);
 	/* fastopen_req could already be freed in __inet_stream_connect
 	 * if the connection times out or gets rst
-- 
cgit v1.2.3-59-g8ed1b


From 5990baaa6d7b437dfcf58b7021ca56b1d6b35869 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihrachys@redhat.com>
Date: Wed, 24 May 2017 15:19:35 -0700
Subject: arp: fixed -Wuninitialized compiler warning

Commit 7d472a59c0e5ec117220a05de6b370447fb6cb66 ("arp: always override
existing neigh entries with gratuitous ARP") introduced a compiler
warning:

net/ipv4/arp.c:880:35: warning: 'addr_type' may be used uninitialized in
this function [-Wmaybe-uninitialized]

While the code logic seems to be correct and doesn't allow the variable
to be used uninitialized, and the warning is not consistently
reproducible, it's still worth fixing it for other people not to waste
time looking at the warning in case it pops up in the build environment.
Yes, compiler is probably at fault, but we will need to accommodate.

Fixes: 7d472a59c0e5 ("arp: always override existing neigh entries with gratuitous ARP")
Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ae96e6f3e0cb..e9f3386a528b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -863,8 +863,8 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
+	addr_type = -1;
 	if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
-		addr_type = -1;
 		is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
 				      sip, tip, sha, tha);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 1ad2f5838d345e1c102bd1cd27c4f4c1349b0dc8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 May 2017 01:05:05 +0200
Subject: bpf: fix incorrect pruning decision when alignment must be tracked

Currently, when we enforce alignment tracking on direct packet access,
the verifier lets the following program pass despite doing a packet
write with unaligned access:

  0: (61) r2 = *(u32 *)(r1 +76)
  1: (61) r3 = *(u32 *)(r1 +80)
  2: (61) r7 = *(u32 *)(r1 +8)
  3: (bf) r0 = r2
  4: (07) r0 += 14
  5: (25) if r7 > 0x1 goto pc+4
   R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0)
   R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp
  6: (2d) if r0 > r3 goto pc+1
   R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14)
   R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp
  7: (63) *(u32 *)(r0 -4) = r0
  8: (b7) r0 = 0
  9: (95) exit

  from 6 to 8:
   R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0)
   R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp
  8: (b7) r0 = 0
  9: (95) exit

  from 5 to 10:
   R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0)
   R3=pkt_end R7=inv,min_value=2 R10=fp
  10: (07) r0 += 1
  11: (05) goto pc-6
  6: safe                           <----- here, wrongly found safe
  processed 15 insns

However, if we enforce a pruning mismatch by adding state into r8
which is then being mismatched in states_equal(), we find that for
the otherwise same program, the verifier detects a misaligned packet
access when actually walking that path:

  0: (61) r2 = *(u32 *)(r1 +76)
  1: (61) r3 = *(u32 *)(r1 +80)
  2: (61) r7 = *(u32 *)(r1 +8)
  3: (b7) r8 = 1
  4: (bf) r0 = r2
  5: (07) r0 += 14
  6: (25) if r7 > 0x1 goto pc+4
   R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0)
   R3=pkt_end R7=inv,min_value=0,max_value=1
   R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp
  7: (2d) if r0 > r3 goto pc+1
   R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14)
   R3=pkt_end R7=inv,min_value=0,max_value=1
   R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp
  8: (63) *(u32 *)(r0 -4) = r0
  9: (b7) r0 = 0
  10: (95) exit

  from 7 to 9:
   R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0)
   R3=pkt_end R7=inv,min_value=0,max_value=1
   R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp
  9: (b7) r0 = 0
  10: (95) exit

  from 6 to 11:
   R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0)
   R3=pkt_end R7=inv,min_value=2
   R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp
  11: (07) r0 += 1
  12: (b7) r8 = 0
  13: (05) goto pc-7                <----- mismatch due to r8
  7: (2d) if r0 > r3 goto pc+1
   R0=pkt(id=0,off=15,r=15) R1=ctx R2=pkt(id=0,off=0,r=15)
   R3=pkt_end R7=inv,min_value=2
   R8=imm0,min_value=0,max_value=0,min_align=2147483648 R10=fp
  8: (63) *(u32 *)(r0 -4) = r0
  misaligned packet access off 2+15+-4 size 4

The reason why we fail to see it in states_equal() is that the
third test in compare_ptrs_to_packet() ...

  if (old->off <= cur->off &&
      old->off >= old->range && cur->off >= cur->range)
          return true;

... will let the above pass. The situation we run into is that
old->off <= cur->off (14 <= 15), meaning that prior walked paths
went with smaller offset, which was later used in the packet
access after successful packet range check and found to be safe
already.

For example: Given is R0=pkt(id=0,off=0,r=0). Adding offset 14
as in above program to it, results in R0=pkt(id=0,off=14,r=0)
before the packet range test. Now, testing this against R3=pkt_end
with 'if r0 > r3 goto out' will transform R0 into R0=pkt(id=0,off=14,r=14)
for the case when we're within bounds. A write into the packet
at offset *(u32 *)(r0 -4), that is, 2 + 14 -4, is valid and
aligned (2 is for NET_IP_ALIGN). After processing this with
all fall-through paths, we later on check paths from branches.
When the above skb->mark test is true, then we jump near the
end of the program, perform r0 += 1, and jump back to the
'if r0 > r3 goto out' test we've visited earlier already. This
time, R0 is of type R0=pkt(id=0,off=15,r=0), and we'll prune
that part because this time we'll have a larger safe packet
range, and we already found that with off=14 all further insn
were already safe, so it's safe as well with a larger off.
However, the problem is that the subsequent write into the packet
with 2 + 15 -4 is then unaligned, and not caught by the alignment
tracking. Note that min_align, aux_off, and aux_off_align were
all 0 in this example.

Since we cannot tell at this time what kind of packet access was
performed in the prior walk and what minimal requirements it has
(we might do so in the future, but that requires more complexity),
fix it to disable this pruning case for strict alignment for now,
and let the verifier do check such paths instead. With that applied,
the test cases pass and reject the program due to misalignment.

Fixes: d1174416747d ("bpf: Track alignment of register values in the verifier.")
Reference: http://patchwork.ozlabs.org/patch/761909/
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c72cd41f5b8b..e37e06b1229d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -843,9 +843,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 {
 	bool strict = env->strict_alignment;
 
-	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
-		strict = true;
-
 	switch (reg->type) {
 	case PTR_TO_PACKET:
 		return check_pkt_ptr_alignment(reg, off, size, strict);
@@ -2696,7 +2693,8 @@ err_free:
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *old,
 				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
@@ -2739,7 +2737,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
 	 * 'if (R4 > data_end)' and all further insn were already good with r=20,
 	 * so they will be good with r=30 and we can prune the search.
 	 */
-	if (old->off <= cur->off &&
+	if (!env->strict_alignment && old->off <= cur->off &&
 	    old->off >= old->range && cur->off >= cur->range)
 		return true;
 
@@ -2810,7 +2808,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 			continue;
 
 		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
-		    compare_ptrs_to_packet(rold, rcur))
+		    compare_ptrs_to_packet(env, rold, rcur))
 			continue;
 
 		return false;
@@ -3588,10 +3586,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	} else {
 		log_level = 0;
 	}
-	if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
+
+	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		env->strict_alignment = true;
-	else
-		env->strict_alignment = false;
 
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
@@ -3697,7 +3695,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
 	mutex_lock(&bpf_verifier_lock);
 
 	log_level = 0;
+
 	env->strict_alignment = false;
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		env->strict_alignment = true;
 
 	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct bpf_verifier_state_list *),
-- 
cgit v1.2.3-59-g8ed1b


From a9789ef9afcb4fb0193f8dd94f2665ba3ad71e79 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 May 2017 01:05:06 +0200
Subject: bpf: properly reset caller saved regs after helper call and
 ld_abs/ind

Currently, after performing helper calls, we clear all caller saved
registers, that is r0 - r5 and fill r0 depending on struct bpf_func_proto
specification. The way we reset these regs can affect pruning decisions
in later paths, since we only reset register's imm to 0 and type to
NOT_INIT. However, we leave out clearing of other variables such as id,
min_value, max_value, etc, which can later on lead to pruning mismatches
due to stale data.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e37e06b1229d..339c8a1371de 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+{
+	BUG_ON(regno >= MAX_BPF_REG);
+
+	memset(&regs[regno], 0, sizeof(regs[regno]));
+	regs[regno].type = NOT_INIT;
+	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
 static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
-	for (i = 0; i < MAX_BPF_REG; i++) {
-		regs[i].type = NOT_INIT;
-		regs[i].imm = 0;
-		regs[i].min_value = BPF_REGISTER_MIN_RANGE;
-		regs[i].max_value = BPF_REGISTER_MAX_RANGE;
-		regs[i].min_align = 0;
-		regs[i].aux_off = 0;
-		regs[i].aux_off_align = 0;
-	}
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_reg_not_init(regs, i);
 
 	/* frame pointer */
 	regs[BPF_REG_FP].type = FRAME_PTR;
@@ -1346,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
 	struct bpf_reg_state *regs = state->regs;
-	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1413,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	}
 
 	/* reset caller saved regs */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* update return register */
 	if (fn->ret_type == RET_INTEGER) {
@@ -2445,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -2478,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	}
 
 	/* reset caller saved regs to unreadable */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* mark destination R0 register as readable, since it contains
 	 * the value fetched from the packet
-- 
cgit v1.2.3-59-g8ed1b


From 41703a731066fde79c3e5ccf3391cf77a98aeda5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 May 2017 01:05:07 +0200
Subject: bpf: add bpf_clone_redirect to bpf_helper_changes_pkt_data

The bpf_clone_redirect() still needs to be listed in
bpf_helper_changes_pkt_data() since we call into
bpf_try_make_head_writable() from there, thus we need
to invalidate prior pkt regs as well.

Fixes: 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index a253a6197e6b..a6bb95fa87b2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_change_head ||
 	    func == bpf_skb_change_tail ||
 	    func == bpf_skb_pull_data ||
+	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
 	    func == bpf_xdp_adjust_head)
-- 
cgit v1.2.3-59-g8ed1b


From a316338cb71a3260201490e615f2f6d5c0d8fb2c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 May 2017 01:05:08 +0200
Subject: bpf: fix wrong exposure of map_flags into fdinfo for lpm

trie_alloc() always needs to have BPF_F_NO_PREALLOC passed in via
attr->map_flags, since it does not support preallocation yet. We
check the flag, but we never copy the flag into trie->map.map_flags,
which is later on exposed into fdinfo and used by loaders such as
iproute2. Latter uses this in bpf_map_selfcheck_pinned() to test
whether a pinned map has the same spec as the one from the BPF obj
file and if not, bails out, which is currently the case for lpm
since it exposes always 0 as flags.

Also copy over flags in array_map_alloc() and stack_map_alloc().
They always have to be 0 right now, but we should make sure to not
miss to copy them over at a later point in time when we add actual
flags for them to use.

Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation")
Reported-by: Jarno Rajahalme <jarno@covalent.io>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/arraymap.c | 1 +
 kernel/bpf/lpm_trie.c | 1 +
 kernel/bpf/stackmap.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5e00b2333c26..172dc8ee0e3b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
+	array->map.map_flags = attr->map_flags;
 	array->elem_size = elem_size;
 
 	if (!percpu)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 39cfafd895b8..b09185f0f17d 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	trie->map.key_size = attr->key_size;
 	trie->map.value_size = attr->value_size;
 	trie->map.max_entries = attr->max_entries;
+	trie->map.map_flags = attr->map_flags;
 	trie->data_size = attr->key_size -
 			  offsetof(struct bpf_lpm_trie_key, data);
 	trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 4dfd6f2ec2f9..31147d730abf 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	smap->map.key_size = attr->key_size;
 	smap->map.value_size = value_size;
 	smap->map.max_entries = attr->max_entries;
+	smap->map.map_flags = attr->map_flags;
 	smap->n_buckets = n_buckets;
 	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-- 
cgit v1.2.3-59-g8ed1b


From 614d0d77b49a9b131e58b77473698ab5b2c525b7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 25 May 2017 01:05:09 +0200
Subject: bpf: add various verifier test cases

This patch adds various verifier test cases:

1) A test case for the pruning issue when tracking alignment
   is used.
2) Various PTR_TO_MAP_VALUE_OR_NULL tests to make sure pointer
   arithmetic turns such register into UNKNOWN_VALUE type.
3) Test cases for the special treatment of LD_ABS/LD_IND to
   make sure verifier doesn't break calling convention here.
   Latter is needed, since f.e. arm64 JIT uses r1 - r5 for
   storing temporary data, so they really must be marked as
   NOT_INIT.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h                      |  10 ++
 tools/include/linux/filter.h                |  10 ++
 tools/testing/selftests/bpf/test_verifier.c | 239 +++++++++++++++++++++++++++-
 3 files changed, 255 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 56197f82af45..62d948f80730 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -272,6 +272,16 @@ struct bpf_prog_aux;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 390d7c9685fd..4ce25d43e8e3 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -208,6 +208,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3773562056da..cabb19b1e371 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -49,6 +49,7 @@
 #define MAX_NR_MAPS	4
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
 struct bpf_test {
 	const char *descr;
@@ -2614,6 +2615,30 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test17 (pruning, alignment)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+			BPF_JMP_A(-6),
+		},
+		.errstr = "misaligned packet access off 2+15+-4 size 4",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
 	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
@@ -3340,6 +3365,70 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
 	{
 		"invalid memory access with multiple map_lookup_elem calls",
 		.insns = {
@@ -4937,7 +5026,149 @@ static struct bpf_test tests[] = {
 		.fixup_map_in_map = { 3 },
 		.errstr = "R1 type=map_value_or_null expected=map_ptr",
 		.result = REJECT,
-	}
+	},
+	{
+		"ld_abs: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"ld_ind: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -5059,9 +5290,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	do_test_fixup(test, prog, map_fds);
 
-	fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				   prog, prog_len, "GPL", 0, bpf_vlog,
-				   sizeof(bpf_vlog));
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog));
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
-- 
cgit v1.2.3-59-g8ed1b


From 791caeb084c57e3a4d648cf1ee799d1f70c0ef4e Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Wed, 24 May 2017 16:35:49 -0700
Subject: test_bpf: Add a couple of tests for BPF_JSGE.

Some JITs can optimize comparisons with zero.  Add a couple of
BPF_JSGE tests against immediate zero.

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 889bc31785be..be88cbaadde3 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -4504,6 +4504,44 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } },
 	},
+	{
+		"JMP_JSGE_K: Signed jump: value walk 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, -3),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 6),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 4),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 1),
+			BPF_EXIT_INSN(),		/* bad exit */
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),	/* good exit */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JMP_JSGE_K: Signed jump: value walk 2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, -3),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 4),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 1),
+			BPF_EXIT_INSN(),		/* bad exit */
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),	/* good exit */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
 	/* BPF_JMP | BPF_JGT | BPF_K */
 	{
 		"JMP_JGT_K: if (3 > 2) return 1",
-- 
cgit v1.2.3-59-g8ed1b


From 797a93647a48d6cb8a20641a86a71713a947f786 Mon Sep 17 00:00:00 2001
From: Nithin Sujir <nsujir@tintri.com>
Date: Wed, 24 May 2017 19:45:17 -0700
Subject: bonding: Don't update slave->link until ready to commit

In the loadbalance arp monitoring scheme, when a slave link change is
detected, the slave->link is immediately updated and slave_state_changed
is set. Later down the function, the rtnl_lock is acquired and the
changes are committed, updating the bond link state.

However, the acquisition of the rtnl_lock can fail. The next time the
monitor runs, since slave->link is already updated, it determines that
link is unchanged. This results in the bond link state permanently out
of sync with the slave link.

This patch modifies bond_loadbalance_arp_mon() to handle link changes
identical to bond_ab_arp_{inspect/commit}(). The new link state is
maintained in slave->new_link until we're ready to commit at which point
it's copied into slave->link.

NOTE: miimon_{inspect/commit}() has a more complex state machine
requiring the use of the bond_{propose,commit}_link_state() functions
which maintains the intermediate state in slave->link_new_state. The arp
monitors don't require that.

Testing: This bug is very easy to reproduce with the following steps.
1. In a loop, toggle a slave link of a bond slave interface.
2. In a separate loop, do ifconfig up/down of an unrelated interface to
create contention for rtnl_lock.
Within a few iterations, the bond link goes out of sync with the slave
link.

Signed-off-by: Nithin Nayak Sujir <nsujir@tintri.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 73313318399c..2359478b977f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2612,11 +2612,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		unsigned long trans_start = dev_trans_start(slave->dev);
 
+		slave->new_link = BOND_LINK_NOCHANGE;
+
 		if (slave->link != BOND_LINK_UP) {
 			if (bond_time_in_interval(bond, trans_start, 1) &&
 			    bond_time_in_interval(bond, slave->last_rx, 1)) {
 
-				slave->link  = BOND_LINK_UP;
+				slave->new_link = BOND_LINK_UP;
 				slave_state_changed = 1;
 
 				/* primary_slave has no meaning in round-robin
@@ -2643,7 +2645,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 			if (!bond_time_in_interval(bond, trans_start, 2) ||
 			    !bond_time_in_interval(bond, slave->last_rx, 2)) {
 
-				slave->link  = BOND_LINK_DOWN;
+				slave->new_link = BOND_LINK_DOWN;
 				slave_state_changed = 1;
 
 				if (slave->link_failure_count < UINT_MAX)
@@ -2674,6 +2676,11 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 		if (!rtnl_trylock())
 			goto re_arm;
 
+		bond_for_each_slave(bond, slave, iter) {
+			if (slave->new_link != BOND_LINK_NOCHANGE)
+				slave->link = slave->new_link;
+		}
+
 		if (slave_state_changed) {
 			bond_slave_state_change(bond);
 			if (BOND_MODE(bond) == BOND_MODE_XOR)
-- 
cgit v1.2.3-59-g8ed1b


From 9bdcfb10f221e796c9619fe48655e0f1272f1d92 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 20 May 2017 15:14:43 +0200
Subject: nvme-pci: consistencly use ctrl->device for logging

This is what most of the code already does and gives much more useful
prefixes than the device embedded in the pci_dev.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/pci.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4c2ff2bb26bc..bf8bec39c017 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
 	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
 
 	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
-		dev_warn(dev->dev, "unable to set dbbuf\n");
+		dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
 		/* Free memory and continue on */
 		nvme_dbbuf_dma_free(dev);
 	}
@@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
 	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
 				      &pci_status);
 	if (result == PCIBIOS_SUCCESSFUL)
-		dev_warn(dev->dev,
+		dev_warn(dev->ctrl.device,
 			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
 			 csts, pci_status);
 	else
-		dev_warn(dev->dev,
+		dev_warn(dev->ctrl.device,
 			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
 			 csts, result);
 }
@@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	 */
 	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
 		dev->q_depth = 2;
-		dev_warn(dev->dev, "detected Apple NVMe controller, set "
-			"queue depth=%u to work around controller resets\n",
+		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
+			"set queue depth=%u to work around controller resets\n",
 			dev->q_depth);
 	}
 
@@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		if (dev->cmbsz) {
 			if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 						    &dev_attr_cmb.attr, NULL))
-				dev_warn(dev->dev,
+				dev_warn(dev->ctrl.device,
 					 "failed to add sysfs attribute for CMB\n");
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From d3d5b87ddde09bade512526f6df90e8c06c28230 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 20 May 2017 15:14:44 +0200
Subject: nvme: replace is_flags field in nvme_ctrl_ops with a flags field

So that we can have more flags for transport-specific behavior.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/core.c   | 2 +-
 drivers/nvme/host/fc.c     | 2 +-
 drivers/nvme/host/nvme.h   | 3 ++-
 drivers/nvme/host/rdma.c   | 2 +-
 drivers/nvme/target/loop.c | 2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 04e115834702..228f7c73e2f1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1605,7 +1605,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	}
 	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
 
-	if (ctrl->ops->is_fabrics) {
+	if (ctrl->ops->flags & NVME_F_FABRICS) {
 		ctrl->icdoff = le16_to_cpu(id->icdoff);
 		ctrl->ioccsz = le32_to_cpu(id->ioccsz);
 		ctrl->iorcsz = le32_to_cpu(id->iorcsz);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 14a009e43aa5..5b14cbefb724 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2647,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.name			= "fc",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 29c708ca9621..7c4b0f6636c5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -208,7 +208,8 @@ struct nvme_ns {
 struct nvme_ctrl_ops {
 	const char *name;
 	struct module *module;
-	bool is_fabrics;
+	unsigned int flags;
+#define NVME_F_FABRICS			(1 << 0)
 	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index e2c18f3d9dcf..28bd255c144d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1792,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.name			= "rdma",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index feb497134aee..e503cfff0337 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
 static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
 	.name			= "loop",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-- 
cgit v1.2.3-59-g8ed1b


From c81bfba9983fc44210d3eb5971e0faac597bf50d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 20 May 2017 15:14:45 +0200
Subject: nvme: only setup block integrity if supported by the driver

Currently only the PCIe driver supports metadata, so we should not claim
integrity support for the other drivers.  This prevents nasty crashes
with targets that advertise metadata support on fabrics.

Also use the opportunity to factor out some code into a separate helper
that isn't even compiled if CONFIG_BLK_DEV_INTEGRITY is disabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/core.c | 50 ++++++++++++++++++++++++++++++------------------
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  |  1 +
 3 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 228f7c73e2f1..a60926410438 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
+		u16 bs)
+{
+	struct nvme_ns *ns = disk->private_data;
+	u16 old_ms = ns->ms;
+	u8 pi_type = 0;
+
+	ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+	/* PI implementation requires metadata equal t10 pi tuple size */
+	if (ns->ms == sizeof(struct t10_pi_tuple))
+		pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+
+	if (blk_get_integrity(disk) &&
+	    (ns->pi_type != pi_type || ns->ms != old_ms ||
+	     bs != queue_logical_block_size(disk->queue) ||
+	     (ns->ms && ns->ext)))
+		blk_integrity_unregister(disk);
+
+	ns->pi_type = pi_type;
+}
+
 static void nvme_init_integrity(struct nvme_ns *ns)
 {
 	struct blk_integrity integrity;
@@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 	blk_queue_max_integrity_segments(ns->queue, 1);
 }
 #else
+static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
+		u16 bs)
+{
+}
 static void nvme_init_integrity(struct nvme_ns *ns)
 {
 }
@@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
 	struct nvme_ns *ns = disk->private_data;
-	u8 lbaf, pi_type;
-	u16 old_ms;
-	unsigned short bs;
-
-	old_ms = ns->ms;
-	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
-	ns->lba_shift = id->lbaf[lbaf].ds;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
-	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+	u16 bs;
 
 	/*
 	 * If identify namespace failed, use default 512 byte block size so
 	 * block layer can use before failing read/write for 0 capacity.
 	 */
+	ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
 	if (ns->lba_shift == 0)
 		ns->lba_shift = 9;
 	bs = 1 << ns->lba_shift;
-	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
-	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
-					id->dps & NVME_NS_DPS_PI_MASK : 0;
 
 	blk_mq_freeze_queue(disk->queue);
-	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
-				ns->ms != old_ms ||
-				bs != queue_logical_block_size(disk->queue) ||
-				(ns->ms && ns->ext)))
-		blk_integrity_unregister(disk);
 
-	ns->pi_type = pi_type;
+	if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+		nvme_prep_integrity(disk, id, bs);
 	blk_queue_logical_block_size(ns->queue, bs);
-
 	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
 		nvme_init_integrity(ns);
 	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7c4b0f6636c5..9d6a070d4391 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -210,6 +210,7 @@ struct nvme_ctrl_ops {
 	struct module *module;
 	unsigned int flags;
 #define NVME_F_FABRICS			(1 << 0)
+#define NVME_F_METADATA_SUPPORTED	(1 << 1)
 	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bf8bec39c017..6103b178e43a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.name			= "pcie",
 	.module			= THIS_MODULE,
+	.flags			= NVME_F_METADATA_SUPPORTED,
 	.reg_read32		= nvme_pci_reg_read32,
 	.reg_write32		= nvme_pci_reg_write32,
 	.reg_read64		= nvme_pci_reg_read64,
-- 
cgit v1.2.3-59-g8ed1b


From 50af47d04ca530544b27affffb0722f158e2bb9c Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 24 May 2017 15:06:31 -0700
Subject: nvme: Quirk APST on Intel 600P/P3100 devices

They have known firmware bugs.  A fix is apparently in the works --
once fixed firmware is available, someone from Intel (Hi, Keith!)
can adjust the quirk accordingly.

Cc: stable@vger.kernel.org # v4.11
Cc: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: Mario Limonciello <mario_limonciello@dell.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6103b178e43a..d52701df7245 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2294,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0a54),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
 				NVME_QUIRK_DEALLOCATE_ZEROES, },
+	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
+		.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
-- 
cgit v1.2.3-59-g8ed1b


From a8ecdd7117ee68fe27009acc8021423870c1dcd7 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 25 May 2017 16:38:06 -0700
Subject: blk-mq: Only register debugfs attributes for blk-mq queues

The code in blk-mq-debugfs.c assumes that it is working on a blk-mq
queue and is not intended to work on a blk-sq queue. Hence only
register blk-mq debugfs attributes for blk-mq queues.

Fixes: commit 9c1051aacde8 ("blk-mq: untangle debugfs and sysfs")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-sysfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 504fee940052..712b018e9f54 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -887,10 +887,10 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		__blk_mq_register_dev(dev, q);
-
-	blk_mq_debugfs_register(q);
+		blk_mq_debugfs_register(q);
+	}
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
-- 
cgit v1.2.3-59-g8ed1b


From 83b4605b0c16cde5b00c8cf192408d51eab75402 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 20 May 2017 18:59:54 +0200
Subject: PCI/msi: fix the pci_alloc_irq_vectors_affinity stub

We need to return an error for any call that asks for MSI / MSI-X
vectors only, so that non-trivial fallback logic can work properly.

Also valid dev->irq and use the "correct" errno value based on feedback
from Linus.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Fixes: aff17164 ("PCI: Provide sensible IRQ vector alloc/free routines")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..fc2e832d7b9c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1342,9 +1342,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 			       unsigned int max_vecs, unsigned int flags,
 			       const struct irq_affinity *aff_desc)
 {
-	if (min_vecs > 1)
-		return -EINVAL;
-	return 1;
+	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
+		return 1;
+	return -ENOSPC;
 }
 
 static inline void pci_free_irq_vectors(struct pci_dev *dev)
-- 
cgit v1.2.3-59-g8ed1b


From 0908cf4dfef35fc6ac12329007052ebe93ff1081 Mon Sep 17 00:00:00 2001
From: linzhang <xiaolou4617@gmail.com>
Date: Thu, 25 May 2017 14:07:18 +0800
Subject: net: llc: add lock_sock in llc_ui_bind to avoid a race condition

There is a race condition in llc_ui_bind if two or more processes/threads
try to bind a same socket.

If more processes/threads bind a same socket success that will lead to
two problems, one is this action is not what we expected, another is
will lead to kernel in unstable status or oops(in my simple test case,
cause llc2.ko can't unload).

The current code is test SOCK_ZAPPED bit to avoid a process to
bind a same socket twice but that is can't avoid more processes/threads
try to bind a same socket at the same time.

So, add lock_sock in llc_ui_bind like others, such as llc_ui_connect.

Signed-off-by: Lin Zhang <xiaolou4617@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8364fe5b59e4..c38d16f22d2a 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -311,6 +311,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 	int rc = -EINVAL;
 
 	dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
+
+	lock_sock(sk);
 	if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
 		goto out;
 	rc = -EAFNOSUPPORT;
@@ -382,6 +384,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 out_put:
 	llc_sap_put(sap);
 out:
+	release_sock(sk);
 	return rc;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 804ec7ebe8ea003999ca8d1bfc499edc6a9e07df Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 25 May 2017 19:14:56 +0200
Subject: sctp: fix ICMP processing if skb is non-linear

sometimes ICMP replies to INIT chunks are ignored by the client, even if
the encapsulated SCTP headers match an open socket. This happens when the
ICMP packet is carried by a paged skb: use skb_header_pointer() to read
packet contents beyond the SCTP header, so that chunk header and initiate
tag are validated correctly.

v2:
- don't use skb_header_pointer() to read the transport header, since
  icmp_socket_deliver() already puts these 8 bytes in the linear area.
- change commit message to make specific reference to INIT chunks.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 0e06a278d2a9..ba9ad32fc447 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 			     struct sctp_association **app,
 			     struct sctp_transport **tpp)
 {
+	struct sctp_init_chunk *chunkhdr, _chunkhdr;
 	union sctp_addr saddr;
 	union sctp_addr daddr;
 	struct sctp_af *af;
 	struct sock *sk = NULL;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport = NULL;
-	struct sctp_init_chunk *chunkhdr;
 	__u32 vtag = ntohl(sctphdr->vtag);
-	int len = skb->len - ((void *)sctphdr - (void *)skb->data);
 
 	*app = NULL; *tpp = NULL;
 
@@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 	 * discard the packet.
 	 */
 	if (vtag == 0) {
-		chunkhdr = (void *)sctphdr + sizeof(struct sctphdr);
-		if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
-			  + sizeof(__be32) ||
+		/* chunk header + first 4 octects of init header */
+		chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) +
+					      sizeof(struct sctphdr),
+					      sizeof(struct sctp_chunkhdr) +
+					      sizeof(__be32), &_chunkhdr);
+		if (!chunkhdr ||
 		    chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
-		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
+		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
 			goto out;
-		}
+
 	} else if (vtag != asoc->c.peer_vtag) {
 		goto out;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 0e9a709560dbcfbace8bf4019dc5298619235891 Mon Sep 17 00:00:00 2001
From: Peter Dawson <petedaws@gmail.com>
Date: Fri, 26 May 2017 06:35:18 +1000
Subject: ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets

This fix addresses two problems in the way the DSCP field is formulated
 on the encapsulating header of IPv6 tunnels.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195661

1) The IPv6 tunneling code was manipulating the DSCP field of the
 encapsulating packet using the 32b flowlabel. Since the flowlabel is
 only the lower 20b it was incorrect to assume that the upper 12b
 containing the DSCP and ECN fields would remain intact when formulating
 the encapsulating header. This fix handles the 'inherit' and
 'fixed-value' DSCP cases explicitly using the extant dsfield u8 variable.

2) The use of INET_ECN_encapsulate(0, dsfield) in ip6_tnl_xmit was
 incorrect and resulted in the DSCP value always being set to 0.

Commit 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class
 is non-0") caused the regression by masking out the flowlabel
 which exposed the incorrect handling of the DSCP portion of the
 flowlabel in ip6_tunnel and ip6_gre.

Fixes: 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0")
Signed-off-by: Peter Dawson <peter.a.dawson@boeing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c    | 13 +++++++------
 net/ipv6/ip6_tunnel.c | 21 +++++++++++++--------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8d128ba79b66..0c5b4caa1949 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
-	dsfield = ipv4_get_dsfield(iph);
-
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					  & IPV6_TCLASS_MASK;
+		dsfield = ipv4_get_dsfield(iph);
+	else
+		dsfield = ip6_tclass(t->parms.flowinfo);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 		fl6.flowi6_mark = skb->mark;
 	else
@@ -598,9 +597,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
-	dsfield = ipv6_get_dsfield(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+		dsfield = ipv6_get_dsfield(ipv6h);
+	else
+		dsfield = ip6_tclass(t->parms.flowinfo);
+
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 		fl6.flowlabel |= ip6_flowlabel(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6eb2ae507500..7ae6c503f1ca 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1196,7 +1196,7 @@ route_lookup:
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
-	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+	ip6_flow_hdr(ipv6h, dsfield,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
 	ipv6h->hop_limit = hop_limit;
 	ipv6h->nexthdr = proto;
@@ -1231,8 +1231,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
 
-	dsfield = ipv4_get_dsfield(iph);
-
 	if (t->parms.collect_md) {
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
@@ -1246,6 +1244,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPIP;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
+		dsfield = ip6_tclass(key->label);
 	} else {
 		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 			encap_limit = t->parms.encap_limit;
@@ -1254,8 +1253,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPIP;
 
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					 & IPV6_TCLASS_MASK;
+			dsfield = ipv4_get_dsfield(iph);
+		else
+			dsfield = ip6_tclass(t->parms.flowinfo);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 			fl6.flowi6_mark = skb->mark;
 		else
@@ -1267,6 +1267,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
+
 	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1300,8 +1302,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	dsfield = ipv6_get_dsfield(ipv6h);
-
 	if (t->parms.collect_md) {
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
@@ -1315,6 +1315,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPV6;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
+		dsfield = ip6_tclass(key->label);
 	} else {
 		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
 		/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
@@ -1337,7 +1338,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPV6;
 
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+			dsfield = ipv6_get_dsfield(ipv6h);
+		else
+			dsfield = ip6_tclass(t->parms.flowinfo);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 			fl6.flowlabel |= ip6_flowlabel(ipv6h);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -1351,6 +1354,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+
 	skb_set_inner_ipproto(skb, IPPROTO_IPV6);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
-- 
cgit v1.2.3-59-g8ed1b


From 82533ad9a1ce3a7a6863849a552c2cc041b55e0d Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 25 May 2017 22:54:53 +0200
Subject: net: ethernet: ax88796: don't call free_irq without request_irq first
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function ax_init_dev (which is called only from the driver's .probe
function) calls free_irq in the error path without having requested the
irq in the first place. So drop the free_irq call in the error path.

Fixes: 825a2ff1896e ("AX88796 network driver")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/ax88796.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index b0a3b85fc6f8..db02bc2fb4b2 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -748,13 +748,13 @@ static int ax_init_dev(struct net_device *dev)
 
 	ret = ax_mii_init(dev);
 	if (ret)
-		goto out_irq;
+		goto err_out;
 
 	ax_NS8390_init(dev, 0);
 
 	ret = register_netdev(dev);
 	if (ret)
-		goto out_irq;
+		goto err_out;
 
 	netdev_info(dev, "%dbit, irq %d, %lx, MAC: %pM\n",
 		    ei_local->word16 ? 16 : 8, dev->irq, dev->base_addr,
@@ -762,9 +762,6 @@ static int ax_init_dev(struct net_device *dev)
 
 	return 0;
 
- out_irq:
-	/* cleanup irq */
-	free_irq(dev->irq, dev);
  err_out:
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 3fb07daff8e99243366a081e5129560734de4ada Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 May 2017 14:27:35 -0700
Subject: ipv4: add reference counting to metrics

Andrey Konovalov reported crashes in ipv4_mtu()

I could reproduce the issue with KASAN kernels, between
10.246.7.151 and 10.246.7.152 :

1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 &

2) At the same time run following loop :
while :
do
 ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500
 ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500
done

Cong Wang attempted to add back rt->fi in commit
82486aa6f1b9 ("ipv4: restore rt->fi for reference counting")
but this proved to add some issues that were complex to solve.

Instead, I suggested to add a refcount to the metrics themselves,
being a standalone object (in particular, no reference to other objects)

I tried to make this patch as small as possible to ease its backport,
instead of being super clean. Note that we believe that only ipv4 dst
need to take care of the metric refcount. But if this is wrong,
this patch adds the basic infrastructure to extend this to other
families.

Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang
for his efforts on this problem.

Fixes: 2860583fe840 ("ipv4: Kill rt->fi")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h        |  8 +++++++-
 include/net/ip_fib.h     | 10 +++++-----
 net/core/dst.c           | 23 ++++++++++++++---------
 net/ipv4/fib_semantics.c | 17 ++++++++++-------
 net/ipv4/route.c         | 10 +++++++++-
 5 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 049af33da3b6..cfc043784166 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -107,10 +107,16 @@ struct dst_entry {
 	};
 };
 
+struct dst_metrics {
+	u32		metrics[RTAX_MAX];
+	atomic_t	refcnt;
+};
+extern const struct dst_metrics dst_default_metrics;
+
 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
-extern const u32 dst_default_metrics[];
 
 #define DST_METRICS_READ_ONLY		0x1UL
+#define DST_METRICS_REFCOUNTED		0x2UL
 #define DST_METRICS_FLAGS		0x3UL
 #define __DST_METRICS_PTR(Y)	\
 	((u32 *)((Y) & ~DST_METRICS_FLAGS))
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6692c5758b33..f7f6aa789c61 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -114,11 +114,11 @@ struct fib_info {
 	__be32			fib_prefsrc;
 	u32			fib_tb_id;
 	u32			fib_priority;
-	u32			*fib_metrics;
-#define fib_mtu fib_metrics[RTAX_MTU-1]
-#define fib_window fib_metrics[RTAX_WINDOW-1]
-#define fib_rtt fib_metrics[RTAX_RTT-1]
-#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
+	struct dst_metrics	*fib_metrics;
+#define fib_mtu fib_metrics->metrics[RTAX_MTU-1]
+#define fib_window fib_metrics->metrics[RTAX_WINDOW-1]
+#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
+#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
 	int			fib_nhs;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_weight;
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..6192f11beec9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard_out);
 
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
 	/* This initializer is needed to force linker to place this variable
 	 * into const section. Otherwise it might end into bss section.
 	 * We really want to avoid false sharing on this variable, and catch
 	 * any writes on it.
 	 */
-	[RTAX_MAX] = 0xdeadbeef,
+	.refcnt = ATOMIC_INIT(1),
 };
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	if (dev)
 		dev_hold(dev);
 	dst->ops = ops;
-	dst_init_metrics(dst, dst_default_metrics, true);
+	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
 	dst->from = NULL;
@@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release);
 
 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 {
-	u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+	struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
 
 	if (p) {
-		u32 *old_p = __DST_METRICS_PTR(old);
+		struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+		atomic_set(&p->refcnt, 1);
+		memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
 
 		new = (unsigned long) p;
 		prev = cmpxchg(&dst->_metrics, old, new);
 
 		if (prev != old) {
 			kfree(p);
-			p = __DST_METRICS_PTR(prev);
+			p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
 			if (prev & DST_METRICS_READ_ONLY)
 				p = NULL;
+		} else if (prev & DST_METRICS_REFCOUNTED) {
+			if (atomic_dec_and_test(&old_p->refcnt))
+				kfree(old_p);
 		}
 	}
-	return p;
+	BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+	return (u32 *)p;
 }
 EXPORT_SYMBOL(dst_cow_metrics_generic);
 
@@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
 {
 	unsigned long prev, new;
 
-	new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+	new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
 	prev = cmpxchg(&dst->_metrics, old, new);
 	if (prev == old)
 		kfree(__DST_METRICS_PTR(old));
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da449ddb8cc1..ad9ad4aab5da 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
+	struct dst_metrics *m;
 
 	change_nexthops(fi) {
 		if (nexthop_nh->nh_dev)
@@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
 		rt_fibinfo_free(&nexthop_nh->nh_rth_input);
 	} endfor_nexthops(fi);
 
-	if (fi->fib_metrics != (u32 *) dst_default_metrics)
-		kfree(fi->fib_metrics);
+	m = fi->fib_metrics;
+	if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+		kfree(m);
 	kfree(fi);
 }
 
@@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			return -EINVAL;
-		fi->fib_metrics[type - 1] = val;
+		fi->fib_metrics->metrics[type - 1] = val;
 	}
 
 	if (ecn_ca)
-		fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+		fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
 
 	return 0;
 }
@@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		goto failure;
 	fib_info_cnt++;
 	if (cfg->fc_mx) {
-		fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
 		if (!fi->fib_metrics)
 			goto failure;
+		atomic_set(&fi->fib_metrics->refcnt, 1);
 	} else
-		fi->fib_metrics = (u32 *) dst_default_metrics;
+		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
 
 	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
@@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	if (fi->fib_priority &&
 	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
 		goto nla_put_failure;
-	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
 		goto nla_put_failure;
 
 	if (fi->fib_prefsrc &&
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9eebe43e..6883b3d4ba8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt)
 
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
+	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rtable *rt = (struct rtable *) dst;
 
+	if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+		kfree(p);
+
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 			rt->rt_uses_gateway = 1;
 		}
-		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+		if (fi->fib_metrics != &dst_default_metrics) {
+			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+			atomic_inc(&fi->fib_metrics->refcnt);
+		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 80b79dd0e2f29f06a6a54a5755c718f1c7ebb136 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sat, 27 May 2017 06:07:18 -0400
Subject: fs: locks: Fix some troubles at kernel-doc comments

There are a few syntax violations that cause outputs of
a few comments to not be properly parsed in ReST format.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index af2031a1fcff..4a4543a7f9c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1858,8 +1858,8 @@ EXPORT_SYMBOL(generic_setlease);
  *
  * Call this to establish a lease on the file. The "lease" argument is not
  * used for F_UNLCK requests and may be NULL. For commands that set or alter
- * an existing lease, the (*lease)->fl_lmops->lm_break operation must be set;
- * if not, this function will return -ENOLCK (and generate a scary-looking
+ * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
+ * set; if not, this function will return -ENOLCK (and generate a scary-looking
  * stack trace).
  *
  * The "priv" pointer is passed directly to the lm_setup function as-is. It
@@ -1972,15 +1972,13 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
  *	@cmd: the type of lock to apply.
  *
  *	Apply a %FL_FLOCK style lock to an open file descriptor.
- *	The @cmd can be one of
+ *	The @cmd can be one of:
  *
- *	%LOCK_SH -- a shared lock.
- *
- *	%LOCK_EX -- an exclusive lock.
- *
- *	%LOCK_UN -- remove an existing lock.
- *
- *	%LOCK_MAND -- a `mandatory' flock.  This exists to emulate Windows Share Modes.
+ *	- %LOCK_SH -- a shared lock.
+ *	- %LOCK_EX -- an exclusive lock.
+ *	- %LOCK_UN -- remove an existing lock.
+ *	- %LOCK_MAND -- a 'mandatory' flock.
+ *	  This exists to emulate Windows Share Modes.
  *
  *	%LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
  *	processes read and write access respectively.
-- 
cgit v1.2.3-59-g8ed1b


From a75d30c772078546ac00399a94ecdc82df1a4d72 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 27 May 2017 06:07:19 -0400
Subject: fs/locks: pass kernel struct flock to fcntl_getlk/setlk

This will make it easier to implement a sane compat fcntl syscall.

[ jlayton: fix undeclared identifiers in 32-bit fcntl64 syscall handler ]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c         | 27 +++++++++++++++----
 fs/locks.c         | 79 +++++++++++++++---------------------------------------
 include/linux/fs.h |  8 +++---
 3 files changed, 48 insertions(+), 66 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..ed4283d500a3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -246,6 +246,8 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
+	void __user *argp = (void __user *)arg;
+	struct flock flock;
 	long err = -EINVAL;
 
 	switch (cmd) {
@@ -273,7 +275,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_OFD_GETLK:
 #endif
 	case F_GETLK:
-		err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			return -EFAULT;
+		err = fcntl_getlk(filp, cmd, &flock);
+		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
+			return -EFAULT;
 		break;
 #if BITS_PER_LONG != 32
 	/* 32-bit arches must use fcntl64() */
@@ -283,7 +289,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		/* Fallthrough */
 	case F_SETLK:
 	case F_SETLKW:
-		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			return -EFAULT;
+		err = fcntl_setlk(fd, filp, cmd, &flock);
 		break;
 	case F_GETOWN:
 		/*
@@ -383,7 +391,9 @@ out:
 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 		unsigned long, arg)
 {	
+	void __user *argp = (void __user *)arg;
 	struct fd f = fdget_raw(fd);
+	struct flock64 flock;
 	long err = -EBADF;
 
 	if (!f.file)
@@ -401,14 +411,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 	switch (cmd) {
 	case F_GETLK64:
 	case F_OFD_GETLK:
-		err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
+		err = -EFAULT;
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			break;
+		err = fcntl_getlk64(f.file, cmd, &flock);
+		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
+			err = -EFAULT;
 		break;
 	case F_SETLK64:
 	case F_SETLKW64:
 	case F_OFD_SETLK:
 	case F_OFD_SETLKW:
-		err = fcntl_setlk64(fd, f.file, cmd,
-				(struct flock64 __user *) arg);
+		err = -EFAULT;
+		if (copy_from_user(&flock, argp, sizeof(flock)))
+			break;
+		err = fcntl_setlk64(fd, f.file, cmd, &flock);
 		break;
 	default:
 		err = do_fcntl(fd, cmd, arg, f.file);
diff --git a/fs/locks.c b/fs/locks.c
index 4a4543a7f9c1..afefeb4ad6de 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2084,26 +2084,22 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
-int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
+int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
 {
 	struct file_lock file_lock;
-	struct flock flock;
 	int error;
 
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
 	error = -EINVAL;
-	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
 		goto out;
 
-	error = flock_to_posix_lock(filp, &file_lock, &flock);
+	error = flock_to_posix_lock(filp, &file_lock, flock);
 	if (error)
 		goto out;
 
 	if (cmd == F_OFD_GETLK) {
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_GETLK;
@@ -2115,15 +2111,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 	if (error)
 		goto out;
  
-	flock.l_type = file_lock.fl_type;
+	flock->l_type = file_lock.fl_type;
 	if (file_lock.fl_type != F_UNLCK) {
-		error = posix_lock_to_flock(&flock, &file_lock);
+		error = posix_lock_to_flock(flock, &file_lock);
 		if (error)
 			goto rel_priv;
 	}
-	error = -EFAULT;
-	if (!copy_to_user(l, &flock, sizeof(flock)))
-		error = 0;
 rel_priv:
 	locks_release_private(&file_lock);
 out:
@@ -2216,26 +2209,16 @@ check_fmode_for_setlk(struct file_lock *fl)
  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
-		struct flock __user *l)
+		struct flock *flock)
 {
 	struct file_lock *file_lock = locks_alloc_lock();
-	struct flock flock;
-	struct inode *inode;
+	struct inode *inode = locks_inode(filp);
 	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
 		return -ENOLCK;
 
-	inode = locks_inode(filp);
-
-	/*
-	 * This might block, so we do it before checking the inode.
-	 */
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2244,7 +2227,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-	error = flock_to_posix_lock(filp, file_lock, &flock);
+	error = flock_to_posix_lock(filp, file_lock, flock);
 	if (error)
 		goto out;
 
@@ -2259,7 +2242,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case F_OFD_SETLK:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLK;
@@ -2268,7 +2251,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		break;
 	case F_OFD_SETLKW:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLKW;
@@ -2313,26 +2296,22 @@ out:
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
-int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
+int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
 {
 	struct file_lock file_lock;
-	struct flock64 flock;
 	int error;
 
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
 	error = -EINVAL;
-	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
 		goto out;
 
-	error = flock64_to_posix_lock(filp, &file_lock, &flock);
+	error = flock64_to_posix_lock(filp, &file_lock, flock);
 	if (error)
 		goto out;
 
 	if (cmd == F_OFD_GETLK) {
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_GETLK64;
@@ -2344,13 +2323,9 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 	if (error)
 		goto out;
 
-	flock.l_type = file_lock.fl_type;
+	flock->l_type = file_lock.fl_type;
 	if (file_lock.fl_type != F_UNLCK)
-		posix_lock_to_flock64(&flock, &file_lock);
-
-	error = -EFAULT;
-	if (!copy_to_user(l, &flock, sizeof(flock)))
-		error = 0;
+		posix_lock_to_flock64(flock, &file_lock);
 
 	locks_release_private(&file_lock);
 out:
@@ -2361,26 +2336,16 @@ out:
  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
-		struct flock64 __user *l)
+		struct flock64 *flock)
 {
 	struct file_lock *file_lock = locks_alloc_lock();
-	struct flock64 flock;
-	struct inode *inode;
+	struct inode *inode = locks_inode(filp);
 	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
 		return -ENOLCK;
 
-	/*
-	 * This might block, so we do it before checking the inode.
-	 */
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
-
-	inode = locks_inode(filp);
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2389,7 +2354,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-	error = flock64_to_posix_lock(filp, file_lock, &flock);
+	error = flock64_to_posix_lock(filp, file_lock, flock);
 	if (error)
 		goto out;
 
@@ -2404,7 +2369,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case F_OFD_SETLK:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLK64;
@@ -2413,7 +2378,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		break;
 	case F_OFD_SETLKW:
 		error = -EINVAL;
-		if (flock.l_pid != 0)
+		if (flock->l_pid != 0)
 			goto out;
 
 		cmd = F_SETLKW64;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..aa4affb38c39 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1038,14 +1038,14 @@ static inline struct inode *locks_inode(const struct file *f)
 }
 
 #ifdef CONFIG_FILE_LOCKING
-extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
+extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
-			struct flock __user *);
+			struct flock *);
 
 #if BITS_PER_LONG == 32
-extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
+extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *);
 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
-			struct flock64 __user *);
+			struct flock64 *);
 #endif
 
 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
-- 
cgit v1.2.3-59-g8ed1b


From 94073ad77fff221b5e66b8b9863a546ba212d6a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 27 May 2017 06:07:20 -0400
Subject: fs/locks: don't mess with the address limit in compat_fcntl64

Instead write a proper compat syscall that calls common helpers.

[ jlayton: fix pointer dereferencing in fixup_compat_flock ]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c | 118 +++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 67 insertions(+), 51 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index ed4283d500a3..bbf80344c125 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -506,76 +506,92 @@ convert_fcntl_cmd(unsigned int cmd)
 	return cmd;
 }
 
+/*
+ * GETLK was successful and we need to return the data, but it needs to fit in
+ * the compat structure.
+ * l_start shouldn't be too big, unless the original start + end is greater than
+ * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
+ * -EOVERFLOW in that case.  l_len could be too big, in which case we just
+ * truncate it, and only allow the app to see that part of the conflicting lock
+ * that might make sense to it anyway
+ */
+static int fixup_compat_flock(struct flock *flock)
+{
+	if (flock->l_start > COMPAT_OFF_T_MAX)
+		return -EOVERFLOW;
+	if (flock->l_len > COMPAT_OFF_T_MAX)
+		flock->l_len = COMPAT_OFF_T_MAX;
+	return 0;
+}
+
 COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 		       compat_ulong_t, arg)
 {
-	mm_segment_t old_fs;
-	struct flock f;
-	long ret;
-	unsigned int conv_cmd;
+	struct fd f = fdget_raw(fd);
+	struct flock flock;
+	long err = -EBADF;
+
+	if (!f.file)
+		return err;
+
+	if (unlikely(f.file->f_mode & FMODE_PATH)) {
+		if (!check_fcntl_cmd(cmd))
+			goto out_put;
+	}
+
+	err = security_file_fcntl(f.file, cmd, arg);
+	if (err)
+		goto out_put;
 
 	switch (cmd) {
 	case F_GETLK:
+		err = get_compat_flock(&flock, compat_ptr(arg));
+		if (err)
+			break;
+		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
+		if (err)
+			break;
+		err = fixup_compat_flock(&flock);
+		if (err)
+			return err;
+		err = put_compat_flock(&flock, compat_ptr(arg));
+		break;
+	case F_GETLK64:
+	case F_OFD_GETLK:
+		err = get_compat_flock64(&flock, compat_ptr(arg));
+		if (err)
+			break;
+		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
+		if (err)
+			break;
+		err = fixup_compat_flock(&flock);
+		if (err)
+			return err;
+		err = put_compat_flock64(&flock, compat_ptr(arg));
+		break;
 	case F_SETLK:
 	case F_SETLKW:
-		ret = get_compat_flock(&f, compat_ptr(arg));
-		if (ret != 0)
+		err = get_compat_flock(&flock, compat_ptr(arg));
+		if (err)
 			break;
-		old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		ret = sys_fcntl(fd, cmd, (unsigned long)&f);
-		set_fs(old_fs);
-		if (cmd == F_GETLK && ret == 0) {
-			/* GETLK was successful and we need to return the data...
-			 * but it needs to fit in the compat structure.
-			 * l_start shouldn't be too big, unless the original
-			 * start + end is greater than COMPAT_OFF_T_MAX, in which
-			 * case the app was asking for trouble, so we return
-			 * -EOVERFLOW in that case.
-			 * l_len could be too big, in which case we just truncate it,
-			 * and only allow the app to see that part of the conflicting
-			 * lock that might make sense to it anyway
-			 */
-
-			if (f.l_start > COMPAT_OFF_T_MAX)
-				ret = -EOVERFLOW;
-			if (f.l_len > COMPAT_OFF_T_MAX)
-				f.l_len = COMPAT_OFF_T_MAX;
-			if (ret == 0)
-				ret = put_compat_flock(&f, compat_ptr(arg));
-		}
+		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
 		break;
-
-	case F_GETLK64:
 	case F_SETLK64:
 	case F_SETLKW64:
-	case F_OFD_GETLK:
 	case F_OFD_SETLK:
 	case F_OFD_SETLKW:
-		ret = get_compat_flock64(&f, compat_ptr(arg));
-		if (ret != 0)
+		err = get_compat_flock64(&flock, compat_ptr(arg));
+		if (err)
 			break;
-		old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		conv_cmd = convert_fcntl_cmd(cmd);
-		ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
-		set_fs(old_fs);
-		if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
-			/* need to return lock information - see above for commentary */
-			if (f.l_start > COMPAT_LOFF_T_MAX)
-				ret = -EOVERFLOW;
-			if (f.l_len > COMPAT_LOFF_T_MAX)
-				f.l_len = COMPAT_LOFF_T_MAX;
-			if (ret == 0)
-				ret = put_compat_flock64(&f, compat_ptr(arg));
-		}
+		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
 		break;
-
 	default:
-		ret = sys_fcntl(fd, cmd, arg);
+		err = do_fcntl(fd, cmd, arg, f.file);
 		break;
 	}
-	return ret;
+out_put:
+	fdput(f);
+	return err;
 }
 
 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
-- 
cgit v1.2.3-59-g8ed1b


From 393cc3f51135ea2520521f776ef3afdf3395c797 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 13 Jun 2017 13:35:50 +0200
Subject: fs/fcntl: f_setown, allow returning error

Allow f_setown to return an error value. We will fail in the next patch
with EINVAL for bad input to f_setown, so tile the path for the later
patch.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c         | 7 ++++---
 include/linux/fs.h | 2 +-
 net/socket.c       | 3 +--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index bbf80344c125..313eba860346 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -109,7 +109,7 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 }
 EXPORT_SYMBOL(__f_setown);
 
-void f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, unsigned long arg, int force)
 {
 	enum pid_type type;
 	struct pid *pid;
@@ -123,6 +123,8 @@ void f_setown(struct file *filp, unsigned long arg, int force)
 	pid = find_vpid(who);
 	__f_setown(filp, pid, type, force);
 	rcu_read_unlock();
+
+	return 0;
 }
 EXPORT_SYMBOL(f_setown);
 
@@ -305,8 +307,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
-		f_setown(filp, arg, 1);
-		err = 0;
+		err = f_setown(filp, arg, 1);
 		break;
 	case F_GETOWN_EX:
 		err = f_getown_ex(filp, arg);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa4affb38c39..25ee1ff6d45b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1249,7 +1249,7 @@ extern void fasync_free(struct fasync_struct *);
 extern void kill_fasync(struct fasync_struct **, int, int);
 
 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern void f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b..a30a1e324390 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -950,8 +950,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			err = -EFAULT;
 			if (get_user(pid, (int __user *)argp))
 				break;
-			f_setown(sock->file, pid, 1);
-			err = 0;
+			err = f_setown(sock->file, pid, 1);
 			break;
 		case FIOGETOWN:
 		case SIOCGPGRP:
-- 
cgit v1.2.3-59-g8ed1b


From fc3dc67471461c0efcb1ed22fb7595121d65fad9 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 13 Jun 2017 13:35:51 +0200
Subject: fs/fcntl: f_setown, avoid undefined behaviour

fcntl(0, F_SETOWN, 0x80000000) triggers:
UBSAN: Undefined behaviour in fs/fcntl.c:118:7
negation of -2147483648 cannot be represented in type 'int':
CPU: 1 PID: 18261 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1
...
Call Trace:
...
 [<ffffffffad8f0868>] ? f_setown+0x1d8/0x200
 [<ffffffffad8f19a9>] ? SyS_fcntl+0x999/0xf30
 [<ffffffffaed1fb00>] ? entry_SYSCALL_64_fastpath+0x23/0xc1

Fix that by checking the arg parameter properly (against INT_MAX) before
"who = -who". And return immediatelly with -EINVAL in case it is wrong.
Note that according to POSIX we can return EINVAL:
    http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html

    [EINVAL]
        The cmd argument is F_SETOWN and the value of the argument
        is not valid as a process or process group identifier.

[v2] returns an error, v1 used to fail silently
[v3] implement proper check for the bad value INT_MIN

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 313eba860346..693322e28751 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -116,6 +116,10 @@ int f_setown(struct file *filp, unsigned long arg, int force)
 	int who = arg;
 	type = PIDTYPE_PID;
 	if (who < 0) {
+		/* avoid overflow below */
+		if (who == INT_MIN)
+			return -EINVAL;
+
 		type = PIDTYPE_PGID;
 		who = -who;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f73127356f344483c82632accda2e72b7e0e5f25 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 14 Jun 2017 09:11:54 -0400
Subject: fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be found

The current implementation of F_SETOWN doesn't properly vet the argument
passed in and only returns an error if INT_MIN is passed in. If the
argument doesn't specify a valid pid/pgid, then we just end up cleaning
out the file->f_owner structure.

What we really want is to only clean that out only in the case where
userland passed in an argument of 0. For anything else, we want to
return ESRCH if it doesn't refer to a valid pid.

The relevant POSIX spec page is here:

    http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html

Cc: Jiri Slaby <jslaby@suse.cz>
Cc: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 693322e28751..afed3b364979 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -112,8 +112,9 @@ EXPORT_SYMBOL(__f_setown);
 int f_setown(struct file *filp, unsigned long arg, int force)
 {
 	enum pid_type type;
-	struct pid *pid;
-	int who = arg;
+	struct pid *pid = NULL;
+	int who = arg, ret = 0;
+
 	type = PIDTYPE_PID;
 	if (who < 0) {
 		/* avoid overflow below */
@@ -123,12 +124,19 @@ int f_setown(struct file *filp, unsigned long arg, int force)
 		type = PIDTYPE_PGID;
 		who = -who;
 	}
+
 	rcu_read_lock();
-	pid = find_vpid(who);
-	__f_setown(filp, pid, type, force);
+	if (who) {
+		pid = find_vpid(who);
+		if (!pid)
+			ret = -ESRCH;
+	}
+
+	if (!ret)
+		__f_setown(filp, pid, type, force);
 	rcu_read_unlock();
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(f_setown);
 
-- 
cgit v1.2.3-59-g8ed1b