From 1de3cd4fb49f3463679c49afe0aa9ceb133f3e49 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Wed, 8 Mar 2017 16:10:19 +0000 Subject: drm: hdlcd: Fix the calculation of the scanout start address The calculation of the framebuffer's start address was wrongly using the CRTC's x and y position rather than the one of the source framebuffer. To fix that we need to update the plane_check code to call drm_plane_helper_check_state() to clip the src and dst coordinates. While there so some minor cleanup of redundant freeing of devm_alloc-ated memory. Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 47 ++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 20ebfb4fbdfa..c65116348281 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -206,16 +207,33 @@ static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = { static int hdlcd_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) { - u32 src_w, src_h; + struct drm_rect clip = { 0 }; + struct drm_crtc_state *crtc_state; + u32 src_h = state->src_h >> 16; - src_w = state->src_w >> 16; - src_h = state->src_h >> 16; + /* only the HDLCD_REG_FB_LINE_COUNT register has a limit */ + if (src_h >= HDLCD_MAX_YRES) { + DRM_DEBUG_KMS("Invalid source width: %d\n", src_h); + return -EINVAL; + } + + if (!state->fb || !state->crtc) + return 0; - /* we can't do any scaling of the plane source */ - if ((src_w != state->crtc_w) || (src_h != state->crtc_h)) + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + if (!crtc_state) { + DRM_DEBUG_KMS("Invalid crtc state\n"); return -EINVAL; + } - return 0; + clip.x2 = crtc_state->adjusted_mode.hdisplay; + clip.y2 = crtc_state->adjusted_mode.vdisplay; + + return drm_plane_helper_check_state(state, &clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, true); } static void hdlcd_plane_atomic_update(struct drm_plane *plane, @@ -224,21 +242,20 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane, struct drm_framebuffer *fb = plane->state->fb; struct hdlcd_drm_private *hdlcd; struct drm_gem_cma_object *gem; - u32 src_w, src_h, dest_w, dest_h; + u32 src_x, src_y, dest_h; dma_addr_t scanout_start; if (!fb) return; - src_w = plane->state->src_w >> 16; - src_h = plane->state->src_h >> 16; - dest_w = plane->state->crtc_w; - dest_h = plane->state->crtc_h; + src_x = plane->state->src.x1 >> 16; + src_y = plane->state->src.y1 >> 16; + dest_h = drm_rect_height(&plane->state->dst); gem = drm_fb_cma_get_gem_obj(fb, 0); + scanout_start = gem->paddr + fb->offsets[0] + - plane->state->crtc_y * fb->pitches[0] + - plane->state->crtc_x * - fb->format->cpp[0]; + src_y * fb->pitches[0] + + src_x * fb->format->cpp[0]; hdlcd = plane->dev->dev_private; hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, fb->pitches[0]); @@ -285,7 +302,6 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm) formats, ARRAY_SIZE(formats), DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { - devm_kfree(drm->dev, plane); return ERR_PTR(ret); } @@ -309,7 +325,6 @@ int hdlcd_setup_crtc(struct drm_device *drm) &hdlcd_crtc_funcs, NULL); if (ret) { hdlcd_plane_destroy(primary); - devm_kfree(drm->dev, primary); return ret; } -- cgit v1.2.3-59-g8ed1b From ec8542cb8a628fd10522f8421182207277f90185 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 29 Jan 2017 20:40:59 +0200 Subject: ARM: dts: bcm2835: fix uart0 pinctrl node names Downstream kernel uses pins 32, 33 as UART0 (PL011) Rx/Tx to communicate with the Bluetooth chip. So ALT3 of these pins is most likely not CTS/RTS. Change the node name to reflect that. This matches section 6.2 "Alternative Function Assignments" in the BCM2835 ARM Peripherals document. With this change in place, adding &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_gpio32 &gpclk2_gpio43>; status = "okay"; }; to bcm2837-rpi-3-b.dts does the right thing on my Raspberry Pi 3. Pins 30, 31 are CTS/RTS of UART0 in alternate function 3. Rename uart0_gpio30 as well. While at it, fix a little typo in a nearby comment. Fixes: 21ff843931b ("ARM: dts: bcm283x: Define standard pinctrl groups in the gpio node.") Acked-by: Stefan Wahren Signed-off-by: Baruch Siach Reviewed-by: Eric Anholt --- arch/arm/boot/dts/bcm283x.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 35cea3fcaf5c..e9623af3ad76 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -295,17 +295,17 @@ /* Separate from the uart0_gpio14 group * because it conflicts with spi1_gpio16, and * people often run uart0 on the two pins - * without flow contrl. + * without flow control. */ uart0_ctsrts_gpio16: uart0_ctsrts_gpio16 { brcm,pins = <16 17>; brcm,function = ; }; - uart0_gpio30: uart0_gpio30 { + uart0_ctsrts_gpio30: uart0_ctsrts_gpio30 { brcm,pins = <30 31>; brcm,function = ; }; - uart0_ctsrts_gpio32: uart0_ctsrts_gpio32 { + uart0_gpio32: uart0_gpio32 { brcm,pins = <32 33>; brcm,function = ; }; -- cgit v1.2.3-59-g8ed1b From 843b2287fb77f0b1966fa90912e093a0d417b27b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 29 Jan 2017 21:53:10 +0200 Subject: ARM: dts: bcm2835: fix i2c0 pins According to the BCM2835 ARM Peripherals document i2c0 doesn't map to pins 32, 34 but to 28, 29. Fixes: 21ff843931b ("ARM: dts: bcm283x: Define standard pinctrl groups in the gpio node.") Signed-off-by: Baruch Siach Reviewed-by: Eric Anholt --- arch/arm/boot/dts/bcm283x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index e9623af3ad76..7b4880dc291a 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -198,8 +198,8 @@ brcm,pins = <0 1>; brcm,function = ; }; - i2c0_gpio32: i2c0_gpio32 { - brcm,pins = <32 34>; + i2c0_gpio28: i2c0_gpio28 { + brcm,pins = <28 29>; brcm,function = ; }; i2c0_gpio44: i2c0_gpio44 { -- cgit v1.2.3-59-g8ed1b From e1be65a5e426460ab567076107311e83532904e9 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 29 Jan 2017 21:53:11 +0200 Subject: ARM: dts: bcm2835: fix uart0/uart1 pins According to the BCM2835 ARM Peripherals document uart1 doesn't map to pins 36-39, but uart0 does. Also, split into separate Rx/Tx and CST/RTS groups to match other uart nodes. Fixes: 21ff843931b ("ARM: dts: bcm283x: Define standard pinctrl groups in the gpio node.") Signed-off-by: Baruch Siach Reviewed-by: Eric Anholt --- arch/arm/boot/dts/bcm283x.dtsi | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 7b4880dc291a..561f27d8d922 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -309,6 +309,14 @@ brcm,pins = <32 33>; brcm,function = ; }; + uart0_gpio36: uart0_gpio36 { + brcm,pins = <36 37>; + brcm,function = ; + }; + uart0_ctsrts_gpio38: uart0_ctsrts_gpio38 { + brcm,pins = <38 39>; + brcm,function = ; + }; uart1_gpio14: uart1_gpio14 { brcm,pins = <14 15>; @@ -326,10 +334,6 @@ brcm,pins = <30 31>; brcm,function = ; }; - uart1_gpio36: uart1_gpio36 { - brcm,pins = <36 37 38 39>; - brcm,function = ; - }; uart1_gpio40: uart1_gpio40 { brcm,pins = <40 41>; brcm,function = ; -- cgit v1.2.3-59-g8ed1b From 10b6c0c2e2bb8cd1be682f8d36ef597e3419cb88 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 30 Jan 2017 20:44:39 +0200 Subject: ARM: dts: bcm2835: add index to the ethernet alias An alias name should have an index number even when it is the only of its type. This allows U-Boot to add the local-mac-address property. Otherwise U-Boot skips the alias. Fixes: 6a93792774 ("ARM: bcm2835: dt: Add the ethernet to the device trees") Signed-off-by: Baruch Siach Acked-by: Lubomir Rintel Reviewed-by: Eric Anholt --- arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi | 2 +- arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi b/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi index 12c981e51134..9a0599f711ff 100644 --- a/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi +++ b/arch/arm/boot/dts/bcm283x-rpi-smsc9512.dtsi @@ -1,6 +1,6 @@ / { aliases { - ethernet = ðernet; + ethernet0 = ðernet; }; }; diff --git a/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi b/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi index 3f0a56ebcf1f..dc7ae776db5f 100644 --- a/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi +++ b/arch/arm/boot/dts/bcm283x-rpi-smsc9514.dtsi @@ -1,6 +1,6 @@ / { aliases { - ethernet = ðernet; + ethernet0 = ðernet; }; }; -- cgit v1.2.3-59-g8ed1b From d2718d1365f7fce624fd7ed163f60532f92ed016 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 5 Apr 2017 17:18:03 +0200 Subject: arm64: marvell: enable the Armada 37xx pinctrl driver This commit makes sure the driver for the Armada 37xx pin controller is enabled. Signed-off-by: Gregory CLEMENT --- arch/arm64/Kconfig.platforms | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 129cc5ae4091..9aa71a3f3f50 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -103,8 +103,13 @@ config ARCH_MVEBU select ARMADA_AP806_SYSCON select ARMADA_CP110_SYSCON select ARMADA_37XX_CLK + select GPIOLIB + select GPIOLIB_IRQCHIP select MVEBU_ODMI select MVEBU_PIC + select OF_GPIO + select PINCTRL + select PINCTRL_ARMADA_37XX help This enables support for Marvell EBU familly, including: - Armada 3700 SoC Family -- cgit v1.2.3-59-g8ed1b From 7b4ccb3c466f62bbf2f4dd5d6a143d945a6f3051 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 25 Apr 2017 19:36:25 +0200 Subject: soc: renesas: Provide dummy rcar_rst_read_mode_pins() for compile-testing If the R-Car RST driver is not included, compile-testing R-Car clock drivers fails with a link error: undefined reference to `rcar_rst_read_mode_pins' To fix this, provide a dummy version. Use the exact same test logic as in drivers/soc/renesas/Makefile, as there is no Kconfig symbol (yet) to control compilation of the R-Car RST driver. Fixes: 527c02f66d263d2e ("soc: renesas: Add R-Car RST driver") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- include/linux/soc/renesas/rcar-rst.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index a18e0783946b..787e7ad53d45 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -1,6 +1,11 @@ #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__ #define __LINUX_SOC_RENESAS_RCAR_RST_H__ +#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \ + defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796) int rcar_rst_read_mode_pins(u32 *mode); +#else +static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } +#endif #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ -- cgit v1.2.3-59-g8ed1b From afda007feda5cfe7463f3281dfeee703a5dc7ca3 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 5 Apr 2017 17:18:07 +0200 Subject: ARM64: dts: marvell: Add pinctrl nodes for Armada 3700 Add the nodes for the two pin controller present in the Armada 37xx SoCs. Initially the node was named gpio1 using the same name that for the register range in the datasheet. However renaming it pinctr_nb (nb for North Bridge) makes more sens. Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 42 ++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 311b97c80c7b..e6216cbd4b38 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -161,10 +161,29 @@ #clock-cells = <1>; }; - gpio1: gpio@13800 { - compatible = "marvell,mvebu-gpio-3700", + pinctrl_nb: pinctrl@13800 { + compatible = "marvell,armada3710-nb-pinctrl", "syscon", "simple-mfd"; - reg = <0x13800 0x500>; + reg = <0x13800 0x100>, <0x13C00 0x20>; + gpionb: gpio { + #gpio-cells = <2>; + gpio-ranges = <&pinctrl_nb 0 0 36>; + gpio-controller; + interrupts = + , + , + , + , + , + , + , + , + , + , + , + ; + + }; xtalclk: xtal-clk { compatible = "marvell,armada-3700-xtal-clock"; @@ -173,6 +192,23 @@ }; }; + pinctrl_sb: pinctrl@18800 { + compatible = "marvell,armada3710-sb-pinctrl", + "syscon", "simple-mfd"; + reg = <0x18800 0x100>, <0x18C00 0x20>; + gpiosb: gpio { + #gpio-cells = <2>; + gpio-ranges = <&pinctrl_sb 0 0 29>; + gpio-controller; + interrupts = + , + , + , + , + ; + }; + }; + eth0: ethernet@30000 { compatible = "marvell,armada-3700-neta"; reg = <0x30000 0x4000>; -- cgit v1.2.3-59-g8ed1b From 6a680783aaadd168557eec695374929ac066536f Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 14 Dec 2016 17:43:48 +0100 Subject: ARM64: dts: marvell: armada37xx: add pinctrl definition Start to populate the device tree of the Armada 37xx with the pincontrol configuration used on the board providing a dts. Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-3720-db.dts | 8 +++++++ arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 31 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts index 950cbd23a5bd..01cdcb98416f 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts @@ -79,6 +79,8 @@ }; &i2c0 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; status = "okay"; gpio_exp: pca9555@22 { @@ -113,6 +115,8 @@ &spi0 { status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&spi_quad_pins>; m25p80@0 { compatible = "jedec,spi-nor"; @@ -143,6 +147,8 @@ /* Exported on the micro USB connector CON32 through an FTDI */ &uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart1_pins>; status = "okay"; }; @@ -178,6 +184,8 @@ }; ð0 { + pinctrl-names = "default"; + pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii-id"; phy = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index e6216cbd4b38..f581c74c0bb2 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -190,6 +190,31 @@ clock-output-names = "xtal"; #clock-cells = <0>; }; + + spi_quad_pins: spi-quad-pins { + groups = "spi_quad"; + function = "spi"; + }; + + i2c1_pins: i2c1-pins { + groups = "i2c1"; + function = "i2c"; + }; + + i2c2_pins: i2c2-pins { + groups = "i2c2"; + function = "i2c"; + }; + + uart1_pins: uart1-pins { + groups = "uart1"; + function = "uart"; + }; + + uart2_pins: uart2-pins { + groups = "uart2"; + function = "uart"; + }; }; pinctrl_sb: pinctrl@18800 { @@ -207,6 +232,12 @@ , ; }; + + rgmii_pins: mii-pins { + groups = "rgmii"; + function = "mii"; + }; + }; eth0: ethernet@30000 { -- cgit v1.2.3-59-g8ed1b From 07a63cbe8bcb6ba72fb989dcab1ec55ec6c36c7e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 2 May 2017 13:36:00 +0200 Subject: s390/cputime: fix incorrect system time git commit c5328901aa1db134 "[S390] entry[64].S improvements" removed the update of the exit_timer lowcore field from the critical section cleanup of the .Lsysc_restore/.Lsysc_done and .Lio_restore/.Lio_done blocks. If the PSW is updated by the critical section cleanup to point to user space again, the interrupt entry code will do a vtime calculation after the cleanup completed with an exit_timer value which has *not* been updated. Due to this incorrect system time deltas are calculated. If an interrupt occured with an old PSW between .Lsysc_restore/.Lsysc_done or .Lio_restore/.Lio_done update __LC_EXIT_TIMER with the system entry time of the interrupt. Cc: stable@vger.kernel.org # 3.3+ Tested-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a5f5d3bb3dbc..e408d9cc5b96 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -312,6 +312,7 @@ ENTRY(system_call) lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) +.Lsysc_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) @@ -623,6 +624,7 @@ ENTRY(io_int_handler) lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) +.Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) @@ -1174,15 +1176,23 @@ cleanup_critical: br %r14 .Lcleanup_sysc_restore: + # check if stpt has been executed clg %r9,BASED(.Lcleanup_sysc_restore_insn) + jh 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC je 0f + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER +0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8) + je 1f lg %r9,24(%r11) # get saved pointer to pt_regs mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) -0: lmg %r8,%r9,__LC_RETURN_PSW +1: lmg %r8,%r9,__LC_RETURN_PSW br %r14 .Lcleanup_sysc_restore_insn: + .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: @@ -1190,15 +1200,20 @@ cleanup_critical: br %r14 .Lcleanup_io_restore: + # check if stpt has been executed clg %r9,BASED(.Lcleanup_io_restore_insn) - je 0f + jh 0f + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER +0: clg %r9,BASED(.Lcleanup_io_restore_insn+8) + je 1f lg %r9,24(%r11) # get saved r11 pointer to pt_regs mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) -0: lmg %r8,%r9,__LC_RETURN_PSW +1: lmg %r8,%r9,__LC_RETURN_PSW br %r14 .Lcleanup_io_restore_insn: + .quad .Lio_exit_timer .quad .Lio_done - 4 .Lcleanup_idle: -- cgit v1.2.3-59-g8ed1b From 085b6ba0f7971d18fc3078b25e8309e9e75659cb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 May 2017 12:38:57 +0200 Subject: s390/ftrace: fix compile for !MODULES Fix this compile error if CONFIG_MODULES is disabled: arch/s390/built-in.o: In function `ftrace_plt_init': arch/s390/kernel/ftrace.o:(.init.text+0x34cc): undefined reference to `module_alloc' Reported-by: Rob Landley Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 60a8a4e207ed..68f2b8a15eab 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -172,6 +172,8 @@ int __init ftrace_dyn_arch_init(void) return 0; } +#ifdef CONFIG_MODULES + static int __init ftrace_plt_init(void) { unsigned int *ip; @@ -190,6 +192,8 @@ static int __init ftrace_plt_init(void) } device_initcall(ftrace_plt_init); +#endif /* CONFIG_MODULES */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses -- cgit v1.2.3-59-g8ed1b From db55947dd2d09cd3e6f722d1205934fec793ee63 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 May 2017 13:20:11 +0200 Subject: s390/uprobes: fix compile for !KPROBES Fix the following compile error(s) if CONFIG_KPROBES is disabled: arch/s390/kernel/uprobes.c:79:14: error: implicit declaration of function 'probe_get_fixup_type' arch/s390/kernel/uprobes.c:87:14: error: 'FIXUP_PSW_NORMAL' undeclared (first use in this function) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/dis.h | 2 ++ arch/s390/include/asm/kprobes.h | 20 ++++++++++---------- arch/s390/lib/probes.c | 1 + 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h index 60323c21938b..37f617dfbede 100644 --- a/arch/s390/include/asm/dis.h +++ b/arch/s390/include/asm/dis.h @@ -40,6 +40,8 @@ static inline int insn_length(unsigned char code) return ((((int) code + 64) >> 7) + 1) << 1; } +struct pt_regs; + void show_code(struct pt_regs *regs); void print_fn_code(unsigned char *code, unsigned long len); int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len); diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 1293c4066cfc..28792ef82c83 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -27,12 +27,21 @@ * 2005-Dec Used as a template for s390 by Mike Grundy * */ +#include #include #define BREAKPOINT_INSTRUCTION 0x0002 +#define FIXUP_PSW_NORMAL 0x08 +#define FIXUP_BRANCH_NOT_TAKEN 0x04 +#define FIXUP_RETURN_REGISTER 0x02 +#define FIXUP_NOT_REQUIRED 0x01 + +int probe_is_prohibited_opcode(u16 *insn); +int probe_get_fixup_type(u16 *insn); +int probe_is_insn_relative_long(u16 *insn); + #ifdef CONFIG_KPROBES -#include #include #include #include @@ -56,11 +65,6 @@ typedef u16 kprobe_opcode_t; #define KPROBE_SWAP_INST 0x10 -#define FIXUP_PSW_NORMAL 0x08 -#define FIXUP_BRANCH_NOT_TAKEN 0x04 -#define FIXUP_RETURN_REGISTER 0x02 -#define FIXUP_NOT_REQUIRED 0x01 - /* Architecture specific copy of original instruction */ struct arch_specific_insn { /* copy of original instruction */ @@ -90,10 +94,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -int probe_is_prohibited_opcode(u16 *insn); -int probe_get_fixup_type(u16 *insn); -int probe_is_insn_relative_long(u16 *insn); - #define flush_insn_slot(p) do { } while (0) #endif /* CONFIG_KPROBES */ diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c index ae90e1ae3607..1963ddbf4ab3 100644 --- a/arch/s390/lib/probes.c +++ b/arch/s390/lib/probes.c @@ -4,6 +4,7 @@ * Copyright IBM Corp. 2014 */ +#include #include #include -- cgit v1.2.3-59-g8ed1b From eb77e6b80f3bed262c7773236f0fb84649fd3091 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 27 Apr 2017 12:11:54 -0500 Subject: EDAC, amd64: Fix reporting of Chip Select sizes on Fam17h The wrong index into the csbases/csmasks arrays was being passed to the function to compute the chip select sizes, which resulted in the wrong size being computed. Address that so that the correct values are computed and printed. Also, redo how we calculate the number of pages in a CS row. Reported-by: Benjamin Bennett Signed-off-by: Yazen Ghannam Cc: # 4.10.x Cc: linux-edac Link: http://lkml.kernel.org/r/1493313114-11260-1-git-send-email-Yazen.Ghannam@amd.com [ Remove unneeded integer math comment, minor cleanups. ] Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 82dab1692264..3aea55698165 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -782,24 +782,26 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { - u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; - int dimm, size0, size1; + int dimm, size0, size1, cs0, cs1; edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); for (dimm = 0; dimm < 4; dimm++) { size0 = 0; + cs0 = dimm * 2; - if (dcsb[dimm*2] & DCSB_CS_ENABLE) - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm); + if (csrow_enabled(cs0, ctrl, pvt)) + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0); size1 = 0; - if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm); + cs1 = dimm * 2 + 1; + + if (csrow_enabled(cs1, ctrl, pvt)) + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0, - dimm * 2 + 1, size1); + cs0, size0, + cs1, size1); } } @@ -2756,26 +2758,22 @@ skip: * encompasses * */ -static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) +static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) { - u32 cs_mode, nr_pages; u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; + int csrow_nr = csrow_nr_orig; + u32 cs_mode, nr_pages; + if (!pvt->umc) + csrow_nr >>= 1; - /* - * The math on this doesn't look right on the surface because x/2*4 can - * be simplified to x*2 but this expression makes use of the fact that - * it is integral math where 1/2=0. This intermediate value becomes the - * number of bits to shift the DBAM register to extract the proper CSROW - * field. - */ - cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); + cs_mode = DBAM_DIMM(csrow_nr, dbam); - nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2)) - << (20 - PAGE_SHIFT); + nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", - csrow_nr, dct, cs_mode); + csrow_nr_orig, dct, cs_mode); edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); return nr_pages; -- cgit v1.2.3-59-g8ed1b From 0e78a87306a6f55b1c7bbafad1de62c3975953ca Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 3 May 2017 08:44:27 +0200 Subject: esp4: Fix udpencap for local TCP packets. Locally generated TCP packets are usually cloned, so we do skb_cow_data() on this packets. After that we need to reload the pointer to the esp header. On udpencap this header has an offset to skb_transport_header, so take this offset into account. Fixes: 67d349ed603 ("net/esp4: Fix invalid esph pointer crash") Fixes: fca11ebde3f0 ("esp4: Reorganize esp_output") Reported-by: Don Bowman Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 65cc02bd82bc..93322f895eab 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -248,6 +248,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * u8 *tail; u8 *vaddr; int nfrags; + int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; @@ -313,11 +314,13 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * } cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); + nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); - esp->esph = ip_esp_hdr(skb); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); -- cgit v1.2.3-59-g8ed1b From 9b3eb54106cf6acd03f07cf0ab01c13676a226c2 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 3 May 2017 16:43:19 +0200 Subject: xfrm: fix stack access out of bounds with CONFIG_XFRM_SUB_POLICY When CONFIG_XFRM_SUB_POLICY=y, xfrm_dst stores a copy of the flowi for that dst. Unfortunately, the code that allocates and fills this copy doesn't care about what type of flowi (flowi, flowi4, flowi6) gets passed. In multiple code paths (from raw_sendmsg, from TCP when replying to a FIN, in vxlan, geneve, and gre), the flowi that gets passed to xfrm is actually an on-stack flowi4, so we end up reading stuff from the stack past the end of the flowi4 struct. Since xfrm_dst->origin isn't used anywhere following commit ca116922afa8 ("xfrm: Eliminate "fl" and "pol" args to xfrm_bundle_ok()."), just get rid of it. xfrm_dst->partner isn't used either, so get rid of that too. Fixes: 9d6ec938019c ("ipv4: Use flowi4 in public route lookup interfaces.") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 10 ---------- net/xfrm/xfrm_policy.c | 47 ----------------------------------------------- 2 files changed, 57 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6793a30c66b1..7e7e2b0d2915 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -979,10 +979,6 @@ struct xfrm_dst { struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; -#ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; -#endif u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; @@ -998,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); -#ifdef CONFIG_XFRM_SUB_POLICY - kfree(xdst->origin); - xdst->origin = NULL; - kfree(xdst->partner); - xdst->partner = NULL; -#endif } #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b00a1d5a7f52..ed4e52d95172 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1797,43 +1797,6 @@ free_dst: goto out; } -#ifdef CONFIG_XFRM_SUB_POLICY -static int xfrm_dst_alloc_copy(void **target, const void *src, int size) -{ - if (!*target) { - *target = kmalloc(size, GFP_ATOMIC); - if (!*target) - return -ENOMEM; - } - - memcpy(*target, src, size); - return 0; -} -#endif - -static int xfrm_dst_update_parent(struct dst_entry *dst, - const struct xfrm_selector *sel) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->partner), - sel, sizeof(*sel)); -#else - return 0; -#endif -} - -static int xfrm_dst_update_origin(struct dst_entry *dst, - const struct flowi *fl) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); -#else - return 0; -#endif -} - static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) @@ -1905,16 +1868,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, xdst = (struct xfrm_dst *)dst; xdst->num_xfrms = err; - if (num_pols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - dst_free(dst); - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - return ERR_PTR(err); - } - xdst->num_pols = num_pols; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); -- cgit v1.2.3-59-g8ed1b From 3d05f3aed5d721c2c77d20288c29ab26c6193ed5 Mon Sep 17 00:00:00 2001 From: Julia Cartwright Date: Fri, 28 Apr 2017 12:41:02 -0500 Subject: md/raid5: make use of spin_lock_irq over local_irq_disable + spin_lock On mainline, there is no functional difference, just less code, and symmetric lock/unlock paths. On PREEMPT_RT builds, this fixes the following warning, seen by Alexander GQ Gerasiov, due to the sleeping nature of spinlocks. BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:993 in_atomic(): 0, irqs_disabled(): 1, pid: 58, name: kworker/u12:1 CPU: 5 PID: 58 Comm: kworker/u12:1 Tainted: G W 4.9.20-rt16-stand6-686 #1 Hardware name: Supermicro SYS-5027R-WRF/X9SRW-F, BIOS 3.2a 10/28/2015 Workqueue: writeback wb_workfn (flush-253:0) Call Trace: dump_stack+0x47/0x68 ? migrate_enable+0x4a/0xf0 ___might_sleep+0x101/0x180 rt_spin_lock+0x17/0x40 add_stripe_bio+0x4e3/0x6c0 [raid456] ? preempt_count_add+0x42/0xb0 raid5_make_request+0x737/0xdd0 [raid456] Reported-by: Alexander GQ Gerasiov Tested-by: Alexander GQ Gerasiov Signed-off-by: Julia Cartwright Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2e38cfac5b1d..3809a2192132 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) { int i; - local_irq_disable(); - spin_lock(conf->hash_locks); + spin_lock_irq(conf->hash_locks); for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); spin_lock(&conf->device_lock); @@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) { int i; spin_unlock(&conf->device_lock); - for (i = NR_STRIPE_HASH_LOCKS; i; i--) - spin_unlock(conf->hash_locks + i - 1); - local_irq_enable(); + for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) + spin_unlock(conf->hash_locks + i); + spin_unlock_irq(conf->hash_locks); } /* Find first data disk in a raid6 stripe */ @@ -714,12 +713,11 @@ static bool is_full_stripe_write(struct stripe_head *sh) static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) { - local_irq_disable(); if (sh1 > sh2) { - spin_lock(&sh2->stripe_lock); + spin_lock_irq(&sh2->stripe_lock); spin_lock_nested(&sh1->stripe_lock, 1); } else { - spin_lock(&sh1->stripe_lock); + spin_lock_irq(&sh1->stripe_lock); spin_lock_nested(&sh2->stripe_lock, 1); } } @@ -727,8 +725,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) { spin_unlock(&sh1->stripe_lock); - spin_unlock(&sh2->stripe_lock); - local_irq_enable(); + spin_unlock_irq(&sh2->stripe_lock); } /* Only freshly new full stripe normal write stripe can be added to a batch list */ -- cgit v1.2.3-59-g8ed1b From 7f48d0b48cba2ddc03d09353ba4ef6ae680da520 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 25 Apr 2017 10:05:12 +0100 Subject: drm/i915/gvt: fix typo: "supporte" -> "support" trivial fix to typo in WARN_ONCE message Signed-off-by: Colin Ian King Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 6da9ae1618e3..31624f1df893 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1240,7 +1240,7 @@ static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset, mode = vgpu_vreg(vgpu, offset); if (GFX_MODE_BIT_SET_IN_MASK(mode, START_DMA)) { - WARN_ONCE(1, "VM(%d): iGVT-g doesn't supporte GuC\n", + WARN_ONCE(1, "VM(%d): iGVT-g doesn't support GuC\n", vgpu->id); return 0; } -- cgit v1.2.3-59-g8ed1b From 657314b7a5d16961e7e0ecdae4a59d28123e74c0 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 28 Apr 2017 17:30:52 +0200 Subject: drm/etnaviv: don't put fence in case of submit failure If we bail out of the submit before actually adding the cmdstream to the kernel ring there is no valid fence to put. Make sure to skip the fence_put in that case. Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index e1909429837e..de80ee1b71df 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -44,6 +44,7 @@ static struct etnaviv_gem_submit *submit_create(struct drm_device *dev, /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; + submit->fence = NULL; ww_acquire_init(&submit->ticket, &reservation_ww_class); } @@ -294,7 +295,8 @@ static void submit_cleanup(struct etnaviv_gem_submit *submit) } ww_acquire_fini(&submit->ticket); - dma_fence_put(submit->fence); + if (submit->fence) + dma_fence_put(submit->fence); kfree(submit); } -- cgit v1.2.3-59-g8ed1b From d86ab9cff8b936aadde444d0e263a8db5ff0349b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 3 May 2017 14:30:48 +0100 Subject: cpufreq: schedutil: use now as reference when aggregating shared policy requests Currently, sugov_next_freq_shared() uses last_freq_update_time as a reference to decide when to start considering CPU contributions as stale. However, since last_freq_update_time is set by the last CPU that issued a frequency transition, this might cause problems in certain cases. In practice, the detection of stale utilization values fails whenever the CPU with such values was the last to update the policy. For example (and please note again that the SCHED_CPUFREQ_RT flag is not the problem here, but only the detection of after how much time that flag has to be considered stale), suppose a policy with 2 CPUs: CPU0 | CPU1 | | RT task scheduled | SCHED_CPUFREQ_RT is set | CPU1->last_update = now | freq transition to max | last_freq_update_time = now | more than TICK_NSEC nsecs | a small CFS wakes up | CPU0->last_update = now1 | delta_ns(CPU0) < TICK_NSEC* | CPU0's util is considered | delta_ns(CPU1) = | last_freq_update_time - | CPU1->last_update = 0 | < TICK_NSEC | CPU1 is still considered | CPU1->SCHED_CPUFREQ_RT is set | we stay at max (until CPU1 | exits from idle) | * delta_ns is actually negative as now1 > last_freq_update_time While last_freq_update_time is a sensible reference for rate limiting, it doesn't seem to be useful for working around stale CPU states. Fix the problem by always considering now (time) as the reference for deciding when CPUs have stale contributions. Signed-off-by: Juri Lelli Acked-by: Vincent Guittot Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 76877a62b5fa..622eed1b7658 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -245,11 +245,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned long util = 0, max = 1; unsigned int j; @@ -265,7 +264,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) * enough, don't take the CPU into account as it probably is * idle now (and clear iowait_boost for it). */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; + delta_ns = time - j_sg_cpu->last_update; if (delta_ns > TICK_NSEC) { j_sg_cpu->iowait_boost = 0; continue; @@ -309,7 +308,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, if (flags & SCHED_CPUFREQ_RT_DL) next_f = sg_policy->policy->cpuinfo.max_freq; else - next_f = sugov_next_freq_shared(sg_cpu); + next_f = sugov_next_freq_shared(sg_cpu, time); sugov_update_commit(sg_policy, time, next_f); } -- cgit v1.2.3-59-g8ed1b From d90c902449a7561f1b1d58ba5a0d11728ce8b0b2 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 5 May 2017 07:40:42 +0200 Subject: af_key: Fix slab-out-of-bounds in pfkey_compile_policy. The sadb_x_sec_len is stored in the unit 'byte divided by eight'. So we have to multiply this value by eight before we can do size checks. Otherwise we may get a slab-out-of-bounds when we memcpy the user sec_ctx. Fixes: df71837d502 ("[LSM-IPSec]: Security association restriction.") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Steffen Klassert --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index c1950bb14735..512dc43d0ce6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3285,7 +3285,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) { + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From 2c1497bbc8fdee897341ab48ee9c9209b421b8c0 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Mon, 8 May 2017 10:30:18 +0300 Subject: xfrm: Fix NETDEV_DOWN with IPSec offload Upon NETDEV_DOWN event, all xfrm_state objects which are bound to the device are flushed. The condition for this is wrong, though, testing dev->hw_features instead of dev->features. If a device has non-user-modifiable NETIF_F_HW_ESP, then its xfrm_state objects are not flushed, causing a crash later on after the device is deleted. Check dev->features instead of dev->hw_features. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Ilan Tayari Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 8ec8a3fcf8d4..574e6f32f94f 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -170,7 +170,7 @@ static int xfrm_dev_feat_change(struct net_device *dev) static int xfrm_dev_down(struct net_device *dev) { - if (dev->hw_features & NETIF_F_HW_ESP) + if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_garbage_collect(dev_net(dev)); -- cgit v1.2.3-59-g8ed1b From 2345ab1df8a9aa3cdca942142b48eb141faeb1c3 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Mon, 8 May 2017 09:27:39 +0800 Subject: drm/i915/gvt: not to restore in-context mmio Needn't to restore the in-context MMIO when SCHEDULE_OUT. Sometimes with restoring the in-context MMIO, some GPU hang can be observed. So remove the in-context MMIO restore Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 0beb83563b08..05b75b9f852a 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -333,6 +333,9 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id) } else v = mmio->value; + if (mmio->in_context) + continue; + I915_WRITE(mmio->reg, v); POSTING_READ(mmio->reg); -- cgit v1.2.3-59-g8ed1b From 3c5ab3f395d66a9e4e937fcfdf6ebc63894f028b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 29 Apr 2017 20:33:09 +0300 Subject: ipvs: SNAT packet replies only for NATed connections We do not check if packet from real server is for NAT connection before performing SNAT. This causes problems for setups that use DR/TUN and allow local clients to access the real server directly, for example: - local client in director creates IPVS-DR/TUN connection CIP->VIP and the request packets are routed to RIP. Talks are finished but IPVS connection is not expired yet. - second local client creates non-IPVS connection CIP->RIP with same reply tuple RIP->CIP and when replies are received on LOCAL_IN we wrongly assign them for the first client connection because RIP->CIP matches the reply direction. As result, IPVS SNATs replies for non-IPVS connections. The problem is more visible to local UDP clients but in rare cases it can happen also for TCP or remote clients when the real server sends the reply traffic via the director. So, better to be more precise for the reply traffic. As replies are not expected for DR/TUN connections, better to not touch them. Reported-by: Nick Moriarty Tested-by: Nick Moriarty Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d2d7bdf1d510..ad99c1ceea6f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in */ cp = pp->conn_out_get(ipvs, af, skb, &iph); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { @@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* -- cgit v1.2.3-59-g8ed1b From 2214c260c72b0bd94e6c1c19bf451686212025d3 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Mon, 8 May 2017 11:56:55 +0200 Subject: md: don't return -EAGAIN in md_allow_write for external metadata arrays This essentially reverts commit b5470dc5fc18 ("md: resolve external metadata handling deadlock in md_allow_write") with some adjustments. Since commit 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") changing array_state to 'active' does not use mddev_lock() and will not cause a deadlock with md_allow_write(). This revert simplifies userspace tools that write to sysfs attributes like "stripe_cache_size" or "consistency_policy" because it removes the need for special handling for external metadata arrays, checking for EAGAIN and retrying the write. Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- drivers/md/md.c | 20 ++++++++------------ drivers/md/md.h | 2 +- drivers/md/raid1.c | 9 +++------ drivers/md/raid5.c | 12 +++--------- 4 files changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 82f798be964f..10367ffe92e3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8022,18 +8022,15 @@ EXPORT_SYMBOL(md_write_end); * may proceed without blocking. It is important to call this before * attempting a GFP_KERNEL allocation while holding the mddev lock. * Must be called with mddev_lock held. - * - * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock - * is dropped, so return -EAGAIN after notifying userspace. */ -int md_allow_write(struct mddev *mddev) +void md_allow_write(struct mddev *mddev) { if (!mddev->pers) - return 0; + return; if (mddev->ro) - return 0; + return; if (!mddev->pers->sync_request) - return 0; + return; spin_lock(&mddev->lock); if (mddev->in_sync) { @@ -8046,13 +8043,12 @@ int md_allow_write(struct mddev *mddev) spin_unlock(&mddev->lock); md_update_sb(mddev, 0); sysfs_notify_dirent_safe(mddev->sysfs_state); + /* wait for the dirty state to be recorded in the metadata */ + wait_event(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) && + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); } else spin_unlock(&mddev->lock); - - if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) - return -EAGAIN; - else - return 0; } EXPORT_SYMBOL_GPL(md_allow_write); diff --git a/drivers/md/md.h b/drivers/md/md.h index 4e75d121bfcc..11f15146ce51 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -665,7 +665,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, bool metadata_op); extern void md_do_sync(struct md_thread *thread); extern void md_new_event(struct mddev *mddev); -extern int md_allow_write(struct mddev *mddev); +extern void md_allow_write(struct mddev *mddev); extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); extern int md_check_no_bitmap(struct mddev *mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7ed59351fe97..7c1f73398800 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3197,7 +3197,7 @@ static int raid1_reshape(struct mddev *mddev) struct r1conf *conf = mddev->private; int cnt, raid_disks; unsigned long flags; - int d, d2, err; + int d, d2; /* Cannot change chunk_size, layout, or level */ if (mddev->chunk_sectors != mddev->new_chunk_sectors || @@ -3209,11 +3209,8 @@ static int raid1_reshape(struct mddev *mddev) return -EINVAL; } - if (!mddev_is_clustered(mddev)) { - err = md_allow_write(mddev); - if (err) - return err; - } + if (!mddev_is_clustered(mddev)) + md_allow_write(mddev); raid_disks = mddev->raid_disks + mddev->delta_disks; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3809a2192132..f8055a7abb4b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2309,14 +2309,12 @@ static int resize_stripes(struct r5conf *conf, int newsize) struct stripe_head *osh, *nsh; LIST_HEAD(newstripes); struct disk_info *ndisks; - int err; + int err = 0; struct kmem_cache *sc; int i; int hash, cnt; - err = md_allow_write(conf->mddev); - if (err) - return err; + md_allow_write(conf->mddev); /* Step 1 */ sc = kmem_cache_create(conf->cache_name[1-conf->active_name], @@ -6310,7 +6308,6 @@ int raid5_set_cache_size(struct mddev *mddev, int size) { struct r5conf *conf = mddev->private; - int err; if (size <= 16 || size > 32768) return -EINVAL; @@ -6322,10 +6319,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) ; mutex_unlock(&conf->cache_size_mutex); - - err = md_allow_write(mddev); - if (err) - return err; + md_allow_write(mddev); mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) -- cgit v1.2.3-59-g8ed1b From c1061255031dea76ee9e082d081b88ac6d6c8265 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 27 Apr 2017 16:25:03 +0200 Subject: scsi: libfc: do not flood console with messages 'libfc: queue full ...' When the FCoE sending side becomes congested libfc tries to reduce the queue depth on the host; however due to the built-in lag before attempting to ramp down the queue depth _again_ the message log is flooded with the following message: libfc: queue full, reducing can_queue to 512 With this patch the message is printed only once (ie when it's actually changed). Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Acked-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_fcp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index a808e8ef1d08..234352da5c3c 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -407,11 +407,12 @@ unlock: * can_queue. Eventually we will hit the point where we run * on all reserved structs. */ -static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport) +static bool fc_fcp_can_queue_ramp_down(struct fc_lport *lport) { struct fc_fcp_internal *si = fc_get_scsi_internal(lport); unsigned long flags; int can_queue; + bool changed = false; spin_lock_irqsave(lport->host->host_lock, flags); @@ -427,9 +428,11 @@ static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport) if (!can_queue) can_queue = 1; lport->host->can_queue = can_queue; + changed = true; unlock: spin_unlock_irqrestore(lport->host->host_lock, flags); + return changed; } /* @@ -1896,11 +1899,11 @@ int fc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc_cmd) if (!fc_fcp_lport_queue_ready(lport)) { if (lport->qfull) { - fc_fcp_can_queue_ramp_down(lport); - shost_printk(KERN_ERR, lport->host, - "libfc: queue full, " - "reducing can_queue to %d.\n", - lport->host->can_queue); + if (fc_fcp_can_queue_ramp_down(lport)) + shost_printk(KERN_ERR, lport->host, + "libfc: queue full, " + "reducing can_queue to %d.\n", + lport->host->can_queue); } rc = SCSI_MLQUEUE_HOST_BUSY; goto out; -- cgit v1.2.3-59-g8ed1b From 4492b739c9ccfaf828bd7c02dc779ec2a5e55ff4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 27 Apr 2017 15:08:26 -0700 Subject: scsi: lpfc: Fix panic on BFS configuration To select the appropriate shost template, the driver is issuing a mailbox command to retrieve the wwn. Turns out the sending of the command precedes the reset of the function. On SLI-4 adapters, this is inconsequential as the mailbox command location is specified by dma via the BMBX register. However, on SLI-3 adapters, the location of the mailbox command submission area changes. When the function is first powered on or reset, the cmd is submitted via PCI bar memory. Later the driver changes the function config to use host memory and DMA. The request to start a mailbox command is the same, a simple doorbell write, regardless of submission area. So.. if there has not been a boot driver run against the adapter, the mailbox command works as defaults are ok. But, if the boot driver has configured the card and, and if no platform pci function/slot reset occurs as the os starts, the mailbox command will fail. The SLI-3 device will use the stale boot driver dma location. This can cause PCI eeh errors. Fix is to reset the sli-3 function before sending the mailbox command, thus synchronizing the function/driver on mailbox location. Note: The fix uses routines that are typically invoked later in the call flow to reset the sli-3 device. The issue in using those routines is that the normal (non-fix) flow does additional initialization, namely the allocation of the pport structure. So, rather than significantly reworking the initialization flow so that the pport is alloc'd first, pointer checks are added to work around it. Checks are limited to the routines invoked by a sli-3 adapter (s3 routines) as this fix/early call is only invoked on a sli3 adapter. Nothing changes post the fix. Subsequent initialization, and another adapter reset, still occur - both on sli-3 and sli-4 adapters. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Fixes: 96418b5e2c88 ("scsi: lpfc: Fix eh_deadline setting for sli3 adapters.") Cc: stable@vger.kernel.org # v4.11+ Reviewed-by: Ewan D. Milne Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_init.c | 7 +++++++ drivers/scsi/lpfc/lpfc_sli.c | 19 ++++++++++++------- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 944b32ca4931..1c55408ac718 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -294,6 +294,7 @@ int lpfc_selective_reset(struct lpfc_hba *); void lpfc_reset_barrier(struct lpfc_hba *); int lpfc_sli_brdready(struct lpfc_hba *, uint32_t); int lpfc_sli_brdkill(struct lpfc_hba *); +int lpfc_sli_chipset_init(struct lpfc_hba *phba); int lpfc_sli_brdreset(struct lpfc_hba *); int lpfc_sli_brdrestart(struct lpfc_hba *); int lpfc_sli_hba_setup(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 90ae354a9c45..e85f273e34ac 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3602,6 +3602,13 @@ lpfc_get_wwpn(struct lpfc_hba *phba) LPFC_MBOXQ_t *mboxq; MAILBOX_t *mb; + if (phba->sli_rev < LPFC_SLI_REV4) { + /* Reset the port first */ + lpfc_sli_brdrestart(phba); + rc = lpfc_sli_chipset_init(phba); + if (rc) + return (uint64_t)-1; + } mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index cf19f4976f5f..2a4fc00dfa9b 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4204,13 +4204,16 @@ lpfc_sli_brdreset(struct lpfc_hba *phba) /* Reset HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "0325 Reset HBA Data: x%x x%x\n", - phba->pport->port_state, psli->sli_flag); + (phba->pport) ? phba->pport->port_state : 0, + psli->sli_flag); /* perform board reset */ phba->fc_eventTag = 0; phba->link_events = 0; - phba->pport->fc_myDID = 0; - phba->pport->fc_prevDID = 0; + if (phba->pport) { + phba->pport->fc_myDID = 0; + phba->pport->fc_prevDID = 0; + } /* Turn off parity checking and serr during the physical reset */ pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value); @@ -4336,7 +4339,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba) /* Restart HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "0337 Restart HBA Data: x%x x%x\n", - phba->pport->port_state, psli->sli_flag); + (phba->pport) ? phba->pport->port_state : 0, + psli->sli_flag); word0 = 0; mb = (MAILBOX_t *) &word0; @@ -4350,7 +4354,7 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba) readl(to_slim); /* flush */ /* Only skip post after fc_ffinit is completed */ - if (phba->pport->port_state) + if (phba->pport && phba->pport->port_state) word0 = 1; /* This is really setting up word1 */ else word0 = 0; /* This is really setting up word1 */ @@ -4359,7 +4363,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba) readl(to_slim); /* flush */ lpfc_sli_brdreset(phba); - phba->pport->stopped = 0; + if (phba->pport) + phba->pport->stopped = 0; phba->link_state = LPFC_INIT_START; phba->hba_flag = 0; spin_unlock_irq(&phba->hbalock); @@ -4446,7 +4451,7 @@ lpfc_sli_brdrestart(struct lpfc_hba *phba) * iteration, the function will restart the HBA again. The function returns * zero if HBA successfully restarted else returns negative error code. **/ -static int +int lpfc_sli_chipset_init(struct lpfc_hba *phba) { uint32_t status, i = 0; -- cgit v1.2.3-59-g8ed1b From 4ff7adc8c7886bcf6e48f09c49d3f339f33d7e79 Mon Sep 17 00:00:00 2001 From: Zhou Zhengping Date: Fri, 28 Apr 2017 17:43:04 +0800 Subject: scsi: Skip deleted devices in __scsi_device_lookup When a device is unplugged from a SCSI controller, if the scsi_device is still in use by application layer, it won't get released until users close it. In this case, scsi_device_remove just set the scsi_device's state to be SDEV_DEL. But if you plug the disk just before the old scsi_device is released, then there will be two scsi_device structures in scsi_host->__devices. When the next unplug event happens, some low-level drivers will check whether the scsi_device has been added to host (for example the MegaRAID SAS series controller) by calling scsi_device_lookup(call __scsi_device_lookup) in function megasas_aen_polling. __scsi_device_lookup will return the first scsi_device. Because its state is SDEV_DEL, the scsi_device_lookup will return NULL, making the low-level driver assume that the scsi_device has been removed, and won't call scsi_device_remove which will lead to hot swap failure. Signed-off-by: Zhou Zhengping Tested-by: Zeng Rujia Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195607 Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 7bfbcfa7af40..61cdd99ae41e 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -763,6 +763,8 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost, struct scsi_device *sdev; list_for_each_entry(sdev, &shost->__devices, siblings) { + if (sdev->sdev_state == SDEV_DEL) + continue; if (sdev->channel == channel && sdev->id == id && sdev->lun ==lun) return sdev; -- cgit v1.2.3-59-g8ed1b From 7cc49bbd005a5447afafc285dd302cbb8065eb28 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 May 2017 09:57:20 +0200 Subject: scsi: MAINTAINERS: update OSD entries The open-osd domain doesn't exist anymore, and mails to the list lead to really annoying bounced that repeat every day. Also the primarydata address for Benny bounces, and while I have a new one for him he doesn't seem to be maintaining the OSD code any more. Which beggs the question: should we really leave the Supported status in MAINTAINERS given that the code is barely maintained? Signed-off-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Boaz Harrosh Acked-by: Benny Halevy Signed-off-by: Martin K. Petersen --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 08360bb0468b..0e174d4b86a3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9530,10 +9530,6 @@ F: drivers/net/wireless/intersil/orinoco/ OSD LIBRARY and FILESYSTEM M: Boaz Harrosh -M: Benny Halevy -L: osd-dev@open-osd.org -W: http://open-osd.org -T: git git://git.open-osd.org/open-osd.git S: Maintained F: drivers/scsi/osd/ F: include/scsi/osd_* -- cgit v1.2.3-59-g8ed1b From 7aa686d35598cbe58b3f7ce32e4ec17649190f3d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 May 2017 10:45:03 -0700 Subject: scsi: scsi_lib: Add #include This patch avoids that when building with W=1 the compiler complains that __scsi_init_queue() has not been declared. See also commit d48777a633d6 ("scsi: remove __scsi_alloc_queue"). Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 814a4bd8405d..e31f1cc90b81 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -30,6 +30,7 @@ #include #include #include +#include /* __scsi_init_queue() */ #include #include -- cgit v1.2.3-59-g8ed1b From 87ea6bdd8da88842a8247921f58ca495b4da79ef Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 3 May 2017 14:17:55 -0500 Subject: scsi: qedf: properly update arguments position in function call Properly update the position of the arguments in function call. Addresses-Coverity-ID: 1402010 Signed-off-by: Gustavo A. R. Silva Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_els.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c index c505d41f6dc8..90627033bde6 100644 --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -109,7 +109,7 @@ retry_els: did = fcport->rdata->ids.port_id; sid = fcport->sid; - __fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, sid, did, + __fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, did, sid, FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); -- cgit v1.2.3-59-g8ed1b From 0d618cf4f24cebb822eaf57c27985266e4110176 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 May 2017 00:22:16 +0300 Subject: scsi: lpfc: double lock typo in lpfc_ns_rsp() There is a double lock bug here so this will deadlock instead of unlocking. Fixes: 1c5b12f76301 ("Fix implicit logo and RSCN handling for NVMET") Signed-off-by: Dan Carpenter Reviewed-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 1487406aea77..c7962dae4dab 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -630,7 +630,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, NLP_EVT_DEVICE_RECOVERY); spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NVMET_RECOV; - spin_lock_irq(shost->host_lock); + spin_unlock_irq(shost->host_lock); } } -- cgit v1.2.3-59-g8ed1b From 0d2fc3b48ba0e267962c861c5258564c335dd1f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 May 2017 00:23:57 +0300 Subject: scsi: qedf: Cleanup the type of io_log->op We store sc_cmd->cmnd[0] which is an unsigned char in io_log->op so this should also be unsigned char. The other thing is that this is displayed in the debugfs: seq_printf(s, "0x%02x:", io_log->op); Smatch complains that the formatting won't work for negative values so changing it to unsigned silences that warning as well. Signed-off-by: Dan Carpenter Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index 40aeb6bb96a2..07ee88200e91 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -259,7 +259,7 @@ struct qedf_io_log { uint16_t task_id; uint32_t port_id; /* Remote port fabric ID */ int lun; - char op; /* SCSI CDB */ + unsigned char op; /* SCSI CDB */ uint8_t lba[4]; unsigned int bufflen; /* SCSI buffer length */ unsigned int sg_count; /* Number of SG elements */ -- cgit v1.2.3-59-g8ed1b From cd22874fec8fbecd4999b55e67baf3d71775f6f3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 5 May 2017 15:42:55 -0700 Subject: scsi: qedf: Avoid reading past end of buffer Using memcpy() from a string that is shorter than the length copied means the destination buffer is being filled with arbitrary data from the kernel rodata segment. Instead, use strncpy() which will fill the trailing bytes with zeros. This was found with the future CONFIG_FORTIFY_SOURCE feature. Cc: Daniel Micay Signed-off-by: Kees Cook Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index cceddd995a4b..a5c97342fd5d 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2895,7 +2895,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER; slowpath_params.drv_rev = QEDF_DRIVER_REV_VER; slowpath_params.drv_eng = QEDF_DRIVER_ENG_VER; - memcpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE); + strncpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE); rc = qed_ops->common->slowpath_start(qedf->cdev, &slowpath_params); if (rc) { QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n"); -- cgit v1.2.3-59-g8ed1b From 07cc1ccfb84320582c9ac389a21cd81df82bc123 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 5 May 2017 16:31:47 -0700 Subject: scsi: cxlflash: Select IRQ_POLL The driver now uses IRQ_POLL and needs to select it to avoid the following build error. ERROR: ".irq_poll_complete" [drivers/scsi/cxlflash/cxlflash.ko] undefined! ERROR: ".irq_poll_sched" [drivers/scsi/cxlflash/cxlflash.ko] undefined! ERROR: ".irq_poll_disable" [drivers/scsi/cxlflash/cxlflash.ko] undefined! ERROR: ".irq_poll_init" [drivers/scsi/cxlflash/cxlflash.ko] undefined! Fixes: cba06e6de403 ("scsi: cxlflash: Implement IRQ polling for RRQ processing") Signed-off-by: Guenter Roeck Acked-by: Matthew R. Ochs Signed-off-by: Martin K. Petersen --- drivers/scsi/cxlflash/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig index c052104e523e..a011c5dbf214 100644 --- a/drivers/scsi/cxlflash/Kconfig +++ b/drivers/scsi/cxlflash/Kconfig @@ -5,6 +5,7 @@ config CXLFLASH tristate "Support for IBM CAPI Flash" depends on PCI && SCSI && CXL && EEH + select IRQ_POLL default m help Allows CAPI Accelerated IO to Flash -- cgit v1.2.3-59-g8ed1b From 019c0d66f66a8612bb867caf05e865a4766238a4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 6 May 2017 23:13:55 +0100 Subject: scsi: lpfc: ensure els_wq is being checked before destroying it I believe there is a typo on the wq destroy of els_wq, currently the driver is checking if els_cq is not null and I think this should be a check on els_wq instead. Detected by CoverityScan, CID#1411629 ("Copy-paste error") Signed-off-by: Colin Ian King Acked-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e85f273e34ac..4b1eb98c228d 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8854,7 +8854,7 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) lpfc_wq_destroy(phba, phba->sli4_hba.nvmels_wq); /* Unset ELS work queue */ - if (phba->sli4_hba.els_cq) + if (phba->sli4_hba.els_wq) lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); /* Unset unsolicited receive queue */ -- cgit v1.2.3-59-g8ed1b From b77b36cb7272ec5b9fb000e2ff18e947d9586a22 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 3 May 2017 17:29:01 +0100 Subject: scsi: pmcraid: remove redundant check to see if request_size is less than zero The 2nd check to see if request_size is less than zero is redundant because the first check takes error exit path on this condition. So, since it is redundant, remove it. Detected by CoverityScan, CID#146149 ("Logically Dead Code") Signed-off-by: Colin Ian King Reviewed-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index a4aadf5f4dc6..1cc814f1505a 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -3770,9 +3770,6 @@ static long pmcraid_ioctl_passthrough( pmcraid_err("couldn't build passthrough ioadls\n"); goto out_free_cmd; } - } else if (request_size < 0) { - rc = -EINVAL; - goto out_free_cmd; } /* If data is being written into the device, copy the data from user -- cgit v1.2.3-59-g8ed1b From 29efc390b9462582ae95eb9a0b8cd17ab956afc0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sun, 7 May 2017 17:36:24 -0700 Subject: md/md0: optimize raid0 discard handling There are complaints that raid0 discard handling is slow. Currently we divide discard request into chunks and dispatch to underlayer disks. The block layer will do merge to form big requests. This causes a lot of request split/merge and uses significant CPU time. A simple idea is to calculate the range for each raid disk for an IO request and send a discard request to raid disks, which will avoid the split/merge completely. Previously Coly tried the approach, but the implementation was too complex because of raid0 zones. This patch always split bio in zone boundary and handle bio within one zone. It simplifies the implementation a lot. Reviewed-by: NeilBrown Acked-by: Coly Li Signed-off-by: Shaohua Li --- drivers/md/raid0.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 102 insertions(+), 14 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 84e58596594d..d6c0bc76e837 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev) blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_discard_sectors(mddev->queue, UINT_MAX); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); blk_queue_io_opt(mddev->queue, @@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev, } } +static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) +{ + struct r0conf *conf = mddev->private; + struct strip_zone *zone; + sector_t start = bio->bi_iter.bi_sector; + sector_t end; + unsigned int stripe_size; + sector_t first_stripe_index, last_stripe_index; + sector_t start_disk_offset; + unsigned int start_disk_index; + sector_t end_disk_offset; + unsigned int end_disk_index; + unsigned int disk; + + zone = find_zone(conf, &start); + + if (bio_end_sector(bio) > zone->zone_end) { + struct bio *split = bio_split(bio, + zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO, + mddev->bio_set); + bio_chain(split, bio); + generic_make_request(bio); + bio = split; + end = zone->zone_end; + } else + end = bio_end_sector(bio); + + if (zone != conf->strip_zone) + end = end - zone[-1].zone_end; + + /* Now start and end is the offset in zone */ + stripe_size = zone->nb_dev * mddev->chunk_sectors; + + first_stripe_index = start; + sector_div(first_stripe_index, stripe_size); + last_stripe_index = end; + sector_div(last_stripe_index, stripe_size); + + start_disk_index = (int)(start - first_stripe_index * stripe_size) / + mddev->chunk_sectors; + start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % + mddev->chunk_sectors) + + first_stripe_index * mddev->chunk_sectors; + end_disk_index = (int)(end - last_stripe_index * stripe_size) / + mddev->chunk_sectors; + end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % + mddev->chunk_sectors) + + last_stripe_index * mddev->chunk_sectors; + + for (disk = 0; disk < zone->nb_dev; disk++) { + sector_t dev_start, dev_end; + struct bio *discard_bio = NULL; + struct md_rdev *rdev; + + if (disk < start_disk_index) + dev_start = (first_stripe_index + 1) * + mddev->chunk_sectors; + else if (disk > start_disk_index) + dev_start = first_stripe_index * mddev->chunk_sectors; + else + dev_start = start_disk_offset; + + if (disk < end_disk_index) + dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; + else if (disk > end_disk_index) + dev_end = last_stripe_index * mddev->chunk_sectors; + else + dev_end = end_disk_offset; + + if (dev_end <= dev_start) + continue; + + rdev = conf->devlist[(zone - conf->strip_zone) * + conf->strip_zone[0].nb_dev + disk]; + if (__blkdev_issue_discard(rdev->bdev, + dev_start + zone->dev_start + rdev->data_offset, + dev_end - dev_start, GFP_NOIO, 0, &discard_bio) || + !discard_bio) + continue; + bio_chain(discard_bio, bio); + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(rdev->bdev), + discard_bio, disk_devt(mddev->gendisk), + bio->bi_iter.bi_sector); + generic_make_request(discard_bio); + } + bio_endio(bio); +} + static void raid0_make_request(struct mddev *mddev, struct bio *bio) { struct strip_zone *zone; @@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) return; } + if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) { + raid0_handle_discard(mddev, bio); + return; + } + bio_sector = bio->bi_iter.bi_sector; sector = bio_sector; chunk_sects = mddev->chunk_sectors; @@ -498,19 +592,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) bio->bi_iter.bi_sector = sector + zone->dev_start + tmp_dev->data_offset; - if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { - /* Just ignore it */ - bio_endio(bio); - } else { - if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), - bio, disk_devt(mddev->gendisk), - bio_sector); - mddev_check_writesame(mddev, bio); - mddev_check_write_zeroes(mddev, bio); - generic_make_request(bio); - } + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), + bio, disk_devt(mddev->gendisk), + bio_sector); + mddev_check_writesame(mddev, bio); + mddev_check_write_zeroes(mddev, bio); + generic_make_request(bio); } static void raid0_status(struct seq_file *seq, struct mddev *mddev) -- cgit v1.2.3-59-g8ed1b From f5c8b9601036869e162cb278aaafbf003dc4e5a0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 May 2017 09:15:07 +0200 Subject: s390/uaccess: use sane length for __strncpy_from_user() The average string that is copied from user space to kernel space is rather short. E.g. booting a system involves about 50.000 strncpy_from_user() calls where the NULL terminated string has an average size of 27 bytes. By default our s390 specific strncpy_from_user() implementation however copies up to 4096 bytes, which is a waste of cpu cycles and cache lines. Therefore reduce the default length to L1_CACHE_BYTES (256 bytes), which also reduces the average execution time of strncpy_from_user() by 30-40%. Alternatively we could have switched to the generic strncpy_from_user() implementation, however it turned out that that variant would be slower than the now optimized s390 variant. Reported-by: Al Viro Reported-by: Linus Torvalds Signed-off-by: Heiko Carstens Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 1e5bb2b86c42..b3bd3f23b8e8 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -337,8 +337,8 @@ long __strncpy_from_user(char *dst, const char __user *src, long size) return 0; done = 0; do { - offset = (size_t)src & ~PAGE_MASK; - len = min(size - done, PAGE_SIZE - offset); + offset = (size_t)src & (L1_CACHE_BYTES - 1); + len = min(size - done, L1_CACHE_BYTES - offset); if (copy_from_user(dst, src, len)) return -EFAULT; len_str = strnlen(dst, len); -- cgit v1.2.3-59-g8ed1b From 80ba38469aa28bbcfc7a31e5b41adfc42120465e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 4 May 2017 13:06:58 +0200 Subject: s390/topology: let topology_mnest_limit() return unsigned char Fixes a couple of compile warnings with gcc 7.1.0 : arch/s390/kernel/sysinfo.c:578:20: note: directive argument in the range [-2147483648, 4] sprintf(link_to, "15_1_%d", topology_mnest_limit()); Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sysinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 73bff45ced55..e784bed6ed7f 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -146,7 +146,7 @@ extern int topology_max_mnest; * Returns the maximum nesting level supported by the cpu topology code. * The current maximum level is 4 which is the drawer level. */ -static inline int topology_mnest_limit(void) +static inline unsigned char topology_mnest_limit(void) { return min(topology_max_mnest, 4); } -- cgit v1.2.3-59-g8ed1b From 2685df6776b2c69967eaead48f694869f7dc91ca Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 4 May 2017 13:35:33 +0200 Subject: s390/ccwgroup: increase string buffer size Avoid false positive warnings like this with gcc 7.1: drivers/s390/cio/ccwgroup.c:41:21: warning: '%d' directive writing between 1 and 10 bytes into a region of size 4 sprintf(str, "cdev%d", i); and simply increase the size of the string buffer. Reviewed-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index e443b0d0b236..34b9ad6b3143 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -35,7 +35,7 @@ static struct bus_type ccwgroup_bus_type; static void __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) { int i; - char str[8]; + char str[16]; for (i = 0; i < gdev->count; i++) { sprintf(str, "cdev%d", i); @@ -238,7 +238,7 @@ static void ccwgroup_release(struct device *dev) static int __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) { - char str[8]; + char str[16]; int i, rc; for (i = 0; i < gdev->count; i++) { -- cgit v1.2.3-59-g8ed1b From 6423ef691c50568e56bb7e33a35bf9f8d6142d23 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 4 May 2017 13:39:18 +0200 Subject: s390/qdio: increase string buffer size Avoid false positive warnings like this with gcc 7.1: drivers/s390/cio/qdio_debug.h:63:4: note: 'snprintf' output between 8 and 17 bytes into a destination of size 16 snprintf(debug_buffer, QDIO_DBF_LEN, text); and simply increase the size of the string buffer. Reviewed-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h index f33ce8577619..1d595d17bf11 100644 --- a/drivers/s390/cio/qdio_debug.h +++ b/drivers/s390/cio/qdio_debug.h @@ -11,7 +11,7 @@ #include "qdio.h" /* that gives us 15 characters in the text event views */ -#define QDIO_DBF_LEN 16 +#define QDIO_DBF_LEN 32 extern debug_info_t *qdio_dbf_setup; extern debug_info_t *qdio_dbf_error; -- cgit v1.2.3-59-g8ed1b From d04a4c76f71dd5335f8e499b59617382d84e2b8d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 4 May 2017 09:42:22 +0200 Subject: s390: move _text symbol to address higher than zero The perf tool assumes that kernel symbols are never present at address zero. In fact it assumes if functions that map symbols to addresses return zero, that the symbol was not found. Given that s390's _text symbol historically is located at address zero this yields at least a couple of false errors and warnings in one of perf's test cases about not present symbols ("perf test 1"). To fix this simply move the _text symbol to address 0x200, just behind the initial psw and channel program located at the beginning of the kernel image. This is now hard coded within the linker script. I tried a nicer solution which moves the initial psw and channel program into an own section. However that would move the symbols within the "real" head.text section to different addresses, since the ".org" statements within head.S are relative to the head.text section. If there is a new section in front, everything else will be moved. Alternatively I could have adjusted all ".org" statements. But this current solution seems to be the easiest one, since nobody really cares where the _text symbol is actually located. Reported-by: Zvonko Kosic Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vmlinux.lds.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 72307f108c40..6e2c42bd1c3b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -31,8 +31,14 @@ SECTIONS { . = 0x00000000; .text : { - _text = .; /* Text and read-only data */ + /* Text and read-only data */ HEAD_TEXT + /* + * E.g. perf doesn't like symbols starting at address zero, + * therefore skip the initial PSW and channel program located + * at address zero and let _text start at 0x200. + */ + _text = 0x200; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT -- cgit v1.2.3-59-g8ed1b From 7e6c7fe95e3d0be80d0d05a2efde5c8fb97602ba Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 26 Apr 2017 17:25:50 +0800 Subject: arm: dts: mt7623: add clock-frequency to the a7 timer node to mt7623.dtsi We need to tell the driver what the timers frequency is and that the core has not be configured by the bootrom. Not doing so makes the unit not boot. Signed-off-by: John Crispin Signed-off-by: Sean Wang Signed-off-by: Matthias Brugger --- arch/arm/boot/dts/mt7623.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi index 402579ab70d2..3a9e9b6aea68 100644 --- a/arch/arm/boot/dts/mt7623.dtsi +++ b/arch/arm/boot/dts/mt7623.dtsi @@ -72,6 +72,8 @@ , , ; + clock-frequency = <13000000>; + arm,cpu-registers-not-fw-configured; }; watchdog: watchdog@10007000 { -- cgit v1.2.3-59-g8ed1b From 1c0803652d82b9a62980886e019c9f70576028a5 Mon Sep 17 00:00:00 2001 From: yong mao Date: Wed, 15 Mar 2017 15:26:39 +0800 Subject: ARM64: dts: mediatek: configure some fixed mmc parameters configure some fixed mmc parameters Signed-off-by: Yong Mao Signed-off-by: Chaotian Jing Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 0ecaad4333a7..1c3634fa94bf 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -134,6 +134,9 @@ bus-width = <8>; max-frequency = <50000000>; cap-mmc-highspeed; + mediatek,hs200-cmd-int-delay=<26>; + mediatek,hs400-cmd-int-delay=<14>; + mediatek,hs400-cmd-resp-sel-rising; vmmc-supply = <&mt6397_vemc_3v3_reg>; vqmmc-supply = <&mt6397_vio18_reg>; non-removable; -- cgit v1.2.3-59-g8ed1b From 6f92120892d94ef6c551da5aa2cf7bc9401c9903 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Apr 2017 23:55:28 +0200 Subject: ARM: omap2+: make omap4_get_cpu1_ns_pa_addr declaration usable When CONFIG_PM is disabled, we get a build error: arch/arm/mach-omap2/omap-smp.c: In function 'omap4_smp_maybe_reset_cpu1': arch/arm/mach-omap2/omap-smp.c:309:20: error: implicit declaration of function 'omap4_get_cpu1_ns_pa_addr'; did you mean 'omap4_get_scu_base'? [-Werror=implicit-function-declaration] We need to fix this in multiple files, to ensure the declaration is visible, to actually build the function without CONFIG_PM, and to only call it when OMAP4 and/or OMAP5 are enabled. Fixes: 351b7c490700 ("ARM: omap2+: Revert omap-smp.c changes resetting CPU1 during boot") Signed-off-by: Arnd Bergmann Acked-by: Tony Lindgren --- arch/arm/mach-omap2/common.h | 3 ++- arch/arm/mach-omap2/omap-mpuss-lowpower.c | 10 +++++----- arch/arm/mach-omap2/omap-smp.c | 11 +++++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 3089d3bfa19b..8cc6338fcb12 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -266,11 +266,12 @@ extern int omap4_cpu_kill(unsigned int cpu); extern const struct smp_operations omap4_smp_ops; #endif +extern u32 omap4_get_cpu1_ns_pa_addr(void); + #if defined(CONFIG_SMP) && defined(CONFIG_PM) extern int omap4_mpuss_init(void); extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state); extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state); -extern u32 omap4_get_cpu1_ns_pa_addr(void); #else static inline int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 03ec6d307c82..4cfc4f9b2c69 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -213,11 +213,6 @@ static void __init save_l2x0_context(void) {} #endif -u32 omap4_get_cpu1_ns_pa_addr(void) -{ - return old_cpu1_ns_pa_addr; -} - /** * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function * The purpose of this function is to manage low power programming @@ -457,6 +452,11 @@ int __init omap4_mpuss_init(void) #endif +u32 omap4_get_cpu1_ns_pa_addr(void) +{ + return old_cpu1_ns_pa_addr; +} + /* * For kexec, we must set CPU1_WAKEUP_NS_PA_ADDR to point to * current kernel's secondary_startup() early before diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 3faf454ba487..33e4953c61a8 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -306,7 +306,6 @@ static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c) cpu1_startup_pa = readl_relaxed(cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_1); - cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr(); /* Did the configured secondary_startup() get overwritten? */ if (!omap4_smp_cpu1_startup_valid(cpu1_startup_pa)) @@ -316,9 +315,13 @@ static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c) * If omap4 or 5 has NS_PA_ADDR configured, CPU1 may be in a * deeper idle state in WFI and will wake to an invalid address. */ - if ((soc_is_omap44xx() || soc_is_omap54xx()) && - !omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr)) - needs_reset = true; + if ((soc_is_omap44xx() || soc_is_omap54xx())) { + cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr(); + if (!omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr)) + needs_reset = true; + } else { + cpu1_ns_pa_addr = 0; + } if (!needs_reset || !c->cpu1_rstctrl_va) return; -- cgit v1.2.3-59-g8ed1b From 560d388950ceda5e7c7cdef7f3d9a8ff297bbf9d Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 3 May 2017 17:17:21 +0200 Subject: CIFS: silence lockdep splat in cifs_relock_file() cifs_relock_file() can perform a down_write() on the inode's lock_sem even though it was already performed in cifs_strict_readv(). Lockdep complains about this. AFAICS, there is no problem here, and lockdep just needs to be told that this nesting is OK. ============================================= [ INFO: possible recursive locking detected ] 4.11.0+ #20 Not tainted --------------------------------------------- cat/701 is trying to acquire lock: (&cifsi->lock_sem){++++.+}, at: cifs_reopen_file+0x7a7/0xc00 but task is already holding lock: (&cifsi->lock_sem){++++.+}, at: cifs_strict_readv+0x177/0x310 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&cifsi->lock_sem); lock(&cifsi->lock_sem); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by cat/701: #0: (&cifsi->lock_sem){++++.+}, at: cifs_strict_readv+0x177/0x310 stack backtrace: CPU: 0 PID: 701 Comm: cat Not tainted 4.11.0+ #20 Call Trace: dump_stack+0x85/0xc2 __lock_acquire+0x17dd/0x2260 ? trace_hardirqs_on_thunk+0x1a/0x1c ? preempt_schedule_irq+0x6b/0x80 lock_acquire+0xcc/0x260 ? lock_acquire+0xcc/0x260 ? cifs_reopen_file+0x7a7/0xc00 down_read+0x2d/0x70 ? cifs_reopen_file+0x7a7/0xc00 cifs_reopen_file+0x7a7/0xc00 ? printk+0x43/0x4b cifs_readpage_worker+0x327/0x8a0 cifs_readpage+0x8c/0x2a0 generic_file_read_iter+0x692/0xd00 cifs_strict_readv+0x29f/0x310 generic_file_splice_read+0x11c/0x1c0 do_splice_to+0xa5/0xc0 splice_direct_to_actor+0xfa/0x350 ? generic_pipe_buf_nosteal+0x10/0x10 do_splice_direct+0xb5/0xe0 do_sendfile+0x278/0x3a0 SyS_sendfile64+0xc4/0xe0 entry_SYSCALL_64_fastpath+0x1f/0xbe Signed-off-by: Rabin Vincent Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6ef78ad838e6..0fd081bd2a2f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -582,7 +582,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - down_read(&cinode->lock_sem); + down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); if (cinode->can_cache_brlcks) { /* can cache locks - no need to relock */ up_read(&cinode->lock_sem); -- cgit v1.2.3-59-g8ed1b From de1892b887eeb85ce458a93979c2108e6f329618 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 4 May 2017 07:54:04 -0500 Subject: Don't delay freeing mids when blocked on slow socket write of request When processing responses, and in particular freeing mids (DeleteMidQEntry), which is very important since it also frees the associated buffers (cifs_buf_release), we can block a long time if (writes to) socket is slow due to low memory or networking issues. We can block in send (smb request) waiting for memory, and be blocked in processing responess (which could free memory if we let it) - since they both grab the server->srv_mutex. In practice, in the DeleteMidQEntry case - there is no reason we need to grab the srv_mutex so remove these around DeleteMidQEntry, and it allows us to free memory faster. Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/cifssmb.c | 7 ------- fs/cifs/smb2pdu.c | 7 ------- fs/cifs/transport.c | 2 -- 3 files changed, 16 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4c01b3f9abf0..4de3186d8a71 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -697,9 +697,7 @@ cifs_echo_callback(struct mid_q_entry *mid) { struct TCP_Server_Info *server = mid->callback_data; - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, 1, CIFS_ECHO_OP); } @@ -1599,9 +1597,7 @@ cifs_readv_callback(struct mid_q_entry *mid) } queue_work(cifsiod_wq, &rdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, 1, 0); } @@ -2058,7 +2054,6 @@ cifs_writev_callback(struct mid_q_entry *mid) { struct cifs_writedata *wdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; unsigned int written; WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; @@ -2095,9 +2090,7 @@ cifs_writev_callback(struct mid_q_entry *mid) } queue_work(cifsiod_wq, &wdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(tcon->ses->server, 1, 0); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 48ff7703b919..e0517f499c38 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2173,9 +2173,7 @@ smb2_echo_callback(struct mid_q_entry *mid) if (mid->mid_state == MID_RESPONSE_RECEIVED) credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, credits_received, CIFS_ECHO_OP); } @@ -2433,9 +2431,7 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_fail_inc(tcon, SMB2_READ_HE); queue_work(cifsiod_wq, &rdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, credits_received, 0); } @@ -2594,7 +2590,6 @@ smb2_writev_callback(struct mid_q_entry *mid) { struct cifs_writedata *wdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; unsigned int written; struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; unsigned int credits_received = 1; @@ -2634,9 +2629,7 @@ smb2_writev_callback(struct mid_q_entry *mid) cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); queue_work(cifsiod_wq, &wdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(tcon->ses->server, credits_received, 0); } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 4d64b5b8fc9c..de589d0d3739 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -613,9 +613,7 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) } spin_unlock(&GlobalMid_Lock); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); return rc; } -- cgit v1.2.3-59-g8ed1b From ae157902aea13926e9596d091cad6bd87b3be084 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Wed, 19 Apr 2017 14:02:31 +0800 Subject: drm/i915/gvt: avoid unnecessary vgpu switch It's no need to switch vgpu if next vgpu is the same with current vgpu, otherwise it will make performance drop in some case. v2: correct the comments. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 34b9acdf3479..6ac77f8cc405 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -53,9 +53,13 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) enum intel_engine_id i; struct intel_engine_cs *engine; - /* no target to schedule */ - if (!scheduler->next_vgpu) + /* no need to schedule if next_vgpu is the same with current_vgpu, + * let scheduler chose next_vgpu again by setting it to NULL. + */ + if (scheduler->next_vgpu == scheduler->current_vgpu) { + scheduler->next_vgpu = NULL; return; + } gvt_dbg_sched("try to schedule next vgpu %d\n", scheduler->next_vgpu->id); -- cgit v1.2.3-59-g8ed1b From bb3338d3474e0329918fda9dae2c52751731eb58 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 8 May 2017 17:39:24 -0700 Subject: md/raid5-cache: in r5l_do_submit_io(), submit io->split_bio first In r5l_do_submit_io(), it is necessary to check io->split_bio before submit io->current_bio. This is because, endio of current_bio may free the whole IO unit, and thus change io->split_bio. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 26ba09282e7c..a6a62e212cd3 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -622,20 +622,30 @@ static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io) __r5l_set_io_unit_state(io, IO_UNIT_IO_START); spin_unlock_irqrestore(&log->io_list_lock, flags); + /* + * In case of journal device failures, submit_bio will get error + * and calls endio, then active stripes will continue write + * process. Therefore, it is not necessary to check Faulty bit + * of journal device here. + * + * We can't check split_bio after current_bio is submitted. If + * io->split_bio is null, after current_bio is submitted, current_bio + * might already be completed and the io_unit is freed. We submit + * split_bio first to avoid the issue. + */ + if (io->split_bio) { + if (io->has_flush) + io->split_bio->bi_opf |= REQ_PREFLUSH; + if (io->has_fua) + io->split_bio->bi_opf |= REQ_FUA; + submit_bio(io->split_bio); + } + if (io->has_flush) io->current_bio->bi_opf |= REQ_PREFLUSH; if (io->has_fua) io->current_bio->bi_opf |= REQ_FUA; submit_bio(io->current_bio); - - if (!io->split_bio) - return; - - if (io->has_flush) - io->split_bio->bi_opf |= REQ_PREFLUSH; - if (io->has_fua) - io->split_bio->bi_opf |= REQ_FUA; - submit_bio(io->split_bio); } /* deferred io_unit will be dispatched here */ -- cgit v1.2.3-59-g8ed1b From e84188852a7239d7a144af12f7e5dac8fa88600b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 May 2017 21:05:16 +0200 Subject: tee: add ARM_SMCCC dependency For the moment, the tee subsystem only makes sense in combination with the op-tee driver that depends on ARM_SMCCC, so let's hide the subsystem from users that can't select that. Suggested-by: Linus Torvalds Signed-off-by: Arnd Bergmann --- drivers/tee/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig index 2330a4eb4e8b..a6df12d88f90 100644 --- a/drivers/tee/Kconfig +++ b/drivers/tee/Kconfig @@ -1,6 +1,7 @@ # Generic Trusted Execution Environment Configuration config TEE tristate "Trusted Execution Environment support" + depends on HAVE_ARM_SMCCC || COMPILE_TEST select DMA_SHARED_BUFFER select GENERIC_ALLOCATOR help -- cgit v1.2.3-59-g8ed1b From efc0c21c9ea786d6f019d7df7b4e3932f3578d90 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 2 Mar 2017 12:23:45 +0100 Subject: s390: convert debug_info.ref_count from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/debug.h | 3 ++- arch/s390/kernel/debug.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 0206c8052328..df7b54ea956d 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ @@ -31,7 +32,7 @@ struct debug_view; typedef struct debug_info { struct debug_info* next; struct debug_info* prev; - atomic_t ref_count; + refcount_t ref_count; spinlock_t lock; int level; int nr_areas; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 530226b6cb19..86b3e74f569e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -277,7 +277,7 @@ debug_info_alloc(const char *name, int pages_per_area, int nr_areas, memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * sizeof(struct dentry*)); - atomic_set(&(rc->ref_count), 0); + refcount_set(&(rc->ref_count), 0); return rc; @@ -361,7 +361,7 @@ debug_info_create(const char *name, int pages_per_area, int nr_areas, debug_area_last = rc; rc->next = NULL; - debug_info_get(rc); + refcount_set(&rc->ref_count, 1); out: return rc; } @@ -416,7 +416,7 @@ static void debug_info_get(debug_info_t * db_info) { if (db_info) - atomic_inc(&db_info->ref_count); + refcount_inc(&db_info->ref_count); } /* @@ -431,7 +431,7 @@ debug_info_put(debug_info_t *db_info) if (!db_info) return; - if (atomic_dec_and_test(&db_info->ref_count)) { + if (refcount_dec_and_test(&db_info->ref_count)) { for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!db_info->views[i]) continue; -- cgit v1.2.3-59-g8ed1b From df3ed932394488e57e72dd0e73c224d1804fdc8f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 11 May 2017 10:15:10 -0500 Subject: Partially Revert "of: fix sparse warnings in fdt, irq, reserved mem, and resolver code" A change to function pointers that was meant to address a sparse warning turned out to cause hundreds of new gcc-7 warnings: include/linux/of_irq.h:11:13: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] drivers/of/of_reserved_mem.c: In function '__reserved_mem_init_node': drivers/of/of_reserved_mem.c:200:7: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] int const (*initfn)(struct reserved_mem *rmem) = i->data; Turns out the sparse warnings were spurious and have been fixed in upstream sparse since 0.5.0 in commit "sparse: treat function pointers as pointers to const data". This partially reverts commit 17a70355ea576843a7ac851f1db26872a50b2850. Fixes: 17a70355ea57 ("of: fix sparse warnings in fdt, irq, reserved mem, and resolver code") Reported-by: Arnd Bergmann Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 2 +- include/linux/of_irq.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 4dec07ea510f..d507c3569a88 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -197,7 +197,7 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem) const struct of_device_id *i; for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { - int const (*initfn)(struct reserved_mem *rmem) = i->data; + reservedmem_of_init_fn initfn = i->data; const char *compat = i->compatible; if (!of_flat_dt_is_compatible(rmem->fdt_node, compat)) diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index ec6b11deb773..1e0deb8e8494 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -8,7 +8,7 @@ #include #include -typedef int const (*of_irq_init_cb_t)(struct device_node *, struct device_node *); +typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *); /* * Workarounds only applied to 32bit powermac machines -- cgit v1.2.3-59-g8ed1b From a00ebd1cf12c378a1d4f7a1d6daf1d76c1eaad82 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 11 May 2017 10:21:46 +0200 Subject: drbd: fix request leak introduced by locking/atomic, kref: Kill kref_sub() When killing kref_sub(), the unconditional additional kref_get() was not properly paired with the necessary kref_put(), causing a leak of struct drbd_requests (~ 224 Bytes) per submitted bio, and breaking DRBD in general, as the destructor of those "drbd_requests" does more than just the mempoll_free(). Fixes: bdfafc4ffdd2 ("locking/atomic, kref: Kill kref_sub()") Signed-off-by: Lars Ellenberg Cc: stable@vger.kernel.org # v4.11 Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b5730e17b455..656624314f0d 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -315,24 +315,32 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) } /* still holds resource->req_lock */ -static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) +static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { struct drbd_device *device = req->device; D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED)); + if (!put) + return; + if (!atomic_sub_and_test(put, &req->completion_ref)) - return 0; + return; drbd_req_complete(req, m); + /* local completion may still come in later, + * we need to keep the req object around. */ + if (req->rq_state & RQ_LOCAL_ABORTED) + return; + if (req->rq_state & RQ_POSTPONED) { /* don't destroy the req object just yet, * but queue it for retry */ drbd_restart_request(req); - return 0; + return; } - return 1; + kref_put(&req->kref, drbd_req_destroy); } static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) @@ -519,12 +527,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) { - if (drbd_req_put_completion_ref(req, m, c_put)) - kref_put(&req->kref, drbd_req_destroy); - } else { - kref_put(&req->kref, drbd_req_destroy); - } + drbd_req_put_completion_ref(req, m, c_put); + kref_put(&req->kref, drbd_req_destroy); } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) @@ -1366,8 +1370,7 @@ nodata: } out: - if (drbd_req_put_completion_ref(req, &m, 1)) - kref_put(&req->kref, drbd_req_destroy); + drbd_req_put_completion_ref(req, &m, 1); spin_unlock_irq(&resource->req_lock); /* Even though above is a kref_put(), this is safe. -- cgit v1.2.3-59-g8ed1b From 1a4a5bf52a4adb477adb075e5afce925824ad132 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 9 May 2017 18:27:33 +0800 Subject: driver: vrf: Fix one possible use-after-free issue The current codes only deal with the case that the skb is dropped, it may meet one use-after-free issue when NF_HOOK returns 0 that means the skb is stolen by one netfilter rule or hook. When one netfilter rule or hook stoles the skb and return NF_STOLEN, it means the skb is taken by the rule, and other modules should not touch this skb ever. Maybe the skb is queued or freed directly by the rule. Now uses the nf_hook instead of NF_HOOK to get the result of netfilter, and check the return value of nf_hook. Only when its value equals 1, it means the skb could go ahead. Or reset the skb as NULL. BTW, because vrf_rcv_finish is empty function, so needn't invoke it even though nf_hook returns 1. But we need to modify vrf_rcv_finish to deal with the NF_STOLEN case. There are two cases when skb is stolen. 1. The skb is stolen and freed directly. There is nothing we need to do, and vrf_rcv_finish isn't invoked. 2. The skb is queued and reinjected again. The vrf_rcv_finish would be invoked as okfn, so need to free the skb in it. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/vrf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ceda5861da78..db882493875c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -989,6 +989,7 @@ static u32 vrf_fib_table(const struct net_device *dev) static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + kfree_skb(skb); return 0; } @@ -998,7 +999,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, { struct net *net = dev_net(dev); - if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) + if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1) skb = NULL; /* kfree_skb(skb) handled by nf code */ return skb; -- cgit v1.2.3-59-g8ed1b From 8d66c30b12ed3cb533696dea8b9a9eadd5da426a Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 9 May 2017 15:40:38 +0200 Subject: net: qca_spi: Fix alignment issues in rx path The qca_spi driver causes alignment issues on ARM devices. So fix this by using netdev_alloc_skb_ip_align(). Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/qca_spi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 513e6c74e199..24ca7df15d07 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -296,8 +296,9 @@ qcaspi_receive(struct qcaspi *qca) /* Allocate rx SKB if we don't have one available. */ if (!qca->rx_skb) { - qca->rx_skb = netdev_alloc_skb(net_dev, - net_dev->mtu + VLAN_ETH_HLEN); + qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, + net_dev->mtu + + VLAN_ETH_HLEN); if (!qca->rx_skb) { netdev_dbg(net_dev, "out of RX resources\n"); qca->stats.out_of_mem++; @@ -377,7 +378,7 @@ qcaspi_receive(struct qcaspi *qca) qca->rx_skb, qca->rx_skb->dev); qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY; netif_rx_ni(qca->rx_skb); - qca->rx_skb = netdev_alloc_skb(net_dev, + qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, net_dev->mtu + VLAN_ETH_HLEN); if (!qca->rx_skb) { netdev_dbg(net_dev, "out of RX resources\n"); @@ -759,7 +760,8 @@ qcaspi_netdev_init(struct net_device *dev) if (!qca->rx_buffer) return -ENOBUFS; - qca->rx_skb = netdev_alloc_skb(dev, qca->net_dev->mtu + VLAN_ETH_HLEN); + qca->rx_skb = netdev_alloc_skb_ip_align(dev, qca->net_dev->mtu + + VLAN_ETH_HLEN); if (!qca->rx_skb) { kfree(qca->rx_buffer); netdev_info(qca->net_dev, "Failed to allocate RX sk_buff.\n"); -- cgit v1.2.3-59-g8ed1b From 0fe20fafd1791f993806d417048213ec57b81045 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 9 May 2017 17:19:42 +0100 Subject: netxen_nic: set rcode to the return status from the call to netxen_issue_cmd Currently rcode is being initialized to NX_RCODE_SUCCESS and later it is checked to see if it is not NX_RCODE_SUCCESS which is never true. It appears that there is an unintentional missing assignment of rcode from the return of the call to netxen_issue_cmd() that was dropped in an earlier fix, so add it in. Detected by CoverityScan, CID#401900 ("Logically dead code") Fixes: 2dcd5d95ad6b2 ("netxen_nic: fix cdrp race condition") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c index b8d5270359cd..e30676515529 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c @@ -247,7 +247,7 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu) cmd.req.arg3 = 0; if (recv_ctx->state == NX_HOST_CTX_STATE_ACTIVE) - netxen_issue_cmd(adapter, &cmd); + rcode = netxen_issue_cmd(adapter, &cmd); if (rcode != NX_RCODE_SUCCESS) return -EIO; -- cgit v1.2.3-59-g8ed1b From 83eaddab4378db256d00d295bda6ca997cd13a52 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 9 May 2017 16:59:54 -0700 Subject: ipv6/dccp: do not inherit ipv6_mc_list from parent Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent") we should clear ipv6_mc_list etc. for IPv6 sockets too. Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 6 ++++++ net/ipv6/tcp_ipv6.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d9b6a4e403e7..b6bbb71e713e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index aeb9497b5bb7..df5a9ff71f3f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1062,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; @@ -1131,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; -- cgit v1.2.3-59-g8ed1b From b60161668199ac62011c024adc9e66713b9554e7 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 10 May 2017 11:20:27 -0400 Subject: mdio: mux: Correct mdio_mux_init error path issues There is a potential unnecessary refcount decrement on error path of put_device(&pb->mii_bus->dev), as it is possible to avoid the of_mdio_find_bus() call if mux_bus is specified by the calling function. The same put_device() is not called in the error path if the devm_kzalloc of pb fails. This caused the variable used in the put_device() to be changed, as the pb pointer was obviously not set up. There is an unnecessary of_node_get() on child_bus_node if the of_mdiobus_register() is successful, as the for_each_available_child_of_node() automatically increments this. Thus the refcount on this node will always be +1 more than it should be. There is no of_node_put() on child_bus_node if the of_mdiobus_register() call fails. Finally, it is lacking devm_kfree() of pb in the error path. While this might not be technically necessary, it was present in other parts of the function. So, I am adding it where necessary to make it uniform. Signed-off-by: Jon Mason Fixes: f20e6657a875 ("mdio: mux: Enhanced MDIO mux framework for integrated multiplexers") Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.") Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 963838d4fac1..6943c5ece44a 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -122,10 +122,9 @@ int mdio_mux_init(struct device *dev, pb = devm_kzalloc(dev, sizeof(*pb), GFP_KERNEL); if (pb == NULL) { ret_val = -ENOMEM; - goto err_parent_bus; + goto err_pb_kz; } - pb->switch_data = data; pb->switch_fn = switch_fn; pb->current_child = -1; @@ -154,6 +153,7 @@ int mdio_mux_init(struct device *dev, cb->mii_bus = mdiobus_alloc(); if (!cb->mii_bus) { ret_val = -ENOMEM; + devm_kfree(dev, cb); of_node_put(child_bus_node); break; } @@ -169,8 +169,8 @@ int mdio_mux_init(struct device *dev, if (r) { mdiobus_free(cb->mii_bus); devm_kfree(dev, cb); + of_node_put(child_bus_node); } else { - of_node_get(child_bus_node); cb->next = pb->children; pb->children = cb; } @@ -181,9 +181,11 @@ int mdio_mux_init(struct device *dev, return 0; } + devm_kfree(dev, pb); +err_pb_kz: /* balance the reference of_mdio_find_bus() took */ - put_device(&pb->mii_bus->dev); - + if (!mux_bus) + put_device(&parent_bus->dev); err_parent_bus: of_node_put(parent_bus_node); return ret_val; -- cgit v1.2.3-59-g8ed1b From 9111e7880ccf419548c7b0887df020b08eadb075 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 10 May 2017 19:07:51 +0200 Subject: s390/qeth: handle sysfs error during initialization When setting up the device from within the layer discipline's probe routine, creating the layer-specific sysfs attributes can fail. Report this error back to the caller, and handle it by releasing the layer discipline. Signed-off-by: Ursula Braun [jwi: updated commit msg, moved an OSN change to a subsequent patch] Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 4 +++- drivers/s390/net/qeth_core_sys.c | 2 ++ drivers/s390/net/qeth_l2_main.c | 5 ++++- drivers/s390/net/qeth_l3_main.c | 5 ++++- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 38114a8d56e0..5bfd7c15d6a9 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5731,8 +5731,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) if (rc) goto err; rc = card->discipline->setup(card->gdev); - if (rc) + if (rc) { + qeth_core_free_discipline(card); goto err; + } } rc = card->discipline->set_online(gdev); err: diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 75b29fd2fcf4..412ff61891ac 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -426,6 +426,8 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, goto out; rc = card->discipline->setup(card->gdev); + if (rc) + qeth_core_free_discipline(card); out: mutex_unlock(&card->discipline_mutex); return rc ? rc : count; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 1b07f382d74c..2d2623182abf 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -883,8 +883,11 @@ static int qeth_l2_stop(struct net_device *dev) static int qeth_l2_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - qeth_l2_create_device_attributes(&gdev->dev); + rc = qeth_l2_create_device_attributes(&gdev->dev); + if (rc) + return rc; INIT_LIST_HEAD(&card->vid_list); hash_init(card->mac_htable); card->options.layer2 = 1; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 6e0354ef4b86..ae70daf33bd5 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3039,8 +3039,11 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) static int qeth_l3_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - qeth_l3_create_device_attributes(&gdev->dev); + rc = qeth_l3_create_device_attributes(&gdev->dev); + if (rc) + return rc; card->options.layer2 = 0; card->info.hwtrap = 0; return 0; -- cgit v1.2.3-59-g8ed1b From 2d2ebb3ed0c6acfb014f98e427298673a5d07b82 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 10 May 2017 19:07:52 +0200 Subject: s390/qeth: unbreak OSM and OSN support commit b4d72c08b358 ("qeth: bridgeport support - basic control") broke the support for OSM and OSN devices as follows: As OSM and OSN are L2 only, qeth_core_probe_device() does an early setup by loading the l2 discipline and calling qeth_l2_probe_device(). In this context, adding the l2-specific bridgeport sysfs attributes via qeth_l2_create_device_attributes() hits a BUG_ON in fs/sysfs/group.c, since the basic sysfs infrastructure for the device hasn't been established yet. Note that OSN actually has its own unique sysfs attributes (qeth_osn_devtype), so the additional attributes shouldn't be created at all. For OSM, add a new qeth_l2_devtype that contains all the common and l2-specific sysfs attributes. When qeth_core_probe_device() does early setup for OSM or OSN, assign the corresponding devtype so that the ccwgroup probe code creates the full set of sysfs attributes. This allows us to skip qeth_l2_create_device_attributes() in case of an early setup. Any device that can't do early setup will initially have only the generic sysfs attributes, and when it's probed later qeth_l2_probe_device() adds the l2-specific attributes. If an early-setup device is removed (by calling ccwgroup_ungroup()), device_unregister() will - using the devtype - delete the l2-specific attributes before qeth_l2_remove_device() is called. So make sure to not remove them twice. What complicates the issue is that qeth_l2_probe_device() and qeth_l2_remove_device() is also called on a device when its layer2 attribute changes (ie. its layer mode is switched). For early-setup devices this wouldn't work properly - we wouldn't remove the l2-specific attributes when switching to L3. But switching the layer mode doesn't actually make any sense; we already decided that the device can only operate in L2! So just refuse to switch the layer mode on such devices. Note that OSN doesn't have a layer2 attribute, so we only need to special-case OSM. Based on an initial patch by Ursula Braun. Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 4 ++++ drivers/s390/net/qeth_core_main.c | 17 +++++++++-------- drivers/s390/net/qeth_core_sys.c | 22 ++++++++++++++-------- drivers/s390/net/qeth_l2.h | 2 ++ drivers/s390/net/qeth_l2_main.c | 17 +++++++++++++---- drivers/s390/net/qeth_l2_sys.c | 8 ++++++++ drivers/s390/net/qeth_l3_main.c | 1 + 7 files changed, 51 insertions(+), 20 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index f6aa21176d89..30bc6105aac3 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -701,6 +701,7 @@ enum qeth_discipline_id { }; struct qeth_discipline { + const struct device_type *devtype; void (*start_poll)(struct ccw_device *, int, unsigned long); qdio_handler_t *input_handler; qdio_handler_t *output_handler; @@ -875,6 +876,9 @@ extern struct qeth_discipline qeth_l2_discipline; extern struct qeth_discipline qeth_l3_discipline; extern const struct attribute_group *qeth_generic_attr_groups[]; extern const struct attribute_group *qeth_osn_attr_groups[]; +extern const struct attribute_group qeth_device_attr_group; +extern const struct attribute_group qeth_device_blkt_group; +extern const struct device_type qeth_generic_devtype; extern struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5bfd7c15d6a9..fc6d85f2b38d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5530,10 +5530,12 @@ void qeth_core_free_discipline(struct qeth_card *card) card->discipline = NULL; } -static const struct device_type qeth_generic_devtype = { +const struct device_type qeth_generic_devtype = { .name = "qeth_generic", .groups = qeth_generic_attr_groups, }; +EXPORT_SYMBOL_GPL(qeth_generic_devtype); + static const struct device_type qeth_osn_devtype = { .name = "qeth_osn", .groups = qeth_osn_attr_groups, @@ -5659,23 +5661,22 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) goto err_card; } - if (card->info.type == QETH_CARD_TYPE_OSN) - gdev->dev.type = &qeth_osn_devtype; - else - gdev->dev.type = &qeth_generic_devtype; - switch (card->info.type) { case QETH_CARD_TYPE_OSN: case QETH_CARD_TYPE_OSM: rc = qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2); if (rc) goto err_card; + + gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN) + ? card->discipline->devtype + : &qeth_osn_devtype; rc = card->discipline->setup(card->gdev); if (rc) goto err_disc; - case QETH_CARD_TYPE_OSD: - case QETH_CARD_TYPE_OSX: + break; default: + gdev->dev.type = &qeth_generic_devtype; break; } diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 412ff61891ac..db6a285d41e0 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -413,12 +413,16 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, if (card->options.layer2 == newdis) goto out; - else { - card->info.mac_bits = 0; - if (card->discipline) { - card->discipline->remove(card->gdev); - qeth_core_free_discipline(card); - } + if (card->info.type == QETH_CARD_TYPE_OSM) { + /* fixed layer, can't switch */ + rc = -EOPNOTSUPP; + goto out; + } + + card->info.mac_bits = 0; + if (card->discipline) { + card->discipline->remove(card->gdev); + qeth_core_free_discipline(card); } rc = qeth_core_load_discipline(card, newdis); @@ -705,10 +709,11 @@ static struct attribute *qeth_blkt_device_attrs[] = { &dev_attr_inter_jumbo.attr, NULL, }; -static struct attribute_group qeth_device_blkt_group = { +const struct attribute_group qeth_device_blkt_group = { .name = "blkt", .attrs = qeth_blkt_device_attrs, }; +EXPORT_SYMBOL_GPL(qeth_device_blkt_group); static struct attribute *qeth_device_attrs[] = { &dev_attr_state.attr, @@ -728,9 +733,10 @@ static struct attribute *qeth_device_attrs[] = { &dev_attr_switch_attrs.attr, NULL, }; -static struct attribute_group qeth_device_attr_group = { +const struct attribute_group qeth_device_attr_group = { .attrs = qeth_device_attrs, }; +EXPORT_SYMBOL_GPL(qeth_device_attr_group); const struct attribute_group *qeth_generic_attr_groups[] = { &qeth_device_attr_group, diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 29d9fb3890ad..0d59f9a45ea9 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -8,6 +8,8 @@ #include "qeth_core.h" +extern const struct attribute_group *qeth_l2_attr_groups[]; + int qeth_l2_create_device_attributes(struct device *); void qeth_l2_remove_device_attributes(struct device *); void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2d2623182abf..04666fe231aa 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -880,14 +880,21 @@ static int qeth_l2_stop(struct net_device *dev) return 0; } +static const struct device_type qeth_l2_devtype = { + .name = "qeth_layer2", + .groups = qeth_l2_attr_groups, +}; + static int qeth_l2_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; - rc = qeth_l2_create_device_attributes(&gdev->dev); - if (rc) - return rc; + if (gdev->dev.type == &qeth_generic_devtype) { + rc = qeth_l2_create_device_attributes(&gdev->dev); + if (rc) + return rc; + } INIT_LIST_HEAD(&card->vid_list); hash_init(card->mac_htable); card->options.layer2 = 1; @@ -899,7 +906,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) { struct qeth_card *card = dev_get_drvdata(&cgdev->dev); - qeth_l2_remove_device_attributes(&cgdev->dev); + if (cgdev->dev.type == &qeth_generic_devtype) + qeth_l2_remove_device_attributes(&cgdev->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); @@ -1272,6 +1280,7 @@ static int qeth_l2_control_event(struct qeth_card *card, } struct qeth_discipline qeth_l2_discipline = { + .devtype = &qeth_l2_devtype, .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 687972356d6b..9696baa49e2d 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -269,3 +269,11 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card) } else qeth_bridgeport_an_set(card, 0); } + +const struct attribute_group *qeth_l2_attr_groups[] = { + &qeth_device_attr_group, + &qeth_device_blkt_group, + /* l2 specific, see l2_{create,remove}_device_attributes(): */ + &qeth_l2_bridgeport_attr_group, + NULL, +}; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ae70daf33bd5..6c2146fc831a 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3309,6 +3309,7 @@ static int qeth_l3_control_event(struct qeth_card *card, } struct qeth_discipline qeth_l3_discipline = { + .devtype = &qeth_generic_devtype, .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, -- cgit v1.2.3-59-g8ed1b From 25e2c341e7818a394da9abc403716278ee646014 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 10 May 2017 19:07:53 +0200 Subject: s390/qeth: avoid null pointer dereference on OSN Access card->dev only after checking whether's its valid. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 04666fe231aa..bd2df62a5cdf 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -965,7 +965,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) case QETH_CARD_TYPE_OSN: card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, ether_setup); - card->dev->flags |= IFF_NOARP; break; default: card->dev = alloc_etherdev(0); @@ -980,9 +979,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->min_mtu = 64; card->dev->max_mtu = ETH_MAX_MTU; card->dev->netdev_ops = &qeth_l2_netdev_ops; - card->dev->ethtool_ops = - (card->info.type != QETH_CARD_TYPE_OSN) ? - &qeth_l2_ethtool_ops : &qeth_l2_osn_ops; + if (card->info.type == QETH_CARD_TYPE_OSN) { + card->dev->ethtool_ops = &qeth_l2_osn_ops; + card->dev->flags |= IFF_NOARP; + } else { + card->dev->ethtool_ops = &qeth_l2_ethtool_ops; + } card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; -- cgit v1.2.3-59-g8ed1b From ebccc7397e4a49ff64c8f44a54895de9d32fe742 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 10 May 2017 19:07:54 +0200 Subject: s390/qeth: add missing hash table initializations commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") added new hash tables, but missed to initialize them. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Ursula Braun Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 6c2146fc831a..d8df1e635163 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3044,6 +3044,8 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) rc = qeth_l3_create_device_attributes(&gdev->dev); if (rc) return rc; + hash_init(card->ip_htable); + hash_init(card->ip_mc_htable); card->options.layer2 = 0; card->info.hwtrap = 0; return 0; -- cgit v1.2.3-59-g8ed1b From 5b6cb43b4d625b04a4049d727a116edbfe5cf0f4 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 10 May 2017 10:28:05 -0700 Subject: net: ethernet: ti: netcp_core: return error while dma channel open issue Fix error path while dma open channel issue. Also, no need to check output on NULL if it's never returned. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 6 ++++-- drivers/soc/ti/knav_dma.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 729a7da90b5b..e6222e535019 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1353,9 +1353,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_channel = knav_dma_open_channel(dev, tx_pipe->dma_chan_name, &config); - if (IS_ERR_OR_NULL(tx_pipe->dma_channel)) { + if (IS_ERR(tx_pipe->dma_channel)) { dev_err(dev, "failed opening tx chan(%s)\n", tx_pipe->dma_chan_name); + ret = PTR_ERR(tx_pipe->dma_channel); goto err; } @@ -1673,9 +1674,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev) netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device, netcp->dma_chan_name, &config); - if (IS_ERR_OR_NULL(netcp->rx_channel)) { + if (IS_ERR(netcp->rx_channel)) { dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n", netcp->dma_chan_name); + ret = PTR_ERR(netcp->rx_channel); goto fail; } diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index ecebe2eecc3a..026182d3b27c 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -413,7 +413,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name, * @name: slave channel name * @config: dma configuration parameters * - * Returns pointer to appropriate DMA channel on success or NULL. + * Returns pointer to appropriate DMA channel on success or error. */ void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) -- cgit v1.2.3-59-g8ed1b From d8b54110ee944de522ccd3531191f39986ec20f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 11 May 2017 01:53:15 +0200 Subject: bpf, arm64: fix faulty emission of map access in tail calls Shubham was recently asking on netdev why in arm64 JIT we don't multiply the index for accessing the tail call map by 8. That led me into testing out arm64 JIT wrt tail calls and it turned out I got a NULL pointer dereference on the tail call. The buggy access is at: prog = array->ptrs[index]; if (prog == NULL) goto out; [...] 00000060: d2800e0a mov x10, #0x70 // #112 00000064: f86a682a ldr x10, [x1,x10] 00000068: f862694b ldr x11, [x10,x2] 0000006c: b40000ab cbz x11, 0x00000080 [...] The code triggering the crash is f862694b. x1 at the time contains the address of the bpf array, x10 offsetof(struct bpf_array, ptrs). Meaning, above we load the pointer to the program at map slot 0 into x10. x10 can then be NULL if the slot is not occupied, which we later on try to access with a user given offset in x2 that is the map index. Fix this by emitting the following instead: [...] 00000060: d2800e0a mov x10, #0x70 // #112 00000064: 8b0a002a add x10, x1, x10 00000068: d37df04b lsl x11, x2, #3 0000006c: f86b694b ldr x11, [x10,x11] 00000070: b40000ab cbz x11, 0x00000084 [...] This basically adds the offset to ptrs to the base address of the bpf array we got and we later on access the map with an index * 8 offset relative to that. The tail call map itself is basically one large area with meta data at the head followed by the array of prog pointers. This makes tail calls working again, tested on Cavium ThunderX ARMv8. Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") Reported-by: Shubham Bansal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/arm64/net/bpf_jit_comp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c6e53580aefe..71f930501ade 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -253,8 +253,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ off = offsetof(struct bpf_array, ptrs); emit_a64_mov_i64(tmp, off, ctx); - emit(A64_LDR64(tmp, r2, tmp), ctx); - emit(A64_LDR64(prg, tmp, r3), ctx); + emit(A64_ADD(1, tmp, r2, tmp), ctx); + emit(A64_LSL(1, prg, r3, 3), ctx); + emit(A64_LDR64(prg, tmp, prg), ctx); emit(A64_CBZ(1, prg, jmp_offset), ctx); /* goto *(prog->bpf_func + prologue_size); */ -- cgit v1.2.3-59-g8ed1b From d1174416747d790d750742d0514915deeed93acf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:22:52 -0700 Subject: bpf: Track alignment of register values in the verifier. Currently if we add only constant values to pointers we can fully validate the alignment, and properly check if we need to reject the program on !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS architectures. However, once an unknown value is introduced we only allow byte sized memory accesses which is too restrictive. Add logic to track the known minimum alignment of register values, and propagate this state into registers containing pointers. The most common paradigm that makes use of this new logic is computing the transport header using the IP header length field. For example: struct ethhdr *ep = skb->data; struct iphdr *iph = (struct iphdr *) (ep + 1); struct tcphdr *th; ... n = iph->ihl; th = ((void *)iph + (n * 4)); port = th->dest; The existing code will reject the load of th->dest because it cannot validate that the alignment is at least 2 once "n * 4" is added the the packet pointer. In the new code, the register holding "n * 4" will have a reg->min_align value of 4, because any value multiplied by 4 will be at least 4 byte aligned. (actually, the eBPF code emitted by the compiler in this case is most likely to use a shift left by 2, but the end result is identical) At the critical addition: th = ((void *)iph + (n * 4)); The register holding 'th' will start with reg->off value of 14. The pointer addition will transform that reg into something that looks like: reg->aux_off = 14 reg->aux_off_align = 4 Next, the verifier will look at the th->dest load, and it will see a load offset of 2, and first check: if (reg->aux_off_align % size) which will pass because aux_off_align is 4. reg_off will be computed: reg_off = reg->off; ... reg_off += reg->aux_off; plus we have off==2, and it will thus check: if ((NET_IP_ALIGN + reg_off + off) % size != 0) which evaluates to: if ((NET_IP_ALIGN + 14 + 2) % size != 0) On strict alignment architectures, NET_IP_ALIGN is 2, thus: if ((2 + 14 + 2) % size != 0) which passes. These pointer transformations and checks work regardless of whether the constant offset or the variable with known alignment is added first to the pointer register. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 3 ++ kernel/bpf/verifier.c | 108 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 92 insertions(+), 19 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5efb4db44e1e..7c6a51924afc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -40,6 +40,9 @@ struct bpf_reg_state { */ s64 min_value; u64 max_value; + u32 min_align; + u32 aux_off; + u32 aux_off_align; }; enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c5b56c92f8e2..cc7b626fa447 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -241,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state) if (reg->max_value != BPF_REGISTER_MAX_RANGE) verbose(",max_value=%llu", (unsigned long long)reg->max_value); + if (reg->min_align) + verbose(",min_align=%u", reg->min_align); + if (reg->aux_off) + verbose(",aux_off=%u", reg->aux_off); + if (reg->aux_off_align) + verbose(",aux_off_align=%u", reg->aux_off_align); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -466,6 +472,9 @@ static void init_reg_state(struct bpf_reg_state *regs) regs[i].imm = 0; regs[i].min_value = BPF_REGISTER_MIN_RANGE; regs[i].max_value = BPF_REGISTER_MAX_RANGE; + regs[i].min_align = 0; + regs[i].aux_off = 0; + regs[i].aux_off_align = 0; } /* frame pointer */ @@ -492,6 +501,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; + regs[regno].min_align = 0; } static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs, @@ -779,17 +789,28 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, - int off, int size) + int off, int size, bool strict) { - if (reg->id && size != 1) { - verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n"); - return -EACCES; + int reg_off; + + /* Byte size accesses are always allowed. */ + if (!strict || size == 1) + return 0; + + reg_off = reg->off; + if (reg->id) { + if (reg->aux_off_align % size) { + verbose("Packet access is only %u byte aligned, %d byte access not allowed\n", + reg->aux_off_align, size); + return -EACCES; + } + reg_off += reg->aux_off; } /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + if ((NET_IP_ALIGN + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", - NET_IP_ALIGN, reg->off, off, size); + NET_IP_ALIGN, reg_off, off, size); return -EACCES; } @@ -797,9 +818,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, } static int check_val_ptr_alignment(const struct bpf_reg_state *reg, - int size) + int size, bool strict) { - if (size != 1) { + if (strict && size != 1) { verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); return -EACCES; } @@ -810,13 +831,16 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg, static int check_ptr_alignment(const struct bpf_reg_state *reg, int off, int size) { + bool strict = false; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + strict = true; + switch (reg->type) { case PTR_TO_PACKET: - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : - check_pkt_ptr_alignment(reg, off, size); + return check_pkt_ptr_alignment(reg, off, size, strict); case PTR_TO_MAP_VALUE_ADJ: - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : - check_val_ptr_alignment(reg, size); + return check_val_ptr_alignment(reg, size, strict); default: if (off % size != 0) { verbose("misaligned access off %d size %d\n", @@ -883,6 +907,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, value_regno); /* note that reg.[id|off|range] == 0 */ state->regs[value_regno].type = reg_type; + state->regs[value_regno].aux_off = 0; + state->regs[value_regno].aux_off_align = 0; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { @@ -1455,6 +1481,8 @@ add_imm: */ dst_reg->off += imm; } else { + bool had_id; + if (src_reg->type == PTR_TO_PACKET) { /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ tmp_reg = *dst_reg; /* save r7 state */ @@ -1488,14 +1516,23 @@ add_imm: src_reg->imm); return -EACCES; } + + had_id = (dst_reg->id != 0); + /* dst_reg stays as pkt_ptr type and since some positive * integer value was added to the pointer, increment its 'id' */ dst_reg->id = ++env->id_gen; - /* something was added to pkt_ptr, set range and off to zero */ + /* something was added to pkt_ptr, set range to zero */ + dst_reg->aux_off = dst_reg->off; dst_reg->off = 0; dst_reg->range = 0; + if (had_id) + dst_reg->aux_off_align = min(dst_reg->aux_off_align, + src_reg->min_align); + else + dst_reg->aux_off_align = src_reg->min_align; } return 0; } @@ -1669,6 +1706,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg) reg->min_value = BPF_REGISTER_MIN_RANGE; } +static u32 calc_align(u32 imm) +{ + if (!imm) + return 1U << 31; + return imm - ((imm - 1) & imm); +} + static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1676,8 +1720,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, s64 min_val = BPF_REGISTER_MIN_RANGE; u64 max_val = BPF_REGISTER_MAX_RANGE; u8 opcode = BPF_OP(insn->code); + u32 dst_align, src_align; dst_reg = ®s[insn->dst_reg]; + src_align = 0; if (BPF_SRC(insn->code) == BPF_X) { check_reg_overflow(®s[insn->src_reg]); min_val = regs[insn->src_reg].min_value; @@ -1693,12 +1739,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, regs[insn->src_reg].type != UNKNOWN_VALUE) { min_val = BPF_REGISTER_MIN_RANGE; max_val = BPF_REGISTER_MAX_RANGE; + src_align = 0; + } else { + src_align = regs[insn->src_reg].min_align; } } else if (insn->imm < BPF_REGISTER_MAX_RANGE && (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { min_val = max_val = insn->imm; + src_align = calc_align(insn->imm); } + dst_align = dst_reg->min_align; + /* We don't know anything about what was done to this register, mark it * as unknown. */ @@ -1723,18 +1775,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_reg->min_value += min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value += max_val; + dst_reg->min_align = min(src_align, dst_align); break; case BPF_SUB: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value -= min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value -= max_val; + dst_reg->min_align = min(src_align, dst_align); break; case BPF_MUL: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value *= min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value *= max_val; + dst_reg->min_align = max(src_align, dst_align); break; case BPF_AND: /* Disallow AND'ing of negative numbers, ain't nobody got time @@ -1746,17 +1801,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, else dst_reg->min_value = 0; dst_reg->max_value = max_val; + dst_reg->min_align = max(src_align, dst_align); break; case BPF_LSH: /* Gotta have special overflow logic here, if we're shifting * more than MAX_RANGE then just assume we have an invalid * range. */ - if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) + if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) { dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) - dst_reg->min_value <<= min_val; - + dst_reg->min_align = 1; + } else { + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value <<= min_val; + if (!dst_reg->min_align) + dst_reg->min_align = 1; + dst_reg->min_align <<= min_val; + } if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->max_value = BPF_REGISTER_MAX_RANGE; else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) @@ -1766,11 +1827,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* RSH by a negative number is undefined, and the BPF_RSH is an * unsigned shift, so make the appropriate casts. */ - if (min_val < 0 || dst_reg->min_value < 0) + if (min_val < 0 || dst_reg->min_value < 0) { dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else + } else { dst_reg->min_value = (u64)(dst_reg->min_value) >> min_val; + } + if (min_val < 0) { + dst_reg->min_align = 1; + } else { + dst_reg->min_align >>= (u64) min_val; + if (!dst_reg->min_align) + dst_reg->min_align = 1; + } if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value >>= max_val; break; @@ -1872,6 +1941,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].imm = insn->imm; regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; + regs[insn->dst_reg].min_align = calc_align(insn->imm); } } else if (opcode > BPF_END) { -- cgit v1.2.3-59-g8ed1b From c5fc9692d101d1318b0f53f9f691cd88ac029317 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:25:17 -0700 Subject: bpf: Do per-instruction state dumping in verifier when log_level > 1. If log_level > 1, do a state dump every instruction and emit it in a more compact way (without a leading newline). This will facilitate more sophisticated test cases which inspect the verifier log for register state. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- kernel/bpf/verifier.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cc7b626fa447..ff2bfe1d656a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2926,8 +2926,12 @@ static int do_check(struct bpf_verifier_env *env) goto process_bpf_exit; } - if (log_level && do_print_state) { - verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); + if (log_level > 1 || (log_level && do_print_state)) { + if (log_level > 1) + verbose("%d:", insn_idx); + else + verbose("\nfrom %d to %d:", + prev_insn_idx, insn_idx); print_verifier_state(&env->cur_state); do_print_state = false; } -- cgit v1.2.3-59-g8ed1b From e07b98d9bffe410019dfcf62c3428d4a96c56a2c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:38:07 -0700 Subject: bpf: Add strict alignment flag for BPF_PROG_LOAD. Add a new field, "prog_flags", and an initial flag value BPF_F_STRICT_ALIGNMENT. When set, the verifier will enforce strict pointer alignment regardless of the setting of CONFIG_EFFICIENT_UNALIGNED_ACCESS. The verifier, in this mode, will also use a fixed value of "2" in place of NET_IP_ALIGN. This facilitates test cases that will exercise and validate this part of the verifier even when run on architectures where alignment doesn't matter. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 5 ++++- kernel/bpf/verifier.c | 23 +++++++++++++++++------ tools/build/feature/test-bpf.c | 1 + tools/include/uapi/linux/bpf.h | 11 +++++++++-- 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7c6a51924afc..d5093b52b485 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -90,6 +90,7 @@ struct bpf_verifier_env { struct bpf_prog *prog; /* eBPF program being verified */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ + bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 945a1f5f63c5..94dfa9def355 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -177,6 +184,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fd2411fd6914..265a0d854e33 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -783,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD kern_version +#define BPF_PROG_LOAD_LAST_FIELD prog_flags static int bpf_prog_load(union bpf_attr *attr) { @@ -796,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; + if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) + return -EINVAL; + /* copy eBPF program license from user space */ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ff2bfe1d656a..e74fb1b87855 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -791,6 +791,7 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, int off, int size, bool strict) { + int ip_align; int reg_off; /* Byte size accesses are always allowed. */ @@ -807,10 +808,14 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, reg_off += reg->aux_off; } - /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg_off + off) % size != 0) { + /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking + * we force this to 2 which is universally what architectures use + * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + */ + ip_align = strict ? 2 : NET_IP_ALIGN; + if ((ip_align + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", - NET_IP_ALIGN, reg_off, off, size); + ip_align, reg_off, off, size); return -EACCES; } @@ -828,10 +833,11 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg, return 0; } -static int check_ptr_alignment(const struct bpf_reg_state *reg, +static int check_ptr_alignment(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int off, int size) { - bool strict = false; + bool strict = env->strict_alignment; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) strict = true; @@ -873,7 +879,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (size < 0) return size; - err = check_ptr_alignment(reg, off, size); + err = check_ptr_alignment(env, reg, off, size); if (err) return err; @@ -3568,6 +3574,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) } else { log_level = 0; } + if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT) + env->strict_alignment = true; + else + env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) @@ -3673,6 +3683,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, mutex_lock(&bpf_verifier_lock); log_level = 0; + env->strict_alignment = false; env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index ebc6dceddb58..7598361ef1f1 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -29,6 +29,7 @@ int main(void) attr.log_size = 0; attr.log_level = 0; attr.kern_version = 0; + attr.prog_flags = 0; /* * Test existence of __NR_bpf and BPF_PROG_LOAD. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e553529929f6..94dfa9def355 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -132,6 +132,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -177,6 +184,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -481,8 +489,7 @@ union bpf_attr { * u32 bpf_get_socket_uid(skb) * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb - * Return: uid of the socket owner on success or 0 if the socket pointer - * inside sk_buff is NULL + * Return: uid of the socket owner on success or overflowuid if failed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3-59-g8ed1b From 91045f5e5238a6d2ad21d41d7e86e2fa65f90d76 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:42:48 -0700 Subject: bpf: Add bpf_verify_program() to the library. This allows a test case to load a BPF program and unconditionally acquire the verifier log. It also allows specification of the strict alignment flag. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- tools/lib/bpf/bpf.c | 22 ++++++++++++++++++++++ tools/lib/bpf/bpf.h | 4 ++++ 2 files changed, 26 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 4fe444b8092e..6e178987af8e 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -117,6 +117,28 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; + attr.insns = ptr_to_u64(insns); + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = 2; + log_buf[0] = 0; + attr.kern_version = kern_version; + attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + + return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index edb4daeff7a5..972bd8333eb7 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,6 +35,10 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); -- cgit v1.2.3-59-g8ed1b From 18b3ad90b64e9893297357608abddd26170730eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:43:51 -0700 Subject: bpf: Add verifier test case for alignment. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_align.c | 417 +++++++++++++++++++++++++++++++ 2 files changed, 419 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_align.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 91edd0566237..f92f27d8156f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,7 +11,8 @@ endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ + test_align TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c new file mode 100644 index 000000000000..9dd97948a47f --- /dev/null +++ b/tools/testing/selftests/bpf/test_align.c @@ -0,0 +1,417 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "../../../include/linux/filter.h" + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#define MAX_INSNS 512 +#define MAX_MATCHES 16 + +struct bpf_align_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + enum { + UNDEF, + ACCEPT, + REJECT + } result; + enum bpf_prog_type prog_type; + const char *matches[MAX_MATCHES]; +}; + +static struct bpf_align_test tests[] = { + { + .descr = "mov", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 16), + BPF_MOV64_IMM(BPF_REG_3, 32), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + "2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + }, + }, + { + .descr = "shift", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_4, 32), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", + "2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + "3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", + "7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + "8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + }, + }, + { + .descr = "addsub", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp", + "3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp", + "4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp", + "6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + }, + }, + { + .descr = "mul", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", + "2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", + "3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + "4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp", + }, + }, + +#define PREP_PKT_POINTERS \ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \ + offsetof(struct __sk_buff, data)), \ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \ + offsetof(struct __sk_buff, data_end)) + +#define LOAD_UNKNOWN(DST_REG) \ + PREP_PKT_POINTERS, \ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \ + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0) + + { + .descr = "unknown shift", + .insns = { + LOAD_UNKNOWN(BPF_REG_3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + LOAD_UNKNOWN(BPF_REG_4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp", + "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp", + "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp", + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp", + "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp", + "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp", + "20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp", + "21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp", + "22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp", + "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp", + }, + }, + { + .descr = "unknown mul", + .insns = { + LOAD_UNKNOWN(BPF_REG_3), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp", + "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp", + "12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp", + "14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp", + "16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp" + }, + }, + { + .descr = "packet const offset", + .insns = { + PREP_PKT_POINTERS, + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + + /* Skip over ethernet header. */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp", + "5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp", + "6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp", + "10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp", + "14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + "15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + }, + }, + { + .descr = "packet variable offset", + .insns = { + LOAD_UNKNOWN(BPF_REG_6), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + + /* First, add a constant to the R5 packet pointer, + * then a variable with a known alignment. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + /* Now, test in the other direction. Adding first + * the variable offset to R5, then the constant. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp", + + /* Offset is added to packet pointer R5, resulting in known + * auxiliary alignment and offset. + */ + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (0) + + * reg->aux_off (14) which is 16. Then the variable + * offset is considered using reg->aux_off_align which + * is 4 and meets the load's requirements. + */ + "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + + /* Variable offset is added to R5 packet pointer, + * resulting in auxiliary alignment of 4. + */ + "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* Constant offset is added to R5, resulting in + * reg->off of 14. + */ + "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (14) which + * is 16. Then the variable offset is considered using + * reg->aux_off_align which is 4 and meets the load's + * requirements. + */ + "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + }, + }, +}; + +static int probe_filter_length(const struct bpf_insn *fp) +{ + int len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static char bpf_vlog[32768]; + +static int do_test_single(struct bpf_align_test *test) +{ + struct bpf_insn *prog = test->insns; + int prog_type = test->prog_type; + int prog_len, i; + int fd_prog; + int ret; + + prog_len = probe_filter_length(prog); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, 1, "GPL", 0, + bpf_vlog, sizeof(bpf_vlog)); + if (fd_prog < 0) { + printf("Failed to load program.\n"); + printf("%s", bpf_vlog); + ret = 1; + } else { + ret = 0; + for (i = 0; i < MAX_MATCHES; i++) { + const char *t, *m = test->matches[i]; + + if (!m) + break; + t = strstr(bpf_vlog, m); + if (!t) { + printf("Failed to find match: %s\n", m); + ret = 1; + printf("%s", bpf_vlog); + break; + } + } + /* printf("%s", bpf_vlog); */ + close(fd_prog); + } + return ret; +} + +static int do_test(unsigned int from, unsigned int to) +{ + int all_pass = 0; + int all_fail = 0; + unsigned int i; + + for (i = from; i < to; i++) { + struct bpf_align_test *test = &tests[i]; + int fail; + + printf("Test %3d: %s ... ", + i, test->descr); + fail = do_test_single(test); + if (fail) { + all_fail++; + printf("FAIL\n"); + } else { + all_pass++; + printf("PASS\n"); + } + } + printf("Results: %d pass %d fail\n", + all_pass, all_fail); + return 0; +} + +int main(int argc, char **argv) +{ + unsigned int from = 0, to = ARRAY_SIZE(tests); + + if (argc == 3) { + unsigned int l = atoi(argv[argc - 2]); + unsigned int u = atoi(argv[argc - 1]); + + if (l < to && u < to) { + from = l; + to = u + 1; + } + } else if (argc == 2) { + unsigned int t = atoi(argv[argc - 1]); + + if (t < to) { + from = t; + to = t + 1; + } + } + return do_test(from, to); +} -- cgit v1.2.3-59-g8ed1b From 0a5539f66133a02b24f9cc43da5b84b7e6f3f436 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 May 2017 12:00:50 -0700 Subject: bpf: Provide a linux/types.h override for bpf selftests. We do not want to use the architecture's type.h header when building BPF programs which are always 64-bit. Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 3 ++- tools/testing/selftests/bpf/include/uapi/linux/types.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/include/uapi/linux/types.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f92f27d8156f..f389b02d43a0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,6 +35,7 @@ $(BPFOBJ): force CLANG ?= clang %.o: %.c - $(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \ + $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ + -I../../../../samples/bpf/ \ -Wno-compare-distinct-pointer-types \ -O2 -target bpf -c $< -o $@ diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h new file mode 100644 index 000000000000..fbd16a7554af --- /dev/null +++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h @@ -0,0 +1,6 @@ +#ifndef _UAPI_LINUX_TYPES_H +#define _UAPI_LINUX_TYPES_H + +#include + +#endif /* _UAPI_LINUX_TYPES_H */ -- cgit v1.2.3-59-g8ed1b From 23b245c04d0ef408087430dd4d1b214a5da1eb78 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 10 May 2017 08:47:11 -0700 Subject: md/raid1/10: avoid unnecessary locking If we add bios to block plugging list, locking is unnecessry, since the block unplug is guaranteed not to run at that time. Reviewed-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 7 +++---- drivers/md/raid10.c | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7c1f73398800..a17ed6218d51 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1529,17 +1529,16 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, plug = container_of(cb, struct raid1_plug_cb, cb); else plug = NULL; - spin_lock_irqsave(&conf->device_lock, flags); if (plug) { bio_list_add(&plug->pending, mbio); plug->pending_cnt++; } else { + spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); conf->pending_count++; - } - spin_unlock_irqrestore(&conf->device_lock, flags); - if (!plug) + spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); + } } r1_bio_write_done(r1_bio); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6b86a0032cf8..4343d7ff9916 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1282,17 +1282,16 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, plug = container_of(cb, struct raid10_plug_cb, cb); else plug = NULL; - spin_lock_irqsave(&conf->device_lock, flags); if (plug) { bio_list_add(&plug->pending, mbio); plug->pending_cnt++; } else { + spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); conf->pending_count++; - } - spin_unlock_irqrestore(&conf->device_lock, flags); - if (!plug) + spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); + } } static void raid10_write_request(struct mddev *mddev, struct bio *bio, -- cgit v1.2.3-59-g8ed1b From 0489df9a430e9607de8130a6bc4bf4c02f96eaf1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 12 May 2017 01:04:45 +0200 Subject: xdp: add flag to enforce driver mode After commit b5cdae3291f7 ("net: Generic XDP") we automatically fall back to a generic XDP variant if the driver does not support native XDP. Allow for an option where the user can specify that always the native XDP variant should be selected and in case it's not supported by a driver, just bail out. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 6 ++++-- net/core/dev.c | 2 ++ net/core/rtnetlink.c | 5 +++++ samples/bpf/xdp1_user.c | 8 ++++++-- samples/bpf/xdp_tx_iptunnel_user.c | 7 ++++++- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8e56ac70e0d1..549ac8a98b44 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -888,9 +888,11 @@ enum { /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -#define XDP_FLAGS_SKB_MODE (2U << 0) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_SKB_MODE) + XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE) enum { IFLA_XDP_UNSPEC, diff --git a/net/core/dev.c b/net/core/dev.c index 96cf83da0d66..e56cb71351d4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6873,6 +6873,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, ASSERT_RTNL(); xdp_op = ops->ndo_xdp; + if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) + return -EOPNOTSUPP; if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) xdp_op = generic_xdp_install; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bcb0f610ee42..dda9f1636356 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2199,6 +2199,11 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } + if ((xdp_flags & XDP_FLAGS_SKB_MODE) && + (xdp_flags & XDP_FLAGS_DRV_MODE)) { + err = -EINVAL; + goto errout; + } } if (xdp[IFLA_XDP_FD]) { diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 378850c70eb8..17be9ea3ecb2 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -62,13 +62,14 @@ static void usage(const char *prog) fprintf(stderr, "usage: %s [OPTS] IFINDEX\n\n" "OPTS:\n" - " -S use skb-mode\n", + " -S use skb-mode\n" + " -N enforce native mode\n", prog); } int main(int argc, char **argv) { - const char *optstr = "S"; + const char *optstr = "SN"; char filename[256]; int opt; @@ -77,6 +78,9 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 92b8bde9337c..631cdcc41c97 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -79,6 +79,8 @@ static void usage(const char *cmd) printf(" -m Used in sending the IP Tunneled pkt\n"); printf(" -T Default: 0 (forever)\n"); printf(" -P Default is TCP\n"); + printf(" -S use skb-mode\n"); + printf(" -N enforce native mode\n"); printf(" -h Display this help\n"); } @@ -138,7 +140,7 @@ int main(int argc, char **argv) { unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:Sh"; + const char *optstr = "i:a:p:s:d:m:T:P:SNh"; int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -206,6 +208,9 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; default: usage(argv[0]); return 1; -- cgit v1.2.3-59-g8ed1b From d67b9cd28c1d7f82c2e5e727731ea7c89b23a0a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 12 May 2017 01:04:46 +0200 Subject: xdp: refine xdp api with regards to generic xdp While working on the iproute2 generic XDP frontend, I noticed that as of right now it's possible to have native *and* generic XDP programs loaded both at the same time for the case when a driver supports native XDP. The intended model for generic XDP from b5cdae3291f7 ("net: Generic XDP") is, however, that only one out of the two can be present at once which is also indicated as such in the XDP netlink dump part. The main rationale for generic XDP is to ease accessibility (in case a driver does not yet have XDP support) and to generically provide a semantical model as an example for driver developers wanting to add XDP support. The generic XDP option for an XDP aware driver can still be useful for comparing and testing both implementations. However, it is not intended to have a second XDP processing stage or layer with exactly the same functionality of the first native stage. Only reason could be to have a partial fallback for future XDP features that are not supported yet in the native implementation and we probably also shouldn't strive for such fallback and instead encourage native feature support in the first place. Given there's currently no such fallback issue or use case, lets not go there yet if we don't need to. Therefore, change semantics for loading XDP and bail out if the user tries to load a generic XDP program when a native one is present and vice versa. Another alternative to bailing out would be to handle the transition from one flavor to another gracefully, but that would require to bring the device down, exchange both types of programs, and bring it up again in order to avoid a tiny window where a packet could hit both hooks. Given this complicates the logic for just a debugging feature in the native case, I went with the simpler variant. For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7 and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all or just a subset of flags that were used for loading the XDP prog is suboptimal in the long run since not all flags are useful for dumping and if we start to reuse the same flag definitions for load and dump, then we'll waste bit space. What we really just want is to dump the mode for now. Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0), a program is running at the native driver layer (1). Thus, add a mode that says that a program is running at generic XDP layer (2). Applications will handle this fine in that older binaries will just indicate that something is attached at XDP layer, effectively this is similar to IFLA_XDP_FLAGS attr that we would have had modulo the redundancy. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++-- include/uapi/linux/if_link.h | 7 ++++++ net/core/dev.c | 55 +++++++++++++++++++++++++++++--------------- net/core/rtnetlink.c | 40 +++++++++++++++----------------- 4 files changed, 67 insertions(+), 43 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c23bd2efb56..3f39d27decf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3296,11 +3296,15 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); + +typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 549ac8a98b44..15ac20382aba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -894,6 +894,13 @@ enum { XDP_FLAGS_SKB_MODE | \ XDP_FLAGS_DRV_MODE) +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, +}; + enum { IFLA_XDP_UNSPEC, IFLA_XDP_FD, diff --git a/net/core/dev.c b/net/core/dev.c index e56cb71351d4..fca407b4a6ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6852,6 +6852,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_QUERY_PROG; + + /* Query must always succeed. */ + WARN_ON(xdp_op(dev, &xdp) < 0); + return xdp.prog_attached; +} + +static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, + struct netlink_ext_ack *extack, + struct bpf_prog *prog) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_SETUP_PROG; + xdp.extack = extack; + xdp.prog = prog; + + return xdp_op(dev, &xdp); +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device @@ -6864,43 +6890,34 @@ EXPORT_SYMBOL(dev_change_proto_down); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags) { - int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp); const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - struct netdev_xdp xdp; + xdp_op_t xdp_op, xdp_chk; int err; ASSERT_RTNL(); - xdp_op = ops->ndo_xdp; + xdp_op = xdp_chk = ops->ndo_xdp; if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) return -EOPNOTSUPP; if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) xdp_op = generic_xdp_install; + if (xdp_op == xdp_chk) + xdp_chk = generic_xdp_install; if (fd >= 0) { - if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - - err = xdp_op(dev, &xdp); - if (err < 0) - return err; - if (xdp.prog_attached) - return -EBUSY; - } + if (xdp_chk && __dev_xdp_attached(dev, xdp_chk)) + return -EEXIST; + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && + __dev_xdp_attached(dev, xdp_op)) + return -EBUSY; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_SETUP_PROG; - xdp.extack = extack; - xdp.prog = prog; - - err = xdp_op(dev, &xdp); + err = dev_xdp_install(dev, xdp_op, extack, prog); if (err < 0 && prog) bpf_prog_put(prog); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dda9f1636356..d7f82c3450b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ - nla_total_size(1) + /* XDP_ATTACHED */ - nla_total_size(4); /* XDP_FLAGS */ + nla_total_size(1); /* XDP_ATTACHED */ return xdp_size; } @@ -1247,37 +1246,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } +static u8 rtnl_xdp_attached_mode(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + ASSERT_RTNL(); + + if (rcu_access_pointer(dev->xdp_prog)) + return XDP_ATTACHED_SKB; + if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp)) + return XDP_ATTACHED_DRV; + + return XDP_ATTACHED_NONE; +} + static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *xdp; - u32 xdp_flags = 0; - u8 val = 0; int err; xdp = nla_nest_start(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; - if (rcu_access_pointer(dev->xdp_prog)) { - xdp_flags = XDP_FLAGS_SKB_MODE; - val = 1; - } else if (dev->netdev_ops->ndo_xdp) { - struct netdev_xdp xdp_op = {}; - - xdp_op.command = XDP_QUERY_PROG; - err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); - if (err) - goto err_cancel; - val = xdp_op.prog_attached; - } - err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val); + + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, + rtnl_xdp_attached_mode(dev)); if (err) goto err_cancel; - if (xdp_flags) { - err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags); - if (err) - goto err_cancel; - } nla_nest_end(skb, xdp); return 0; -- cgit v1.2.3-59-g8ed1b From f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 May 2017 15:24:41 -0700 Subject: netem: fix skb_orphan_partial() I should have known that lowering skb->truesize was dangerous :/ In case packets are not leaving the host via a standard Ethernet device, but looped back to local sockets, bad things can happen, as reported by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 ) So instead of tweaking skb->truesize, lets change skb->destructor and keep a reference on the owner socket via its sk_refcnt. Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper") Signed-off-by: Eric Dumazet Reported-by: Michael Madsen Signed-off-by: David S. Miller --- net/core/sock.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 79c6aee6af9b..e43e71d7856b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1803,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w); * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. * But we also want to keep skb->sk set because some packet schedulers - * rely on it (sch_fq for example). So we set skb->truesize to a small - * amount (1) and decrease sk_wmem_alloc accordingly. + * rely on it (sch_fq for example). */ void skb_orphan_partial(struct sk_buff *skb) { - /* If this skb is a TCP pure ACK or already went here, - * we have nothing to do. 2 is already a very small truesize. - */ - if (skb->truesize <= 2) + if (skb_is_tcp_pure_ack(skb)) return; - /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, - * so we do not completely orphan skb, but transfert all - * accounted bytes but one, to avoid unexpected reorders. - */ if (skb->destructor == sock_wfree #ifdef CONFIG_INET || skb->destructor == tcp_wfree #endif ) { - atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); - skb->truesize = 1; + struct sock *sk = skb->sk; + + if (atomic_inc_not_zero(&sk->sk_refcnt)) { + atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + skb->destructor = sock_efree; + } } else { skb_orphan(skb); } -- cgit v1.2.3-59-g8ed1b From b451e5d24ba6687c6f0e7319c727a709a1846c06 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 10 May 2017 17:01:27 -0700 Subject: tcp: avoid fragmenting peculiar skbs in SACK This patch fixes a bug in splitting an SKB during SACK processing. Specifically if an skb contains multiple packets and is only partially sacked in the higher sequences, tcp_match_sack_to_skb() splits the skb and marks the second fragment as SACKed. The current code further attempts rounding up the first fragment to MSS boundaries. But it misses a boundary condition when the rounded-up fragment size (pkt_len) is exactly skb size. Spliting such an skb is pointless and causses a kernel warning and aborts the SACK processing. This patch universally checks such over-split before calling tcp_fragment to prevent these unnecessary warnings. Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a3ad09e2786..06e2dbc2b4a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1179,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; - if (!in_sack && new_len < pkt_len) { + if (!in_sack && new_len < pkt_len) new_len += mss; - if (new_len >= skb->len) - return 0; - } pkt_len = new_len; } + + if (pkt_len >= skb->len && !in_sack) + return 0; + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; -- cgit v1.2.3-59-g8ed1b From cb395b2010879a8461aa1b1c37025769708c32cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 May 2017 21:59:28 -0700 Subject: net: sched: optimize class dumps In commit 59cc1f61f09c ("net: sched: convert qdisc linked list to hashtable") we missed the opportunity to considerably speed up tc_dump_tclass_root() if a qdisc handle is provided by user. Instead of iterating all the qdiscs, use qdisc_match_from_root() to directly get the one we look for. Signed-off-by: Eric Dumazet Cc: Jiri Kosina Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bbe57d57b67f..e88342fde1bc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1831,6 +1831,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (!qdisc_dev(root)) return 0; + if (tcm->tcm_parent) { + q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); + if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + return 0; + } hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; -- cgit v1.2.3-59-g8ed1b From d86b5672b1adb98b4cdd6fbf0224bbfb03db6e2e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 11 May 2017 13:58:06 +0200 Subject: xen-netfront: avoid crashing on resume after a failure in talk_to_netback() Unavoidable crashes in netfront_resume() and netback_changed() after a previous fail in talk_to_netback() (e.g. when we fail to read MAC from xenstore) were discovered. The failure path in talk_to_netback() does unregister/free for netdev but we don't reset drvdata and we try accessing it after resume. Fix the bug by removing the whole xen device completely with device_unregister(), this guarantees we won't have any calls into netfront after a failure. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6ffc482550c1..7b61adb6270c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1934,8 +1934,7 @@ abort_transaction_no_dev_fatal: xennet_disconnect_backend(info); xennet_destroy_queues(info); out: - unregister_netdev(info->netdev); - xennet_free_netdev(info->netdev); + device_unregister(&dev->dev); return err; } -- cgit v1.2.3-59-g8ed1b From f9c3fe2f43343be7972226c2e736e7a68e383dc1 Mon Sep 17 00:00:00 2001 From: "Chopra, Manish" Date: Thu, 11 May 2017 07:12:47 -0700 Subject: qlcnic: Fix link configuration with autoneg disabled Currently driver returns error on speed configurations for 83xx adapter's non XGBE ports, due to this link doesn't come up on the ports using 1000Base-T as a connector with autoneg disabled. This patch fixes this with initializing appropriate port type based on queried module/connector types from hardware before any speed/autoneg configuration. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 34 ++++++++++++++++++++++ .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 1 + .../net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 3 ++ 3 files changed, 38 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 718bf58a7da6..4fb68797630e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3168,6 +3168,40 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr, return 0; } +void qlcnic_83xx_get_port_type(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlcnic_cmd_args cmd; + u32 config; + int err; + + err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_LINK_STATUS); + if (err) + return; + + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) { + dev_info(&adapter->pdev->dev, + "Get Link Status Command failed: 0x%x\n", err); + goto out; + } else { + config = cmd.rsp.arg[3]; + + switch (QLC_83XX_SFP_MODULE_TYPE(config)) { + case QLC_83XX_MODULE_FIBRE_1000BASE_SX: + case QLC_83XX_MODULE_FIBRE_1000BASE_LX: + case QLC_83XX_MODULE_FIBRE_1000BASE_CX: + case QLC_83XX_MODULE_TP_1000BASE_T: + ahw->port_type = QLCNIC_GBE; + break; + default: + ahw->port_type = QLCNIC_XGBE; + } + } +out: + qlcnic_free_mbx_args(&cmd); +} + int qlcnic_83xx_test_link(struct qlcnic_adapter *adapter) { u8 pci_func; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index 3dfe8e27b51c..b75a81246856 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -637,6 +637,7 @@ void qlcnic_83xx_get_pauseparam(struct qlcnic_adapter *, int qlcnic_83xx_set_pauseparam(struct qlcnic_adapter *, struct ethtool_pauseparam *); int qlcnic_83xx_test_link(struct qlcnic_adapter *); +void qlcnic_83xx_get_port_type(struct qlcnic_adapter *adapter); int qlcnic_83xx_reg_test(struct qlcnic_adapter *); int qlcnic_83xx_get_regs_len(struct qlcnic_adapter *); int qlcnic_83xx_get_registers(struct qlcnic_adapter *, u32 *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 9a869c15d8bf..7f7deeaf1cf0 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -486,6 +486,9 @@ static int qlcnic_set_link_ksettings(struct net_device *dev, u32 ret = 0; struct qlcnic_adapter *adapter = netdev_priv(dev); + if (qlcnic_83xx_check(adapter)) + qlcnic_83xx_get_port_type(adapter); + if (adapter->ahw->port_type != QLCNIC_GBE) return -EOPNOTSUPP; -- cgit v1.2.3-59-g8ed1b From 33c16bfd5bc00bbb9823d25af46c496966e0ac0d Mon Sep 17 00:00:00 2001 From: "Chopra, Manish" Date: Thu, 11 May 2017 07:12:48 -0700 Subject: qlcnic: Update version to 5.3.66 Bumping up the version as couple of fixes added after 5.3.65 Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 49bad00a0f8f..7245b1072518 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -37,8 +37,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 3 -#define _QLCNIC_LINUX_SUBVERSION 65 -#define QLCNIC_LINUX_VERSIONID "5.3.65" +#define _QLCNIC_LINUX_SUBVERSION 66 +#define QLCNIC_LINUX_VERSIONID "5.3.66" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) -- cgit v1.2.3-59-g8ed1b From 69a73e744db1a57175039e3d1e6ef3913e816eb8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 May 2017 21:41:09 -0400 Subject: bpf: Remove commented out debugging hack in test_align. Reported-by: Alexander Alemayhu Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_align.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 9dd97948a47f..ed242552e492 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -361,7 +361,6 @@ static int do_test_single(struct bpf_align_test *test) break; } } - /* printf("%s", bpf_vlog); */ close(fd_prog); } return ret; -- cgit v1.2.3-59-g8ed1b From d2be3667f3769b3c60aa294ef7f2b03d1b16559c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 May 2017 19:29:40 +0100 Subject: ethernet: aquantia: remove redundant checks on error status The error status err is initialized as zero and then being checked several times to see if it is less than zero even when it has not been updated. It may seem that the err should be assigned to the return code of the call to the various *offload_en_set calls and then we check for failure, however, these functions are void and never actually return any status. Since these error checks are redundant we can remove these as well as err and the error exit label err_exit. Detected by CoverityScan, CID#1398313 and CID#1398306 ("Logically dead code") Signed-off-by: Colin Ian King Reviewed-by: Lino Sanfilippo Acked-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 13 +------------ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 +----------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 4ee15ff06a44..faeb4935ef3e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -200,29 +200,18 @@ err_exit: static int hw_atl_a0_hw_offload_set(struct aq_hw_s *self, struct aq_nic_cfg_s *aq_nic_cfg) { - int err = 0; - /* TX checksums offloads*/ tpo_ipv4header_crc_offload_en_set(self, 1); tpo_tcp_udp_crc_offload_en_set(self, 1); - if (err < 0) - goto err_exit; /* RX checksums offloads*/ rpo_ipv4header_crc_offload_en_set(self, 1); rpo_tcp_udp_crc_offload_en_set(self, 1); - if (err < 0) - goto err_exit; /* LSO offloads*/ tdm_large_send_offload_en_set(self, 0xFFFFFFFFU); - if (err < 0) - goto err_exit; - - err = aq_hw_err_from_flags(self); -err_exit: - return err; + return aq_hw_err_from_flags(self); } static int hw_atl_a0_hw_init_tx_path(struct aq_hw_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 42150708191d..1bceb7358e5c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -200,25 +200,18 @@ err_exit: static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, struct aq_nic_cfg_s *aq_nic_cfg) { - int err = 0; unsigned int i; /* TX checksums offloads*/ tpo_ipv4header_crc_offload_en_set(self, 1); tpo_tcp_udp_crc_offload_en_set(self, 1); - if (err < 0) - goto err_exit; /* RX checksums offloads*/ rpo_ipv4header_crc_offload_en_set(self, 1); rpo_tcp_udp_crc_offload_en_set(self, 1); - if (err < 0) - goto err_exit; /* LSO offloads*/ tdm_large_send_offload_en_set(self, 0xFFFFFFFFU); - if (err < 0) - goto err_exit; /* LRO offloads */ { @@ -245,10 +238,7 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, rpo_lro_en_set(self, aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U); } - err = aq_hw_err_from_flags(self); - -err_exit: - return err; + return aq_hw_err_from_flags(self); } static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self) -- cgit v1.2.3-59-g8ed1b From ad990dbe6d3ac3af1f5f4484b1126b9fc601e98a Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Thu, 11 May 2017 15:52:30 -0400 Subject: samples/bpf: run cleanup routines when receiving SIGTERM Shahid Habib noticed that when xdp1 was killed from a different console the xdp program was not cleaned-up properly in the kernel and it continued to forward traffic. Most of the applications in samples/bpf cleanup properly, but only when getting SIGINT. Since kill defaults to using SIGTERM, add support to cleanup when the application receives either SIGINT or SIGTERM. Signed-off-by: Andy Gospodarek Reported-by: Shahid Habib Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/cookie_uid_helper_example.c | 4 +++- samples/bpf/offwaketime_user.c | 1 + samples/bpf/sampleip_user.c | 1 + samples/bpf/trace_event_user.c | 1 + samples/bpf/tracex2_user.c | 1 + samples/bpf/xdp1_user.c | 1 + samples/bpf/xdp_tx_iptunnel_user.c | 1 + 7 files changed, 9 insertions(+), 1 deletion(-) diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c index b08ab4e88929..9d751e209f31 100644 --- a/samples/bpf/cookie_uid_helper_example.c +++ b/samples/bpf/cookie_uid_helper_example.c @@ -306,7 +306,9 @@ int main(int argc, char *argv[]) prog_attach_iptables(argv[2]); if (cfg_test_traffic) { if (signal(SIGINT, finish) == SIG_ERR) - error(1, errno, "register handler failed"); + error(1, errno, "register SIGINT handler failed"); + if (signal(SIGTERM, finish) == SIG_ERR) + error(1, errno, "register SIGTERM handler failed"); while (!test_finish) { print_table(); printf("\n"); diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 9cce2a66bd66..512f87a5fd20 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -100,6 +100,7 @@ int main(int argc, char **argv) setrlimit(RLIMIT_MEMLOCK, &r); signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index be59d7dcbdde..4ed690b907ff 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -180,6 +180,7 @@ int main(int argc, char **argv) return 1; } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); /* do sampling */ printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n", diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 0c5561d193a4..fa4336423da5 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -192,6 +192,7 @@ int main(int argc, char **argv) setrlimit(RLIMIT_MEMLOCK, &r); signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 7fee0f1ba9a3..7321a3f253c9 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -127,6 +127,7 @@ int main(int ac, char **argv) } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); /* start 'ping' in the background to have some kfree_skb events */ f = popen("ping -c5 localhost", "r"); diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 17be9ea3ecb2..2431c0321b71 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -106,6 +106,7 @@ int main(int argc, char **argv) } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 631cdcc41c97..715cd12eaca5 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -244,6 +244,7 @@ int main(int argc, char **argv) } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); while (min_port <= max_port) { vip.dport = htons(min_port++); -- cgit v1.2.3-59-g8ed1b From 844cf763fba654436d3a4279b6a672c196cf1901 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 11 May 2017 20:28:15 +0200 Subject: tipc: make macro tipc_wait_for_cond() smp safe The macro tipc_wait_for_cond() is embedding the macro sk_wait_event() to fulfil its task. The latter, in turn, is evaluating the stated condition outside the socket lock context. This is problematic if the condition is accessing non-trivial data structures which may be altered by incoming interrupts, as is the case with the cong_links() linked list, used by socket to keep track of the current set of congested links. We sometimes see crashes when this list is accessed by a condition function at the same time as a SOCK_WAKEUP interrupt is removing an element from the list. We fix this by expanding selected parts of sk_wait_event() into the outer macro, while ensuring that all evaluations of a given condition are performed under socket lock protection. Fixes: commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Reviewed-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0d4f2f455a7c..1b92b72e812f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -362,25 +362,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) return 0; } -#define tipc_wait_for_cond(sock_, timeout_, condition_) \ -({ \ - int rc_ = 0; \ - int done_ = 0; \ - \ - while (!(condition_) && !done_) { \ - struct sock *sk_ = sock->sk; \ - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ - \ - rc_ = tipc_sk_sock_err(sock_, timeout_); \ - if (rc_) \ - break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, \ - TASK_INTERRUPTIBLE); \ - done_ = sk_wait_event(sk_, timeout_, \ - (condition_), &wait_); \ - remove_wait_queue(sk_sleep(sk_), &wait_); \ - } \ - rc_; \ +#define tipc_wait_for_cond(sock_, timeo_, condition_) \ +({ \ + struct sock *sk_; \ + int rc_; \ + \ + while ((rc_ = !(condition_))) { \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + sk_ = (sock_)->sk; \ + rc_ = tipc_sk_sock_err((sock_), timeo_); \ + if (rc_) \ + break; \ + prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + release_sock(sk_); \ + *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ + sched_annotate_sleep(); \ + lock_sock(sk_); \ + remove_wait_queue(sk_sleep(sk_), &wait_); \ + } \ + rc_; \ }) /** -- cgit v1.2.3-59-g8ed1b From 6832a333ed4a7cc4fcb170c045d1d96d0976fdd4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 May 2017 19:30:02 -0700 Subject: bpf: Handle multiple variable additions into packet pointers in verifier. We must accumulate into reg->aux_off rather than use a plain assignment. Add a test for this situation to test_align. Reported-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- tools/testing/selftests/bpf/test_align.c | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e74fb1b87855..39f2dcbc4cbc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1531,7 +1531,7 @@ add_imm: dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ - dst_reg->aux_off = dst_reg->off; + dst_reg->aux_off += dst_reg->off; dst_reg->off = 0; dst_reg->range = 0; if (had_id) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index ed242552e492..9644d4e069de 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -273,6 +273,20 @@ static struct bpf_align_test tests[] = { BPF_EXIT_INSN(), BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + /* Test multiple accumulations of unknown values + * into a packet pointer. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -314,6 +328,29 @@ static struct bpf_align_test tests[] = { * requirements. */ "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* Constant offset is added to R5 packet pointer, + * resulting in reg->off value of 14. + */ + "26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp", + /* Variable offset is added to R5, resulting in an + * auxiliary offset of 14, and an auxiliary alignment of 4. + */ + "27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* Constant is added to R5 again, setting reg->off to 4. */ + "28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* And once more we add a variable, which causes an accumulation + * of reg->off into reg->aux_off_align, with resulting value of + * 18. The auxiliary alignment stays at 4. + */ + "29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (0) + + * reg->aux_off (18) which is 20. Then the variable offset + * is considered using reg->aux_off_align which is 4 and meets + * the load's requirements. + */ + "33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", }, }, }; -- cgit v1.2.3-59-g8ed1b From df0c8d911abf6ba97b2c2fc3c5a12769e0b081a3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 May 2017 11:24:16 -0700 Subject: net: phy: Call bus->reset() after releasing PHYs from reset The API convention makes it that a given MDIO bus reset should be able to access PHY devices in its reset() callback and perform additional MDIO accesses in order to bring the bus and PHYs in a working state. Commit 69226896ad63 ("mdio_bus: Issue GPIO RESET to PHYs.") broke that contract by first calling bus->reset() and then release all PHYs from reset using their shared GPIO line, so restore the expected functionality here. Fixes: 69226896ad63 ("mdio_bus: Issue GPIO RESET to PHYs.") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a898e5c4ef1b..8e73f5f36e71 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -364,9 +364,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) mutex_init(&bus->mdio_lock); - if (bus->reset) - bus->reset(bus); - /* de-assert bus level PHY GPIO resets */ if (bus->num_reset_gpios > 0) { bus->reset_gpiod = devm_kcalloc(&bus->dev, @@ -396,6 +393,9 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) } } + if (bus->reset) + bus->reset(bus); + for (i = 0; i < PHY_MAX_ADDR; i++) { if ((bus->phy_mask & (1 << i)) == 0) { struct phy_device *phydev; -- cgit v1.2.3-59-g8ed1b From 29f6ca6916e29fc46f1418885374d9ed50430687 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 14:59:02 +0900 Subject: scsi: sd: Unlock zone in case of error in sd_setup_write_same_cmnd() scsi_io_init() may fail, leaving a zone of a zoned block device locked. Fix this by properly unlocking the write same request target zone if scsi_io_init() fails. Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f9d1432d7cc5..e60a309b26bf 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -948,6 +948,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) rq->__data_len = sdp->sector_size; ret = scsi_init_io(cmd); rq->__data_len = nr_bytes; + + if (sd_is_zoned(sdkp) && ret != BLKPREP_OK) + sd_zbc_write_unlock_zone(cmd); + return ret; } -- cgit v1.2.3-59-g8ed1b From ed44fd7fd8a6785b73cfc6d44594c434e578d724 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 15:48:19 +0900 Subject: scsi: sd: Write lock zone for REQ_OP_WRITE_ZEROES For a zoned block device, sd_zbc_complete() handles zone write unlock on completion of a REQ_OP_WRITE_ZEROES command but the zone write locking is missing from sd_setup_write_zeroes_cmnd(). This patch fixes this problem by locking the target zone of a REQ_OP_WRITE_ZEROES request. Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e60a309b26bf..de9e2f2ef662 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -827,21 +827,32 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + int ret; if (!(rq->cmd_flags & REQ_NOUNMAP)) { switch (sdkp->zeroing_mode) { case SD_ZERO_WS16_UNMAP: - return sd_setup_write_same16_cmnd(cmd, true); + ret = sd_setup_write_same16_cmnd(cmd, true); + goto out; case SD_ZERO_WS10_UNMAP: - return sd_setup_write_same10_cmnd(cmd, true); + ret = sd_setup_write_same10_cmnd(cmd, true); + goto out; } } if (sdp->no_write_same) return BLKPREP_INVALID; + if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) - return sd_setup_write_same16_cmnd(cmd, false); - return sd_setup_write_same10_cmnd(cmd, false); + ret = sd_setup_write_same16_cmnd(cmd, false); + else + ret = sd_setup_write_same10_cmnd(cmd, false); + +out: + if (sd_is_zoned(sdkp) && ret == BLKPREP_OK) + return sd_zbc_write_lock_zone(cmd); + + return ret; } static void sd_config_write_same(struct scsi_disk *sdkp) -- cgit v1.2.3-59-g8ed1b From 48ae8484e9fc324b4968d33c585e54bc98e44d61 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 10 May 2017 09:53:40 +0200 Subject: scsi: sg: don't return bogus Sg_requests If the list search in sg_get_rq_mark() fails to find a valid request, we return a bogus element. This then can later lead to a GPF in sg_remove_scat(). So don't return bogus Sg_requests in sg_get_rq_mark() but NULL in case the list search doesn't find a valid request. Signed-off-by: Johannes Thumshirn Reported-by: Andrey Konovalov Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Doug Gilbert Reviewed-by: Hannes Reinecke Acked-by: Doug Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0a38ba01b7b4..82c33a6edbea 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2074,11 +2074,12 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id) if ((1 == resp->done) && (!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { resp->done = 2; /* guard against other readers */ - break; + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + return resp; } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return resp; + return NULL; } /* always adds to end of list */ -- cgit v1.2.3-59-g8ed1b From acde25726bc6034b628febb8a4c6c0838736ccbf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 May 2017 16:39:41 +1000 Subject: KVM: PPC: Book3S HV: Add radix checks in real-mode hypercall handlers POWER9 running a radix guest will take some hypervisor interrupts without going to real mode (turning off the MMU). This means that early hypercall handlers may now be called in virtual mode. Most of the handlers work just fine in both modes, but there are some that can crash the host if called in virtual mode, notably the TCE (IOMMU) hypercalls H_PUT_TCE, H_STUFF_TCE and H_PUT_TCE_INDIRECT. These already have both a real-mode and a virtual-mode version, so we arrange for the real-mode version to return H_TOO_HARD for radix guests, which will result in the virtual-mode version being called. The other hypercall which is sensitive to the MMU mode is H_RANDOM. It doesn't have a virtual-mode version, so this adds code to enable it to be called in either mode. An alternative solution was considered which would refuse to call any of the early hypercall handlers when doing a virtual-mode exit from a radix guest. However, the XICS-on-XIVE code depends on the XICS hypercalls being handled early even for virtual-mode exits, because the handlers need to be called before the XIVE vCPU state has been pulled off the hardware. Therefore that solution would have become quite invasive and complicated, and was rejected in favour of the simpler, though less elegant, solution presented here. Reviewed-by: David Gibson Tested-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio_hv.c | 13 +++++++++++++ arch/powerpc/kvm/book3s_hv_builtin.c | 9 ++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index eda0a8f6fae8..3adfd2f5301c 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -301,6 +301,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ + /* For radix, we might be in virtual mode, so punt */ + if (kvm_is_radix(vcpu->kvm)) + return H_TOO_HARD; + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -381,6 +385,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; + /* For radix, we might be in virtual mode, so punt */ + if (kvm_is_radix(vcpu->kvm)) + return H_TOO_HARD; + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -491,6 +499,10 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, long i, ret; struct kvmppc_spapr_tce_iommu_table *stit; + /* For radix, we might be in virtual mode, so punt */ + if (kvm_is_radix(vcpu->kvm)) + return H_TOO_HARD; + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -527,6 +539,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, return H_SUCCESS; } +/* This can be called in either virtual mode or real mode */ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba) { diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 88a65923c649..ee4c2558c305 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -207,7 +207,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_h_random(struct kvm_vcpu *vcpu) { - if (powernv_get_random_real_mode(&vcpu->arch.gpr[4])) + int r; + + /* Only need to do the expensive mfmsr() on radix */ + if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR)) + r = powernv_get_random_long(&vcpu->arch.gpr[4]); + else + r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]); + if (r) return H_SUCCESS; return H_HARDWARE; -- cgit v1.2.3-59-g8ed1b From 67325e988faea735d663799b6d152b5f4254093c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 May 2017 11:33:30 +1000 Subject: KVM: PPC: Book3S PR: Check copy_to/from_user return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR KVM implementation of the PAPR HPT hypercalls (H_ENTER etc.) access an image of the HPT in userspace memory using copy_from_user and copy_to_user. Recently, the declarations of those functions were annotated to indicate that the return value must be checked. Since this code doesn't currently check the return value, this causes compile warnings like the ones shown below, and since on PPC the default is to compile arch/powerpc with -Werror, this causes the build to fail. To fix this, we check the return values, and if non-zero, fail the hypercall being processed with a H_FUNCTION error return value. There is really no good error return value to use since PAPR didn't envisage the possibility that the hypervisor may not be able to access the guest's HPT, and H_FUNCTION (function not supported) seems as good as any. The typical compile warnings look like this: CC arch/powerpc/kvm/book3s_pr_papr.o /home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_pr_papr.c: In function ‘kvmppc_h_pr_enter’: /home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_pr_papr.c:53:2: error: ignoring return value of ‘copy_from_user’, declared with attribute warn_unused_result [-Werror=unused-result] copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); ^ /home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_pr_papr.c:74:2: error: ignoring return value of ‘copy_to_user’, declared with attribute warn_unused_result [-Werror=unused-result] copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); ^ ... etc. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr_papr.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index bcbeeb62dd13..a04384adece7 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -50,7 +50,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) pteg_addr = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); + ret = H_FUNCTION; + if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg))) + goto done; hpte = pteg; ret = H_PTEG_FULL; @@ -71,7 +73,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6)); hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7)); pteg_addr += i * HPTE_SIZE; - copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE)) + goto done; kvmppc_set_gpr(vcpu, 4, pte_index | i); ret = H_SUCCESS; @@ -93,7 +97,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_FUNCTION; + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) + goto done; pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -103,7 +109,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) goto done; - copy_to_user((void __user *)pteg, &v, sizeof(v)); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg, &v, sizeof(v))) + goto done; rb = compute_tlbie_rb(pte[0], pte[1], pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); @@ -171,7 +179,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) } pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) { + ret = H_FUNCTION; + break; + } pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -184,7 +195,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) tsh |= H_BULK_REMOVE_NOT_FOUND; } else { /* Splat the pteg in (userland) hpt */ - copy_to_user((void __user *)pteg, &v, sizeof(v)); + if (copy_to_user((void __user *)pteg, &v, sizeof(v))) { + ret = H_FUNCTION; + break; + } rb = compute_tlbie_rb(pte[0], pte[1], tsh & H_BULK_REMOVE_PTEX); @@ -211,7 +225,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_FUNCTION; + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) + goto done; pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -234,7 +250,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); pte[0] = (__force u64)cpu_to_be64(pte[0]); pte[1] = (__force u64)cpu_to_be64(pte[1]); - copy_to_user((void __user *)pteg, pte, sizeof(pte)); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg, pte, sizeof(pte))) + goto done; ret = H_SUCCESS; done: -- cgit v1.2.3-59-g8ed1b From 70d466f760b351fe30b5f8c956354ddf29aa676b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 11 May 2017 15:28:28 -0700 Subject: md/r5cache: gracefully handle journal device errors for writeback mode For the raid456 with writeback cache, when journal device failed during normal operation, it is still possible to persist all data, as all pending data is still in stripe cache. However, it is necessary to handle journal failure gracefully. During journal failures, the following logic handles the graceful shutdown of journal: 1. raid5_error() marks the device as Faulty and schedules async work log->disable_writeback_work; 2. In disable_writeback_work (r5c_disable_writeback_async), the mddev is suspended, set to write through, and then resumed. mddev_suspend() flushes all cached stripes; 3. All cached stripes need to be flushed carefully to the RAID array. This patch fixes issues within the process above: 1. In r5c_update_on_rdev_error() schedule disable_writeback_work for journal failures; 2. In r5c_disable_writeback_async(), wait for MD_SB_CHANGE_PENDING, since raid5_error() updates superblock. 3. In handle_stripe(), allow stripes with data in journal (s.injournal > 0) to make progress during log_failed; 4. In delay_towrite(), if log failed only process data in the cache (skip new writes in dev->towrite); 5. In __get_priority_stripe(), process loprio_list during journal device failures. 6. In raid5_remove_disk(), wait for all cached stripes are flushed before calling log_exit(). Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 11 +++++++++-- drivers/md/raid5-log.h | 3 ++- drivers/md/raid5.c | 29 +++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index a6a62e212cd3..cc3f8442f11f 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -24,6 +24,7 @@ #include "md.h" #include "raid5.h" #include "bitmap.h" +#include "raid5-log.h" /* * metadata/data stored in disk with 4k size unit (a block) regardless @@ -680,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work) return; pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n", mdname(mddev)); + + /* wait superblock change before suspend */ + wait_event(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); + mddev_suspend(mddev); log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; mddev_resume(mddev); @@ -2983,7 +2989,7 @@ ioerr: return ret; } -void r5c_update_on_rdev_error(struct mddev *mddev) +void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev) { struct r5conf *conf = mddev->private; struct r5l_log *log = conf->log; @@ -2991,7 +2997,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev) if (!log) return; - if (raid5_calc_degraded(conf) > 0 && + if ((raid5_calc_degraded(conf) > 0 || + test_bit(Journal, &rdev->flags)) && conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK) schedule_work(&log->disable_writeback_work); } diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 27097101ccca..328d67aedda4 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -28,7 +28,8 @@ extern void r5c_flush_cache(struct r5conf *conf, int num); extern void r5c_check_stripe_cache_usage(struct r5conf *conf); extern void r5c_check_cached_full_stripe(struct r5conf *conf); extern struct md_sysfs_entry r5c_journal_mode; -extern void r5c_update_on_rdev_error(struct mddev *mddev); +extern void r5c_update_on_rdev_error(struct mddev *mddev, + struct md_rdev *rdev); extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); extern struct dma_async_tx_descriptor * diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8055a7abb4b..0ac57a925606 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2689,7 +2689,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) bdevname(rdev->bdev, b), mdname(mddev), conf->raid_disks - mddev->degraded); - r5c_update_on_rdev_error(mddev); + r5c_update_on_rdev_error(mddev, rdev); } /* @@ -3050,6 +3050,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous) * When LOG_CRITICAL, stripes with injournal == 0 will be sent to * no_space_stripes list. * + * 3. during journal failure + * In journal failure, we try to flush all cached data to raid disks + * based on data in stripe cache. The array is read-only to upper + * layers, so we would skip all pending writes. + * */ static inline bool delay_towrite(struct r5conf *conf, struct r5dev *dev, @@ -3063,6 +3068,9 @@ static inline bool delay_towrite(struct r5conf *conf, if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && s->injournal > 0) return true; + /* case 3 above */ + if (s->log_failed && s->injournal) + return true; return false; } @@ -4696,10 +4704,15 @@ static void handle_stripe(struct stripe_head *sh) " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, s.failed_num[0], s.failed_num[1]); - /* check if the array has lost more than max_degraded devices and, + /* + * check if the array has lost more than max_degraded devices and, * if so, some requests might need to be failed. + * + * When journal device failed (log_failed), we will only process + * the stripe if there is data need write to raid disks */ - if (s.failed > conf->max_degraded || s.log_failed) { + if (s.failed > conf->max_degraded || + (s.log_failed && s.injournal == 0)) { sh->check_state = 0; sh->reconstruct_state = 0; break_stripe_batch_list(sh, 0); @@ -5272,8 +5285,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) struct stripe_head *sh, *tmp; struct list_head *handle_list = NULL; struct r5worker_group *wg; - bool second_try = !r5c_is_writeback(conf->log); - bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state); + bool second_try = !r5c_is_writeback(conf->log) && + !r5l_log_disk_error(conf); + bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || + r5l_log_disk_error(conf); again: wg = NULL; @@ -7521,7 +7536,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) * neilb: there is no locking about new writes here, * so this cannot be safe. */ - if (atomic_read(&conf->active_stripes)) { + if (atomic_read(&conf->active_stripes) || + atomic_read(&conf->r5c_cached_full_stripes) || + atomic_read(&conf->r5c_cached_partial_stripes)) { return -EBUSY; } log_exit(conf); -- cgit v1.2.3-59-g8ed1b From 5ddf0440a1a28f00f69ed2e093476bab3b60c2c3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 11 May 2017 17:03:44 -0700 Subject: md/r5cache: handle sync with data in write back cache Currently, sync of raid456 array cannot make progress when hitting data in writeback r5cache. This patch fixes this issue by flushing cached data of the stripe before processing the sync request. This is achived by: 1. In handle_stripe(), do not set STRIPE_SYNCING if the stripe is in write back cache; 2. In r5c_try_caching_write(), handle the stripe in sync with write through; 3. In do_release_stripe(), make stripe in sync write out and send it to the state machine. Shaohua: explictly set STRIPE_HANDLE after write out completed Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 8 +++++++- drivers/md/raid5.c | 21 +++++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index cc3f8442f11f..4c00bc248287 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2637,8 +2637,11 @@ int r5c_try_caching_write(struct r5conf *conf, * When run in degraded mode, array is set to write-through mode. * This check helps drain pending write safely in the transition to * write-through mode. + * + * When a stripe is syncing, the write is also handled in write + * through mode. */ - if (s->failed) { + if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) { r5c_make_stripe_write_out(sh); return -EAGAIN; } @@ -2841,6 +2844,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf, } r5l_append_flush_payload(log, sh->sector); + /* stripe is flused to raid disks, we can do resync now */ + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) + set_bit(STRIPE_HANDLE, &sh->state); } int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0ac57a925606..9c4f7659f8b1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -233,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, if (test_bit(R5_InJournal, &sh->dev[i].flags)) injournal++; /* - * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with - * data in journal, so they are not released to cached lists + * In the following cases, the stripe cannot be released to cached + * lists. Therefore, we make the stripe write out and set + * STRIPE_HANDLE: + * 1. when quiesce in r5c write back; + * 2. when resync is requested fot the stripe. */ - if (conf->quiesce && r5c_is_writeback(conf->log) && - !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) { + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) || + (conf->quiesce && r5c_is_writeback(conf->log) && + !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) { if (test_bit(STRIPE_R5C_CACHING, &sh->state)) r5c_make_stripe_write_out(sh); set_bit(STRIPE_HANDLE, &sh->state); @@ -4656,8 +4660,13 @@ static void handle_stripe(struct stripe_head *sh) if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { spin_lock(&sh->stripe_lock); - /* Cannot process 'sync' concurrently with 'discard' */ - if (!test_bit(STRIPE_DISCARD, &sh->state) && + /* + * Cannot process 'sync' concurrently with 'discard'. + * Flush data in r5cache before 'sync'. + */ + if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && + !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) && + !test_bit(STRIPE_DISCARD, &sh->state) && test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { set_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_INSYNC, &sh->state); -- cgit v1.2.3-59-g8ed1b From 76d837a4c0f905f98088877d780169d7a14a6b29 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 May 2017 14:31:59 +1000 Subject: KVM: PPC: Book3S PR: Don't include SPAPR TCE code on non-pseries platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e91aa8e6ecd5 ("KVM: PPC: Enable IOMMU_API for KVM_BOOK3S_64 permanently", 2017-03-22) enabled the SPAPR TCE code for all 64-bit Book 3S kernel configurations in order to simplify the code and reduce #ifdefs. However, 64-bit Book 3S PPC platforms other than pseries and powernv don't implement the necessary IOMMU callbacks, leading to build failures like the following (for a pasemi config): scripts/kconfig/conf --silentoldconfig Kconfig warning: (KVM_BOOK3S_64) selects SPAPR_TCE_IOMMU which has unmet direct dependencies (IOMMU_SUPPORT && (PPC_POWERNV || PPC_PSERIES)) ... CC [M] arch/powerpc/kvm/book3s_64_vio.o /home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_64_vio.c: In function ‘kvmppc_clear_tce’: /home/paulus/kernel/kvm/arch/powerpc/kvm/book3s_64_vio.c:363:2: error: implicit declaration of function ‘iommu_tce_xchg’ [-Werror=implicit-function-declaration] iommu_tce_xchg(tbl, entry, &hpa, &dir); ^ To fix this, we make the inclusion of the SPAPR TCE support, and the code that uses it in book3s_vio.c and book3s_vio_hv.c, depend on the inclusion of support for the pseries and/or powernv platforms. This means that when running a 'pseries' guest on those platforms, the guest won't have in-kernel acceleration of the PAPR TCE hypercalls, but at least now they compile. Reviewed-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/Kconfig | 2 +- arch/powerpc/kvm/Makefile | 4 ++-- arch/powerpc/kvm/book3s_pr_papr.c | 36 +++++++++++++++++++++++++++--------- arch/powerpc/kvm/powerpc.c | 4 +++- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 24de532c1736..0c52cb5d43f5 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -67,7 +67,7 @@ config KVM_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE - select SPAPR_TCE_IOMMU if IOMMU_SUPPORT + select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV) ---help--- Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index d91a2604c496..381a6ec0ff3b 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -46,7 +46,7 @@ kvm-e500mc-objs := \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) -kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ +kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \ book3s_64_vio_hv.o kvm-pr-y := \ @@ -90,11 +90,11 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o +kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o kvm-book3s_64-module-objs := \ $(common-objs-y) \ book3s.o \ - book3s_64_vio.o \ book3s_rtas.o \ $(kvm-book3s_64-objs-y) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index a04384adece7..8a4205fa774f 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -262,36 +262,37 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) return EMULATE_DONE; } -static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) +static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu) { - unsigned long liobn = kvmppc_get_gpr(vcpu, 4); - unsigned long ioba = kvmppc_get_gpr(vcpu, 5); - unsigned long tce = kvmppc_get_gpr(vcpu, 6); long rc; - rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce); + rc = kvmppc_h_logical_ci_load(vcpu); if (rc == H_TOO_HARD) return EMULATE_FAIL; kvmppc_set_gpr(vcpu, 3, rc); return EMULATE_DONE; } -static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu) +static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) { long rc; - rc = kvmppc_h_logical_ci_load(vcpu); + rc = kvmppc_h_logical_ci_store(vcpu); if (rc == H_TOO_HARD) return EMULATE_FAIL; kvmppc_set_gpr(vcpu, 3, rc); return EMULATE_DONE; } -static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) +#ifdef CONFIG_SPAPR_TCE_IOMMU +static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) { + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); + unsigned long tce = kvmppc_get_gpr(vcpu, 6); long rc; - rc = kvmppc_h_logical_ci_store(vcpu); + rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce); if (rc == H_TOO_HARD) return EMULATE_FAIL; kvmppc_set_gpr(vcpu, 3, rc); @@ -329,6 +330,23 @@ static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +#else /* CONFIG_SPAPR_TCE_IOMMU */ +static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) +{ + return EMULATE_FAIL; +} + +static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu) +{ + return EMULATE_FAIL; +} + +static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu) +{ + return EMULATE_FAIL; +} +#endif /* CONFIG_SPAPR_TCE_IOMMU */ + static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { long rc = kvmppc_xics_hcall(vcpu, cmd); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f7cf2cd564ef..7f71ab5fcad1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1749,7 +1749,7 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_enable_cap(kvm, &cap); break; } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_SPAPR_TCE_IOMMU case KVM_CREATE_SPAPR_TCE_64: { struct kvm_create_spapr_tce_64 create_tce_64; @@ -1780,6 +1780,8 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64); goto out; } +#endif +#ifdef CONFIG_PPC_BOOK3S_64 case KVM_PPC_GET_SMMU_INFO: { struct kvm_ppc_smmu_info info; struct kvm *kvm = filp->private_data; -- cgit v1.2.3-59-g8ed1b From 5ba9b0a14132d0b8d97affe909f324045a968d03 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 12 May 2017 11:55:26 +0800 Subject: irqchip/mbigen: Fix memory mapping code Some mbigens share memory regions, and devm_ioremap_resource does not allow to share resources which will break the probe of mbigen, in opposition to devm_ioremap. This patch restores back usage of devm_ioremap function, but with proper error handling and logging. Fixes: 216646e4d82e ("irqchip/mbigen: Fix return value check in mbigen_device_probe()") Signed-off-by: Hanjun Guo Acked-by: Marc Zyngier Cc: Kefeng Wang Cc: linuxarm@huawei.com Cc: Wei Yongjun Cc: MaJun Link: http://lkml.kernel.org/r/1494561328-39514-2-git-send-email-guohanjun@huawei.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mbigen.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index d2306c821ebb..0f5e66e96bd9 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -337,9 +337,12 @@ static int mbigen_device_probe(struct platform_device *pdev) mgn_chip->pdev = pdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mgn_chip->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mgn_chip->base)) - return PTR_ERR(mgn_chip->base); + mgn_chip->base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!mgn_chip->base) { + dev_err(&pdev->dev, "failed to ioremap %pR\n", res); + return -ENOMEM; + } if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) err = mbigen_of_create_domain(pdev, mgn_chip); -- cgit v1.2.3-59-g8ed1b From ad7cc3c0c57d77b442db323056354d0e49833569 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 12 May 2017 11:55:27 +0800 Subject: irqchip/mbigen: Fix potential NULL dereferencing platform_get_resource() may return NULL, add proper check to avoid potential NULL dereferencing. Signed-off-by: Hanjun Guo Acked-by: Marc Zyngier Cc: Kefeng Wang Cc: linuxarm@huawei.com Cc: Wei Yongjun Cc: MaJun Link: http://lkml.kernel.org/r/1494561328-39514-3-git-send-email-guohanjun@huawei.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mbigen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 0f5e66e96bd9..2fa1e457190d 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -337,6 +337,9 @@ static int mbigen_device_probe(struct platform_device *pdev) mgn_chip->pdev = pdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + mgn_chip->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!mgn_chip->base) { -- cgit v1.2.3-59-g8ed1b From 9459a04b6a5a09967eec94a1b66f0a74312819d9 Mon Sep 17 00:00:00 2001 From: MaJun Date: Fri, 12 May 2017 11:55:28 +0800 Subject: irqchip/mbigen: Fix the clear register offset calculation The register array offset for clearing an interrupt is calculated by: offset = (hwirq - RESERVED_IRQ_PER_MBIGEN_CHIP) / 32; This is wrong because the clear register array includes the reserved interrupts. So the clear operation ends up in the wrong register. This went unnoticed so far, because the hardware clears the real bit through a timeout mechanism when the hardware is configured in debug mode. That debug mode was enabled on early generations of the hardware, so the problem was papered over. On newer hardware with updated firmware the debug mode was disabled, so the bits did not get cleared which causes the system to malfunction. Remove the subtraction of RESERVED_IRQ_PER_MBIGEN_CHIP, so the correct register is accessed. [ tglx: Rewrote changelog ] Fixes: a6c2f87b8820 ("irqchip/mbigen: Implement the mbigen irq chip operation functions") Signed-off-by: MaJun Signed-off-by: Hanjun Guo Acked-by: Marc Zyngier Cc: Kefeng Wang Cc: linuxarm@huawei.com Cc: Wei Yongjun Link: http://lkml.kernel.org/r/1494561328-39514-4-git-send-email-guohanjun@huawei.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mbigen.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 2fa1e457190d..31d6b5a582d2 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -106,10 +106,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq, u32 *mask, u32 *addr) { - unsigned int ofst; - - hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP; - ofst = hwirq / 32 * 4; + unsigned int ofst = (hwirq / 32) * 4; *mask = 1 << (hwirq % 32); *addr = ofst + REG_MBIGEN_CLEAR_OFFSET; -- cgit v1.2.3-59-g8ed1b From dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 12 May 2017 14:39:52 +0800 Subject: sctp: fix src address selection if using secondary addresses for ipv6 Commit 0ca50d12fe46 ("sctp: fix src address selection if using secondary addresses") has fixed a src address selection issue when using secondary addresses for ipv4. Now sctp ipv6 also has the similar issue. When using a secondary address, sctp_v6_get_dst tries to choose the saddr which has the most same bits with the daddr by sctp_v6_addr_match_len. It may make some cases not work as expected. hostA: [1] fd21:356b:459a:cf10::11 (eth1) [2] fd21:356b:459a:cf20::11 (eth2) hostB: [a] fd21:356b:459a:cf30::2 (eth1) [b] fd21:356b:459a:cf40::2 (eth2) route from hostA to hostB: fd21:356b:459a:cf30::/64 dev eth1 metric 1024 mtu 1500 The expected path should be: fd21:356b:459a:cf10::11 <-> fd21:356b:459a:cf30::2 But addr[2] matches addr[a] more bits than addr[1] does, according to sctp_v6_addr_match_len. It causes the path to be: fd21:356b:459a:cf20::11 <-> fd21:356b:459a:cf30::2 This patch is to fix it with the same way as Marcelo's fix for sctp ipv4. As no ip_dev_find for ipv6, this patch is to use ipv6_chk_addr to check if the saddr is in a dev instead. Note that for backwards compatibility, it will still do the addr_match_len check here when no optimal is found. Reported-by: Patrick Talbert Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 961ee59f696a..142b70e959af 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; - union sctp_addr *baddr = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; __u8 matchlen = 0; - __u8 bmatchlen; sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); @@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid) + struct dst_entry *bdst; + __u8 bmatchlen; + + if (!laddr->valid || + laddr->state != SCTP_ADDR_SRC || + laddr->a.sa.sa_family != AF_INET6 || + scope > sctp_scope(&laddr->a)) continue; - if ((laddr->state == SCTP_ADDR_SRC) && - (laddr->a.sa.sa_family == AF_INET6) && - (scope <= sctp_scope(&laddr->a))) { - bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); - if (!baddr || (matchlen < bmatchlen)) { - baddr = &laddr->a; - matchlen = bmatchlen; - } - } - } - if (baddr) { - fl6->saddr = baddr->v6.sin6_addr; - fl6->fl6_sport = baddr->v6.sin6_port; + + fl6->saddr = laddr->a.v6.sin6_addr; + fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p); + bdst = ip6_dst_lookup_flow(sk, fl6, final_p); + + if (!IS_ERR(bdst) && + ipv6_chk_addr(dev_net(bdst->dev), + &laddr->a.v6.sin6_addr, bdst->dev, 1)) { + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + break; + } + + bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); + if (matchlen > bmatchlen) + continue; + + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + matchlen = bmatchlen; } rcu_read_unlock(); -- cgit v1.2.3-59-g8ed1b From 9fc3e4dc67fde953fd26adb3cea8f92597810b64 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 11 May 2017 22:11:29 -0500 Subject: net: dsa: mv88e6xxx: add default case to switch Add default case to switch in order to avoid any chance of using an uninitialized variable _low_, in case s->type does not match any of the listed case values. Addresses-Coverity-ID: 1398130 Suggested-by: Andrew Lunn Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 19581d783d8e..d034d8cd7d22 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -849,6 +849,9 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, mv88e6xxx_g1_stats_read(chip, reg, &low); if (s->sizeof_stat == 8) mv88e6xxx_g1_stats_read(chip, reg + 1, &high); + break; + default: + return UINT64_MAX; } value = (((u64)high) << 16) | low; return value; -- cgit v1.2.3-59-g8ed1b From 75cf067953d5ee543b3bda90bbfcbee5e1f94ae8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 May 2017 12:11:13 +0200 Subject: net: irda: irda-usb: fix firmware name on big-endian hosts Add missing endianness conversion when using the USB device-descriptor bcdDevice field to construct a firmware file name. Fixes: 8ef80aef118e ("[IRDA]: irda-usb.c: STIR421x cleanups") Cc: stable # 2.6.18 Cc: Nick Fedchik Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/irda/irda-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index 8716b8c07feb..6f3c805f7211 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -1077,7 +1077,7 @@ static int stir421x_patch_device(struct irda_usb_cb *self) * are "42101001.sb" or "42101002.sb" */ sprintf(stir421x_fw_name, "4210%4X.sb", - self->usbdev->descriptor.bcdDevice); + le16_to_cpu(self->usbdev->descriptor.bcdDevice)); ret = request_firmware(&fw, stir421x_fw_name, &self->usbdev->dev); if (ret < 0) return ret; -- cgit v1.2.3-59-g8ed1b From b12ca80ca145cecadf841ba27cc061c510cd97ca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 May 2017 12:13:26 +0200 Subject: net: ch9200: add missing USB-descriptor endianness conversions Add the missing endianness conversions to a debug statement printing the USB device-descriptor idVendor and idProduct fields during probe. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/ch9200.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c index c4f1c363e24b..9df3c1ffff35 100644 --- a/drivers/net/usb/ch9200.c +++ b/drivers/net/usb/ch9200.c @@ -310,8 +310,8 @@ static int get_mac_address(struct usbnet *dev, unsigned char *data) int rd_mac_len = 0; netdev_dbg(dev->net, "get_mac_address:\n\tusbnet VID:%0x PID:%0x\n", - dev->udev->descriptor.idVendor, - dev->udev->descriptor.idProduct); + le16_to_cpu(dev->udev->descriptor.idVendor), + le16_to_cpu(dev->udev->descriptor.idProduct)); memset(mac_addr, 0, sizeof(mac_addr)); rd_mac_len = control_read(dev, REQUEST_READ, 0, -- cgit v1.2.3-59-g8ed1b From 42e6cae1b35b186650f5abc7b24c20ab6986c5a0 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Fri, 12 May 2017 17:18:50 +0100 Subject: sfc: revert changes to NIC revision numbers The revision enum values (eg EFX_REV_HUNT_A0) form part of our API, and are included in ethtool. If these are inconsistent then ethtool will print garbage for a register dump (ethtool -d). Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/nic.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 7b916aa21bde..4d7fb8af880d 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -18,8 +18,12 @@ #include "mcdi.h" enum { - EFX_REV_SIENA_A0 = 0, - EFX_REV_HUNT_A0 = 1, + /* Revisions 0-2 were Falcon A0, A1 and B0 respectively. + * They are not supported by this driver but these revision numbers + * form part of the ethtool API for register dumping. + */ + EFX_REV_SIENA_A0 = 3, + EFX_REV_HUNT_A0 = 4, }; static inline int efx_nic_rev(struct efx_nic *efx) -- cgit v1.2.3-59-g8ed1b From 1c4d5f51a812a82de97beee24f48ed05c65ebda5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 12 May 2017 12:00:01 -0400 Subject: vmxnet3: ensure that adapter is in proper state during force_close There are several paths in vmxnet3, where settings changes cause the adapter to be brought down and back up (vmxnet3_set_ringparam among them). Should part of the reset operation fail, these paths call vmxnet3_force_close, which enables all napi instances prior to calling dev_close (with the expectation that vmxnet3_close will then properly disable them again). However, vmxnet3_force_close neglects to clear VMXNET3_STATE_BIT_QUIESCED prior to calling dev_close. As a result vmxnet3_quiesce_dev (called from vmxnet3_close), returns early, and leaves all the napi instances in a enabled state while the device itself is closed. If a device in this state is activated again, napi_enable will be called on already enabled napi_instances, leading to a BUG halt. The fix is to simply enausre that the QUIESCED bit is cleared in vmxnet3_force_close to allow quesence to be completed properly on close. Signed-off-by: Neil Horman CC: Shrikrishna Khare CC: "VMware, Inc." CC: "David S. Miller" Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 25bc764ae7dc..d1c7029ded7c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2962,6 +2962,11 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter) /* we need to enable NAPI, otherwise dev_close will deadlock */ for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); + /* + * Need to clear the quiesce bit to ensure that vmxnet3_close + * can quiesce the device properly + */ + clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); dev_close(adapter->netdev); } -- cgit v1.2.3-59-g8ed1b From bdce57e7ae9cac56cda958029104c752afaf0894 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 3 Mar 2017 09:44:11 -0500 Subject: tools/power/acpi: Add .gitignore file Add a .gitignore file so that git commands do not pick up the resulting binaries and directories. Signed-off-by: Prarit Bhargava Acked-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tools/power/acpi/.gitignore diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore new file mode 100644 index 000000000000..cba3d994995c --- /dev/null +++ b/tools/power/acpi/.gitignore @@ -0,0 +1,4 @@ +acpidbg +acpidump +ec +include -- cgit v1.2.3-59-g8ed1b From f369fdf4f661322b73f3307e9f3cd55fb3a20123 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 9 May 2017 15:02:22 +0800 Subject: Revert "ACPI / button: Remove lid_init_state=method mode" This reverts commit ecb10b694b72ca5ea51b3c90a71ff2a11963425a. The only expected ACPI control method lid device's usage model is 1. Listen to the lid notification, 2. Evaluate _LID after being notified by BIOS, 3. Suspend the system (if users configure to do so) after seeing "close". It's not ensured that BIOS will notify OS after boot/resume, and it's not ensured that BIOS will always generate "open" event upon opening the lid. But there are 2 wrong usage models: 1. When the lid device is responsible for suspend/resume the system, userspace requires to see "open" event to be paired with "close" after the system is resumed, or it will suspend the system again. 2. When an external monitor connects to the laptop attached docks, userspace requires to see "close" event after the system is resumed so that it can determine whether the internal display should remain dark and the external display should be lit on. After we made default kernel behavior to be suitable for usage model 1, users of usage model 2 start to report regressions for such behavior change. Reversion of button.lid_init_state=method doesn't actually reverts to old default behavior as doing so can enter a regression loop, but facilitates users to work the reported regressions around with button.lid_init_state=method. Fixes: ecb10b694b72 (ACPI / button: Remove lid_init_state=method mode) Cc: 4.11+ # 4.11+ Link: https://bugzilla.kernel.org/show_bug.cgi?id=195455 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1430259 Tested-by: Steffen Weber Tested-by: Julian Wiedmann Reported-by: Joachim Frieben Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/acpi-lid.txt | 16 ++++++++++++---- drivers/acpi/button.c | 9 +++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt index 22cb3091f297..effe7af3a5af 100644 --- a/Documentation/acpi/acpi-lid.txt +++ b/Documentation/acpi/acpi-lid.txt @@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues. If the userspace hasn't been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users can use the following kernel parameters to handle the possible issues: -A. button.lid_init_state=open: +A. button.lid_init_state=method: + When this option is specified, the ACPI button driver reports the + initial lid state using the returning value of the _LID control method + and whether the "opened"/"closed" events are paired fully relies on the + firmware implementation. + This option can be used to fix some platforms where the returning value + of the _LID control method is reliable but the initial lid state + notification is missing. + This option is the default behavior during the period the userspace + isn't ready to handle the buggy AML tables. +B. button.lid_init_state=open: When this option is specified, the ACPI button driver always reports the initial lid state as "opened" and whether the "opened"/"closed" events are paired fully relies on the firmware implementation. This may fix some platforms where the returning value of the _LID control method is not reliable and the initial lid state notification is missing. - This option is the default behavior during the period the userspace - isn't ready to handle the buggy AML tables. If the userspace has been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users should always use the following kernel parameter: -B. button.lid_init_state=ignore: +C. button.lid_init_state=ignore: When this option is specified, the ACPI button driver never reports the initial lid state and there is a compensation mechanism implemented to ensure that the reliable "closed" notifications can always be delievered diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 668137e4a069..6d5a8c1d3132 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -57,6 +57,7 @@ #define ACPI_BUTTON_LID_INIT_IGNORE 0x00 #define ACPI_BUTTON_LID_INIT_OPEN 0x01 +#define ACPI_BUTTON_LID_INIT_METHOD 0x02 #define _COMPONENT ACPI_BUTTON_COMPONENT ACPI_MODULE_NAME("button"); @@ -376,6 +377,9 @@ static void acpi_lid_initialize_state(struct acpi_device *device) case ACPI_BUTTON_LID_INIT_OPEN: (void)acpi_lid_notify_state(device, 1); break; + case ACPI_BUTTON_LID_INIT_METHOD: + (void)acpi_lid_update_state(device); + break; case ACPI_BUTTON_LID_INIT_IGNORE: default: break; @@ -559,6 +563,9 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp) if (!strncmp(val, "open", sizeof("open") - 1)) { lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; pr_info("Notify initial lid state as open\n"); + } else if (!strncmp(val, "method", sizeof("method") - 1)) { + lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; + pr_info("Notify initial lid state with _LID return value\n"); } else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) { lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE; pr_info("Do not notify initial lid state\n"); @@ -572,6 +579,8 @@ static int param_get_lid_init_state(char *buffer, struct kernel_param *kp) switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN: return sprintf(buffer, "open"); + case ACPI_BUTTON_LID_INIT_METHOD: + return sprintf(buffer, "method"); case ACPI_BUTTON_LID_INIT_IGNORE: return sprintf(buffer, "ignore"); default: -- cgit v1.2.3-59-g8ed1b From d82dd0e34d0347be201fd274dc84cd645dccc064 Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Fri, 12 May 2017 14:26:10 +0200 Subject: raid1: prefer disk without bad blocks If an array consists of two drives and the first drive has the bad block, the read request to the region overlapping the bad block chooses the same disk (with bad block) as device to read from over and over and the request gets stuck. If the first disk only partially overlaps with bad block, it becomes a candidate ("best disk") for shorter range of sectors. The second disk is capable of reading the entire requested range and it is updated accordingly, however it is not recorded as a best device for the request. In the end the request is sent to the first disk to read entire range of sectors. It fails and is re-tried in a moment but with the same outcome. Actually it is quite likely scenario but it had little exposure in my test until commit 715d40b93b10 ("md/raid1: add failfast handling for reads.") removed preference for idle disk. Such scenario had been passing as second disk was always chosen when idle. Reset a candidate ("best disk") to read from if disk can read entire range. Do it only if other disk has already been chosen as a candidate for a smaller range. The head position / disk type logic will select the best disk to read from - it is fine as disk with bad block won't be considered for it. Signed-off-by: Tomasz Majchrzak Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a17ed6218d51..af5056d56878 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -666,8 +666,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect break; } continue; - } else + } else { + if ((sectors > best_good_sectors) && (best_disk >= 0)) + best_disk = -1; best_good_sectors = sectors; + } if (best_disk >= 0) /* At least two disks to choose from so failfast is OK */ -- cgit v1.2.3-59-g8ed1b From ecdcf622eb74b52cebde1387a7a1852a787d8050 Mon Sep 17 00:00:00 2001 From: Joe Perches via samba-technical Date: Sun, 7 May 2017 01:31:47 -0700 Subject: cifs: cifsacl: Use a temporary ops variable to reduce code length Create an ops variable to store tcon->ses->server->ops and cache indirections and reduce code size a trivial bit. $ size fs/cifs/cifsacl.o* text data bss dec hex filename 5338 136 8 5482 156a fs/cifs/cifsacl.o.new 5371 136 8 5515 158b fs/cifs/cifsacl.o.old Signed-off-by: Joe Perches Acked-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 15bac390dff9..b98436f5c7c7 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1135,20 +1135,19 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, u32 acllen = 0; int rc = 0; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); - struct cifs_tcon *tcon; + struct smb_version_operations *ops; cifs_dbg(NOISY, "converting ACL to mode for %s\n", path); if (IS_ERR(tlink)) return PTR_ERR(tlink); - tcon = tlink_tcon(tlink); - if (pfid && (tcon->ses->server->ops->get_acl_by_fid)) - pntsd = tcon->ses->server->ops->get_acl_by_fid(cifs_sb, pfid, - &acllen); - else if (tcon->ses->server->ops->get_acl) - pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, - &acllen); + ops = tlink_tcon(tlink)->ses->server->ops; + + if (pfid && (ops->get_acl_by_fid)) + pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen); + else if (ops->get_acl) + pntsd = ops->get_acl(cifs_sb, inode, path, &acllen); else { cifs_put_tlink(tlink); return -EOPNOTSUPP; @@ -1181,23 +1180,23 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); - struct cifs_tcon *tcon; + struct smb_version_operations *ops; if (IS_ERR(tlink)) return PTR_ERR(tlink); - tcon = tlink_tcon(tlink); + + ops = tlink_tcon(tlink)->ses->server->ops; cifs_dbg(NOISY, "set ACL from mode for %s\n", path); /* Get the security descriptor */ - if (tcon->ses->server->ops->get_acl == NULL) { + if (ops->get_acl == NULL) { cifs_put_tlink(tlink); return -EOPNOTSUPP; } - pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, - &secdesclen); + pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); @@ -1224,13 +1223,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); - if (tcon->ses->server->ops->set_acl == NULL) + if (ops->set_acl == NULL) rc = -EOPNOTSUPP; if (!rc) { /* Set the security descriptor */ - rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode, - path, aclflag); + rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } cifs_put_tlink(tlink); -- cgit v1.2.3-59-g8ed1b From cd1230070ae1c12fd34cf6a557bfa81bf9311009 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 12 May 2017 17:59:32 +0200 Subject: SMB2: Fix share type handling In fs/cifs/smb2pdu.h, we have: #define SMB2_SHARE_TYPE_DISK 0x01 #define SMB2_SHARE_TYPE_PIPE 0x02 #define SMB2_SHARE_TYPE_PRINT 0x03 Knowing that, with the current code, the SMB2_SHARE_TYPE_PRINT case can never trigger and printer share would be interpreted as disk share. So, test the ShareType value for equality instead. Fixes: faaf946a7d5b ("CIFS: Add tree connect/disconnect capability for SMB2") Signed-off-by: Christophe JAILLET Acked-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e0517f499c38..e4afdaae743f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1240,15 +1240,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_exit; } - if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) + switch (rsp->ShareType) { + case SMB2_SHARE_TYPE_DISK: cifs_dbg(FYI, "connection to disk share\n"); - else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { + break; + case SMB2_SHARE_TYPE_PIPE: tcon->ipc = true; cifs_dbg(FYI, "connection to pipe share\n"); - } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { - tcon->print = true; + break; + case SMB2_SHARE_TYPE_PRINT: + tcon->ipc = true; cifs_dbg(FYI, "connection to printer\n"); - } else { + break; + default: cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType); rc = -EOPNOTSUPP; goto tcon_error_exit; -- cgit v1.2.3-59-g8ed1b From 4328fea77ca30ef6af938ae3f263a3d055a9c0e6 Mon Sep 17 00:00:00 2001 From: Karim Eshapa Date: Fri, 12 May 2017 01:53:38 +0200 Subject: fs: cifs: transport: Use time_after for time comparison Use time_after kernel macro for time comparison that has safety check. Signed-off-by: Karim Eshapa Signed-off-by: Steve French --- fs/cifs/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index de589d0d3739..47a125ece11e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -94,7 +94,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) now = jiffies; /* commands taking longer than one second are indications that something is wrong, unless it is quite a slow link or server */ - if ((now - midEntry->when_alloc) > HZ) { + if (time_after(now, midEntry->when_alloc + HZ)) { if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) { pr_debug(" CIFS slow rsp: cmd %d mid %llu", midEntry->command, midEntry->mid); -- cgit v1.2.3-59-g8ed1b From 67b4c889cc835a2a6e2ff4e20544a33e37e2875d Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 12 May 2017 20:59:10 -0500 Subject: [CIFS] Minor cleanup of xattr query function Some minor cleanup of cifs query xattr functions (will also make SMB3 xattr implementation cleaner as well). Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 +- fs/cifs/cifsproto.h | 3 +-- fs/cifs/cifssmb.c | 4 +++- fs/cifs/inode.c | 3 +-- fs/cifs/xattr.c | 6 ++---- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8be55be70faf..bcc7d9acad64 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -418,7 +418,7 @@ struct smb_version_operations { int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, const unsigned char *, const unsigned char *, char *, - size_t, const struct nls_table *, int); + size_t, struct cifs_sb_info *); int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, const char *, const void *, const __u16, const struct nls_table *, int); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e49958c3f8bb..6eb3147132e3 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -480,8 +480,7 @@ extern int CIFSSMBCopy(unsigned int xid, extern ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, const unsigned char *ea_name, char *EAData, - size_t bufsize, const struct nls_table *nls_codepage, - int remap_special_chars); + size_t bufsize, struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const char *ea_name, const void *ea_value, const __u16 ea_value_len, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4de3186d8a71..fbb0d4cbda41 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -6069,11 +6069,13 @@ ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, const unsigned char *ea_name, char *EAData, size_t buf_size, - const struct nls_table *nls_codepage, int remap) + struct cifs_sb_info *cifs_sb) { /* BB assumes one setup word */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + int remap = cifs_remap(cifs_sb); + struct nls_table *nls_codepage = cifs_sb->local_nls; int rc = 0; int bytes_returned; int list_len; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index c3b2fa0b2ec8..4d1fcd76d022 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -563,8 +563,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path, "SETFILEBITS", ea_value, 4 /* size of buf */, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); + cifs_sb); cifs_put_tlink(tlink); if (rc < 0) return (int)rc; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 20af5187ba63..3cb5c9e2d4e7 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -235,8 +235,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, - full_path, name, value, size, - cifs_sb->local_nls, cifs_remap(cifs_sb)); + full_path, name, value, size, cifs_sb); break; case XATTR_CIFS_ACL: { @@ -336,8 +335,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, - full_path, NULL, data, buf_size, - cifs_sb->local_nls, cifs_remap(cifs_sb)); + full_path, NULL, data, buf_size, cifs_sb); list_ea_exit: kfree(full_path); free_xid(xid); -- cgit v1.2.3-59-g8ed1b From b9a985db98961ae1ba0be169f19df1c567e4ffe0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 11 May 2017 18:21:01 -0500 Subject: pid_ns: Sleep in TASK_INTERRUPTIBLE in zap_pid_ns_processes The code can potentially sleep for an indefinite amount of time in zap_pid_ns_processes triggering the hung task timeout, and increasing the system average. This is undesirable. Sleep with a task state of TASK_INTERRUPTIBLE instead of TASK_UNINTERRUPTIBLE to remove these undesirable side effects. Apparently under heavy load this has been allowing Chrome to trigger the hung time task timeout error and cause ChromeOS to reboot. Reported-by: Vovo Yang Reported-by: Guenter Roeck Tested-by: Guenter Roeck Fixes: 6347e9009104 ("pidns: guarantee that the pidns init will be the last pidns process reaped") Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- kernel/pid_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index d1f3e9f558b8..74a5a7255b4d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) * if reparented. */ for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); if (pid_ns->nr_hashed == init_pids) break; schedule(); -- cgit v1.2.3-59-g8ed1b From 3fd37226216620c1a468afa999739d5016fbc349 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 12 May 2017 19:11:31 +0300 Subject: pid_ns: Fix race between setns'ed fork() and zap_pid_ns_processes() Imagine we have a pid namespace and a task from its parent's pid_ns, which made setns() to the pid namespace. The task is doing fork(), while the pid namespace's child reaper is dying. We have the race between them: Task from parent pid_ns Child reaper copy_process() .. alloc_pid() .. .. zap_pid_ns_processes() .. disable_pid_allocation() .. read_lock(&tasklist_lock) .. iterate over pids in pid_ns .. kill tasks linked to pids .. read_unlock(&tasklist_lock) write_lock_irq(&tasklist_lock); .. attach_pid(p, PIDTYPE_PID); .. .. .. So, just created task p won't receive SIGKILL signal, and the pid namespace will be in contradictory state. Only manual kill will help there, but does the userspace care about this? I suppose, the most users just inject a task into a pid namespace and wait a SIGCHLD from it. The patch fixes the problem. It simply checks for (pid_ns->nr_hashed & PIDNS_HASH_ADDING) in copy_process(). We do it under the tasklist_lock, and can't skip PIDNS_HASH_ADDING as noted by Oleg: "zap_pid_ns_processes() does disable_pid_allocation() and then takes tasklist_lock to kill the whole namespace. Given that copy_process() checks PIDNS_HASH_ADDING under write_lock(tasklist) they can't race; if copy_process() takes this lock first, the new child will be killed, otherwise copy_process() can't miss the change in ->nr_hashed." If allocation is disabled, we just return -ENOMEM like it's made for such cases in alloc_pid(). v2: Do not move disable_pid_allocation(), do not introduce a new variable in copy_process() and simplify the patch as suggested by Oleg Nesterov. Account the problem with double irq enabling found by Eric W. Biederman. Fixes: c876ad768215 ("pidns: Stop pid allocation when init dies") Signed-off-by: Kirill Tkhai CC: Andrew Morton CC: Ingo Molnar CC: Peter Zijlstra CC: Oleg Nesterov CC: Mike Rapoport CC: Michal Hocko CC: Andy Lutomirski CC: "Eric W. Biederman" CC: Andrei Vagin CC: Cyrill Gorcunov CC: Serge Hallyn Cc: stable@vger.kernel.org Acked-by: Oleg Nesterov Signed-off-by: Eric W. Biederman --- kernel/fork.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 06d759ab4c62..aa1076c5e4a9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1845,11 +1845,13 @@ static __latent_entropy struct task_struct *copy_process( */ recalc_sigpending(); if (signal_pending(current)) { - spin_unlock(¤t->sighand->siglock); - write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cancel_cgroup; } + if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) { + retval = -ENOMEM; + goto bad_fork_cancel_cgroup; + } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -1907,6 +1909,8 @@ static __latent_entropy struct task_struct *copy_process( return p; bad_fork_cancel_cgroup: + spin_unlock(¤t->sighand->siglock); + write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p); bad_fork_free_pid: cgroup_threadgroup_change_end(current); -- cgit v1.2.3-59-g8ed1b From 9d109081c261d87fc84e0cce245796796ae4c460 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 13 May 2017 16:18:21 -0700 Subject: dax: fix false CONFIG_BLOCK dependency In the BLOCK=n case the dax core does not need to / must not emit the block-device-dax helpers. Otherwise it leads to compile errors. Cc: Arnd Bergmann Reported-by: Fabian Frederick Fixes: ef51042472f5 ("block, dax: move 'select DAX' from BLOCK to FS_DAX") Signed-off-by: Dan Williams --- drivers/dax/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index ebf43f531ada..6ed32aac8bbe 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -44,6 +44,7 @@ void dax_read_unlock(int id) } EXPORT_SYMBOL_GPL(dax_read_unlock); +#ifdef CONFIG_BLOCK int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, pgoff_t *pgoff) { @@ -112,6 +113,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) return 0; } EXPORT_SYMBOL_GPL(__bdev_dax_supported); +#endif /** * struct dax_device - anchor object for dax services -- cgit v1.2.3-59-g8ed1b From 33fc30b470983e5b641a16cccc882f6777dd50ef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 May 2017 02:06:03 +0200 Subject: cpufreq: intel_pstate: Document the current behavior and user interface Add a document describing the current behavior and user space interface of the intel_pstate driver in the RST format and drop the existing outdated intel_pstate.txt document. Also update admin-guide/pm/cpufreq.rst with proper RST references to the new intel_pstate.rst document. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpufreq.rst | 19 +- Documentation/admin-guide/pm/index.rst | 1 + Documentation/admin-guide/pm/intel_pstate.rst | 755 ++++++++++++++++++++++++++ Documentation/cpu-freq/intel-pstate.txt | 281 ---------- 4 files changed, 766 insertions(+), 290 deletions(-) create mode 100644 Documentation/admin-guide/pm/intel_pstate.rst delete mode 100644 Documentation/cpu-freq/intel-pstate.txt diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 289c80f7760e..09aa2e949787 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,4 +1,5 @@ .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy ` +.. |intel_pstate| replace:: :doc:`intel_pstate ` ======================= CPU Performance Scaling @@ -75,7 +76,7 @@ feedback registers, as that information is typically specific to the hardware interface it comes from and may not be easily represented in an abstract, platform-independent way. For this reason, ``CPUFreq`` allows scaling drivers to bypass the governor layer and implement their own performance scaling -algorithms. That is done by the ``intel_pstate`` scaling driver. +algorithms. That is done by the |intel_pstate| scaling driver. ``CPUFreq`` Policy Objects @@ -174,13 +175,13 @@ necessary to restart the scaling governor so that it can take the new online CPU into account. That is achieved by invoking the governor's ``->stop`` and ``->start()`` callbacks, in this order, for the entire policy. -As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling +As mentioned before, the |intel_pstate| scaling driver bypasses the scaling governor layer of ``CPUFreq`` and provides its own P-state selection algorithms. -Consequently, if ``intel_pstate`` is used, scaling governors are not attached to +Consequently, if |intel_pstate| is used, scaling governors are not attached to new policy objects. Instead, the driver's ``->setpolicy()`` callback is invoked to register per-CPU utilization update callbacks for each policy. These callbacks are invoked by the CPU scheduler in the same way as for scaling -governors, but in the ``intel_pstate`` case they both determine the P-state to +governors, but in the |intel_pstate| case they both determine the P-state to use and change the hardware configuration accordingly in one go from scheduler context. @@ -257,7 +258,7 @@ are the following: ``scaling_available_governors`` List of ``CPUFreq`` scaling governors present in the kernel that can - be attached to this policy or (if the ``intel_pstate`` scaling driver is + be attached to this policy or (if the |intel_pstate| scaling driver is in use) list of scaling algorithms provided by the driver that can be applied to this policy. @@ -274,7 +275,7 @@ are the following: the CPU is actually running at (due to hardware design and other limitations). - Some scaling drivers (e.g. ``intel_pstate``) attempt to provide + Some scaling drivers (e.g. |intel_pstate|) attempt to provide information more precisely reflecting the current CPU frequency through this attribute, but that still may not be the exact current CPU frequency as seen by the hardware at the moment. @@ -284,13 +285,13 @@ are the following: ``scaling_governor`` The scaling governor currently attached to this policy or (if the - ``intel_pstate`` scaling driver is in use) the scaling algorithm + |intel_pstate| scaling driver is in use) the scaling algorithm provided by the driver that is currently applied to this policy. This attribute is read-write and writing to it will cause a new scaling governor to be attached to this policy or a new scaling algorithm provided by the scaling driver to be applied to it (in the - ``intel_pstate`` case), as indicated by the string written to this + |intel_pstate| case), as indicated by the string written to this attribute (which must be one of the names listed by the ``scaling_available_governors`` attribute described above). @@ -619,7 +620,7 @@ This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls the "boost" setting for the whole system. It is not present if the underlying scaling driver does not support the frequency boost mechanism (or supports it, but provides a driver-specific interface for controlling it, like -``intel_pstate``). +|intel_pstate|). If the value in this file is 1, the frequency boost mechanism is enabled. This means that either the hardware can be put into states in which it is able to diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst index c80f087321fc..7f148f76f432 100644 --- a/Documentation/admin-guide/pm/index.rst +++ b/Documentation/admin-guide/pm/index.rst @@ -6,6 +6,7 @@ Power Management :maxdepth: 2 cpufreq + intel_pstate .. only:: subproject and html diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst new file mode 100644 index 000000000000..33d703989ea8 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -0,0 +1,755 @@ +=============================================== +``intel_pstate`` CPU Performance Scaling Driver +=============================================== + +:: + + Copyright (c) 2017 Intel Corp., Rafael J. Wysocki + + +General Information +=================== + +``intel_pstate`` is a part of the +:doc:`CPU performance scaling subsystem ` in the Linux kernel +(``CPUFreq``). It is a scaling driver for the Sandy Bridge and later +generations of Intel processors. Note, however, that some of those processors +may not be supported. [To understand ``intel_pstate`` it is necessary to know +how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if +you have not done that yet.] + +For the processors supported by ``intel_pstate``, the P-state concept is broader +than just an operating frequency or an operating performance point (see the +`LinuxCon Europe 2015 presentation by Kristen Accardi `_ for more +information about that). For this reason, the representation of P-states used +by ``intel_pstate`` internally follows the hardware specification (for details +refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual +Volume 3: System Programming Guide `_). However, the ``CPUFreq`` core +uses frequencies for identifying operating performance points of CPUs and +frequencies are involved in the user space interface exposed by it, so +``intel_pstate`` maps its internal representation of P-states to frequencies too +(fortunately, that mapping is unambiguous). At the same time, it would not be +practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of +available frequencies due to the possible size of it, so the driver does not do +that. Some functionality of the core is limited by that. + +Since the hardware P-state selection interface used by ``intel_pstate`` is +available at the logical CPU level, the driver always works with individual +CPUs. Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy +object corresponds to one logical CPU and ``CPUFreq`` policies are effectively +equivalent to CPUs. In particular, this means that they become "inactive" every +time the corresponding CPU is taken offline and need to be re-initialized when +it goes back online. + +``intel_pstate`` is not modular, so it cannot be unloaded, which means that the +only way to pass early-configuration-time parameters to it is via the kernel +command line. However, its configuration can be adjusted via ``sysfs`` to a +great extent. In some configurations it even is possible to unregister it via +``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and +registered (see `below `_). + + +Operation Modes +=============== + +``intel_pstate`` can operate in three different modes: in the active mode with +or without hardware-managed P-states support and in the passive mode. Which of +them will be in effect depends on what kernel command line options are used and +on the capabilities of the processor. + +Active Mode +----------- + +This is the default operation mode of ``intel_pstate``. If it works in this +mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq`` +policies contains the string "intel_pstate". + +In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and +provides its own scaling algorithms for P-state selection. Those algorithms +can be applied to ``CPUFreq`` policies in the same way as generic scaling +governors (that is, through the ``scaling_governor`` policy attribute in +``sysfs``). [Note that different P-state selection algorithms may be chosen for +different policies, but that is not recommended.] + +They are not generic scaling governors, but their names are the same as the +names of some of those governors. Moreover, confusingly enough, they generally +do not work in the same way as the generic governors they share the names with. +For example, the ``powersave`` P-state selection algorithm provided by +``intel_pstate`` is not a counterpart of the generic ``powersave`` governor +(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors). + +There are two P-state selection algorithms provided by ``intel_pstate`` in the +active mode: ``powersave`` and ``performance``. The way they both operate +depends on whether or not the hardware-managed P-states (HWP) feature has been +enabled in the processor and possibly on the processor model. + +Which of the P-state selection algorithms is used by default depends on the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option. +Namely, if that option is set, the ``performance`` algorithm will be used by +default, and the other one will be used by default if it is not set. + +Active Mode With HWP +~~~~~~~~~~~~~~~~~~~~ + +If the processor supports the HWP feature, it will be enabled during the +processor initialization and cannot be disabled after that. It is possible +to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the +kernel in the command line. + +If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to +select P-states by itself, but still it can give hints to the processor's +internal P-state selection logic. What those hints are depends on which P-state +selection algorithm has been applied to the given policy (or to the CPU it +corresponds to). + +Even though the P-state selection is carried out by the processor automatically, +``intel_pstate`` registers utilization update callbacks with the CPU scheduler +in this mode. However, they are not used for running a P-state selection +algorithm, but for periodic updates of the current CPU frequency information to +be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``. + +HWP + ``performance`` +..................... + +In this configuration ``intel_pstate`` will write 0 to the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's +internal P-state selection logic is expected to focus entirely on performance. + +This will override the EPP/EPB setting coming from the ``sysfs`` interface +(see `Energy vs Performance Hints`_ below). + +Also, in this configuration the range of P-states available to the processor's +internal P-state selection logic is always restricted to the upper boundary +(that is, the maximum P-state that the driver is allowed to use). + +HWP + ``powersave`` +................... + +In this configuration ``intel_pstate`` will set the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was +previously set to via ``sysfs`` (or whatever default value it was +set to by the platform firmware). This usually causes the processor's +internal P-state selection logic to be less performance-focused. + +Active Mode Without HWP +~~~~~~~~~~~~~~~~~~~~~~~ + +This is the default operation mode for processors that do not support the HWP +feature. It also is used by default with the ``intel_pstate=no_hwp`` argument +in the kernel command line. However, in this mode ``intel_pstate`` may refuse +to work with the given processor if it does not recognize it. [Note that +``intel_pstate`` will never refuse to work with any processor with the HWP +feature enabled.] + +In this mode ``intel_pstate`` registers utilization update callbacks with the +CPU scheduler in order to run a P-state selection algorithm, either +``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy +setting in ``sysfs``. The current CPU frequency information to be made +available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is +periodically updated by those utilization update callbacks too. + +``performance`` +............... + +Without HWP, this P-state selection algorithm is always the same regardless of +the processor model and platform configuration. + +It selects the maximum P-state it is allowed to use, subject to limits set via +``sysfs``, every time the P-state selection computations are carried out by the +driver's utilization update callback for the given CPU (that does not happen +more often than every 10 ms), but the hardware configuration will not be changed +if the new P-state is the same as the current one. + +This is the default P-state selection algorithm if the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option +is set. + +``powersave`` +............. + +Without HWP, this P-state selection algorithm generally depends on the +processor model and/or the system profile setting in the ACPI tables and there +are two variants of it. + +One of them is used with processors from the Atom line and (regardless of the +processor model) on platforms with the system profile in the ACPI tables set to +"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or +"workstation". It is also used with processors supporting the HWP feature if +that feature has not been enabled (that is, with the ``intel_pstate=no_hwp`` +argument in the kernel command line). It is similar to the algorithm +implemented by the generic ``schedutil`` scaling governor except that the +utilization metric used by it is based on numbers coming from feedback +registers of the CPU. It generally selects P-states proportional to the +current CPU utilization, so it is referred to as the "proportional" algorithm. + +The second variant of the ``powersave`` P-state selection algorithm, used in all +of the other cases (generally, on processors from the Core line, so it is +referred to as the "Core" algorithm), is based on the values read from the APERF +and MPERF feedback registers and the previously requested target P-state. +It does not really take CPU utilization into account explicitly, but as a rule +it causes the CPU P-state to ramp up very quickly in response to increased +utilization which is generally desirable in server environments. + +Regardless of the variant, this algorithm is run by the driver's utilization +update callback for the given CPU when it is invoked by the CPU scheduler, but +not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this +particular case `_). Like in the ``performance`` +case, the hardware configuration is not touched if the new P-state turns out to +be the same as the current one. + +This is the default P-state selection algorithm if the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option +is not set. + +Passive Mode +------------ + +This mode is used if the ``intel_pstate=passive`` argument is passed to the +kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too). +Like in the active mode without HWP support, in this mode ``intel_pstate`` may +refuse to work with the given processor if it does not recognize it. + +If the driver works in this mode, the ``scaling_driver`` policy attribute in +``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq". +Then, the driver behaves like a regular ``CPUFreq`` scaling driver. That is, +it is invoked by generic scaling governors when necessary to talk to the +hardware in order to change the P-state of a CPU (in particular, the +``schedutil`` governor can invoke it directly from scheduler context). + +While in this mode, ``intel_pstate`` can be used with all of the (generic) +scaling governors listed by the ``scaling_available_governors`` policy attribute +in ``sysfs`` (and the P-state selection algorithms described above are not +used). Then, it is responsible for the configuration of policy objects +corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling +governors attached to the policy objects) with accurate information on the +maximum and minimum operating frequencies supported by the hardware (including +the so-called "turbo" frequency ranges). In other words, in the passive mode +the entire range of available P-states is exposed by ``intel_pstate`` to the +``CPUFreq`` core. However, in this mode the driver does not register +utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq`` +information comes from the ``CPUFreq`` core (and is the last frequency selected +by the current scaling governor for the given policy). + + +.. _turbo: + +Turbo P-states Support +====================== + +In the majority of cases, the entire range of P-states available to +``intel_pstate`` can be divided into two sub-ranges that correspond to +different types of processor behavior, above and below a boundary that +will be referred to as the "turbo threshold" in what follows. + +The P-states above the turbo threshold are referred to as "turbo P-states" and +the whole sub-range of P-states they belong to is referred to as the "turbo +range". These names are related to the Turbo Boost technology allowing a +multicore processor to opportunistically increase the P-state of one or more +cores if there is enough power to do that and if that is not going to cause the +thermal envelope of the processor package to be exceeded. + +Specifically, if software sets the P-state of a CPU core within the turbo range +(that is, above the turbo threshold), the processor is permitted to take over +performance scaling control for that core and put it into turbo P-states of its +choice going forward. However, that permission is interpreted differently by +different processor generations. Namely, the Sandy Bridge generation of +processors will never use any P-states above the last one set by software for +the given core, even if it is within the turbo range, whereas all of the later +processor generations will take it as a license to use any P-states from the +turbo range, even above the one set by software. In other words, on those +processors setting any P-state from the turbo range will enable the processor +to put the given core into all turbo P-states up to and including the maximum +supported one as it sees fit. + +One important property of turbo P-states is that they are not sustainable. More +precisely, there is no guarantee that any CPUs will be able to stay in any of +those states indefinitely, because the power distribution within the processor +package may change over time or the thermal envelope it was designed for might +be exceeded if a turbo P-state was used for too long. + +In turn, the P-states below the turbo threshold generally are sustainable. In +fact, if one of them is set by software, the processor is not expected to change +it to a lower one unless in a thermal stress or a power limit violation +situation (a higher P-state may still be used if it is set for another CPU in +the same package at the same time, for example). + +Some processors allow multiple cores to be in turbo P-states at the same time, +but the maximum P-state that can be set for them generally depends on the number +of cores running concurrently. The maximum turbo P-state that can be set for 3 +cores at the same time usually is lower than the analogous maximum P-state for +2 cores, which in turn usually is lower than the maximum turbo P-state that can +be set for 1 core. The one-core maximum turbo P-state is thus the maximum +supported one overall. + +The maximum supported turbo P-state, the turbo threshold (the maximum supported +non-turbo P-state) and the minimum supported P-state are specific to the +processor model and can be determined by reading the processor's model-specific +registers (MSRs). Moreover, some processors support the Configurable TDP +(Thermal Design Power) feature and, when that feature is enabled, the turbo +threshold effectively becomes a configurable value that can be set by the +platform firmware. + +Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes +the entire range of available P-states, including the whole turbo range, to the +``CPUFreq`` core and (in the passive mode) to generic scaling governors. This +generally causes turbo P-states to be set more often when ``intel_pstate`` is +used relative to ACPI-based CPU performance scaling (see `below `_ +for more information). + +Moreover, since ``intel_pstate`` always knows what the real turbo threshold is +(even if the Configurable TDP feature is enabled in the processor), its +``no_turbo`` attribute in ``sysfs`` (described `below `_) should +work as expected in all cases (that is, if set to disable turbo P-states, it +always should prevent ``intel_pstate`` from using them). + + +Processor Support +================= + +To handle a given processor ``intel_pstate`` requires a number of different +pieces of information on it to be known, including: + + * The minimum supported P-state. + + * The maximum supported `non-turbo P-state `_. + + * Whether or not turbo P-states are supported at all. + + * The maximum supported `one-core turbo P-state `_ (if turbo P-states + are supported). + + * The scaling formula to translate the driver's internal representation + of P-states into frequencies and the other way around. + +Generally, ways to obtain that information are specific to the processor model +or family. Although it often is possible to obtain all of it from the processor +itself (using model-specific registers), there are cases in which hardware +manuals need to be consulted to get to it too. + +For this reason, there is a list of supported processors in ``intel_pstate`` and +the driver initialization will fail if the detected processor is not in that +list, unless it supports the `HWP feature `_. [The interface to +obtain all of the information listed above is the same for all of the processors +supporting the HWP feature, which is why they all are supported by +``intel_pstate``.] + + +User Space Interface in ``sysfs`` +================================= + +Global Attributes +----------------- + +``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to +control its functionality at the system level. They are located in the +``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all +CPUs. + +Some of them are not present if the ``intel_pstate=per_cpu_perf_limits`` +argument is passed to the kernel in the command line. + +``max_perf_pct`` + Maximum P-state the driver is allowed to set in percent of the + maximum supported performance level (the highest supported `turbo + P-state `_). + + This attribute will not be exposed if the + ``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel + command line. + +``min_perf_pct`` + Minimum P-state the driver is allowed to set in percent of the + maximum supported performance level (the highest supported `turbo + P-state `_). + + This attribute will not be exposed if the + ``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel + command line. + +``num_pstates`` + Number of P-states supported by the processor (between 0 and 255 + inclusive) including both turbo and non-turbo P-states (see + `Turbo P-states Support`_). + + The value of this attribute is not affected by the ``no_turbo`` + setting described `below `_. + + This attribute is read-only. + +``turbo_pct`` + Ratio of the `turbo range `_ size to the size of the entire + range of supported P-states, in percent. + + This attribute is read-only. + +.. _no_turbo_attr: + +``no_turbo`` + If set (equal to 1), the driver is not allowed to set any turbo P-states + (see `Turbo P-states Support`_). If unset (equalt to 0, which is the + default), turbo P-states can be set by the driver. + [Note that ``intel_pstate`` does not support the general ``boost`` + attribute (supported by some other scaling drivers) which is replaced + by this one.] + + This attrubute does not affect the maximum supported frequency value + supplied to the ``CPUFreq`` core and exposed via the policy interface, + but it affects the maximum possible value of per-policy P-state limits + (see `Interpretation of Policy Attributes`_ below for details). + +.. _status_attr: + +``status`` + Operation mode of the driver: "active", "passive" or "off". + + "active" + The driver is functional and in the `active mode + `_. + + "passive" + The driver is functional and in the `passive mode + `_. + + "off" + The driver is not functional (it is not registered as a scaling + driver with the ``CPUFreq`` core). + + This attribute can be written to in order to change the driver's + operation mode or to unregister it. The string written to it must be + one of the possible values of it and, if successful, the write will + cause the driver to switch over to the operation mode represented by + that string - or to be unregistered in the "off" case. [Actually, + switching over from the active mode to the passive mode or the other + way around causes the driver to be unregistered and registered again + with a different set of callbacks, so all of its settings (the global + as well as the per-policy ones) are then reset to their default + values, possibly depending on the target operation mode.] + + That only is supported in some configurations, though (for example, if + the `HWP feature is enabled in the processor `_, + the operation mode of the driver cannot be changed), and if it is not + supported in the current configuration, writes to this attribute with + fail with an appropriate error. + +Interpretation of Policy Attributes +----------------------------------- + +The interpretation of some ``CPUFreq`` policy attributes described in +:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver +and it generally depends on the driver's `operation mode `_. + +First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and +``scaling_cur_freq`` attributes are produced by applying a processor-specific +multiplier to the internal P-state representation used by ``intel_pstate``. +Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq`` +attributes are capped by the frequency corresponding to the maximum P-state that +the driver is allowed to set. + +If the ``no_turbo`` `global attribute `_ is set, the driver is +not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq`` +and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency. +Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and +``scaling_min_freq`` to go down to that value if they were above it before. +However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be +restored after unsetting ``no_turbo``, unless these attributes have been written +to after ``no_turbo`` was set. + +If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq`` +and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state, +which also is the value of ``cpuinfo_max_freq`` in either case. + +Next, the following policy attributes have special meaning if +``intel_pstate`` works in the `active mode `_: + +``scaling_available_governors`` + List of P-state selection algorithms provided by ``intel_pstate``. + +``scaling_governor`` + P-state selection algorithm provided by ``intel_pstate`` currently in + use with the given policy. + +``scaling_cur_freq`` + Frequency of the average P-state of the CPU represented by the given + policy for the time interval between the last two invocations of the + driver's utilization update callback by the CPU scheduler for that CPU. + +The meaning of these attributes in the `passive mode `_ is the +same as for other scaling drivers. + +Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate`` +depends on the operation mode of the driver. Namely, it is either +"intel_pstate" (in the `active mode `_) or "intel_cpufreq" (in the +`passive mode `_). + +Coordination of P-State Limits +------------------------------ + +``intel_pstate`` allows P-state limits to be set in two ways: with the help of +the ``max_perf_pct`` and ``min_perf_pct`` `global attributes +`_ or via the ``scaling_max_freq`` and ``scaling_min_freq`` +``CPUFreq`` policy attributes. The coordination between those limits is based +on the following rules, regardless of the current operation mode of the driver: + + 1. All CPUs are affected by the global limits (that is, none of them can be + requested to run faster than the global maximum and none of them can be + requested to run slower than the global minimum). + + 2. Each individual CPU is affected by its own per-policy limits (that is, it + cannot be requested to run faster than its own per-policy maximum and it + cannot be requested to run slower than its own per-policy minimum). + + 3. The global and per-policy limits can be set independently. + +If the `HWP feature is enabled in the processor `_, the +resulting effective values are written into its registers whenever the limits +change in order to request its internal P-state selection logic to always set +P-states within these limits. Otherwise, the limits are taken into account by +scaling governors (in the `passive mode `_) and by the driver +every time before setting a new P-state for a CPU. + +Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument +is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed +at all and the only way to set the limits is by using the policy attributes. + + +Energy vs Performance Hints +--------------------------- + +If ``intel_pstate`` works in the `active mode with the HWP feature enabled +`_ in the processor, additional attributes are present +in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow +user space to help ``intel_pstate`` to adjust the processor's internal P-state +selection logic by focusing it on performance or on energy-efficiency, or +somewhere between the two extremes: + +``energy_performance_preference`` + Current value of the energy vs performance hint for the given policy + (or the CPU represented by it). + + The hint can be changed by writing to this attribute. + +``energy_performance_available_preferences`` + List of strings that can be written to the + ``energy_performance_preference`` attribute. + + They represent different energy vs performance hints and should be + self-explanatory, except that ``default`` represents whatever hint + value was set by the platform firmware. + +Strings written to the ``energy_performance_preference`` attribute are +internally translated to integer values written to the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob. + +[Note that tasks may by migrated from one CPU to another by the scheduler's +load-balancing algorithm and if different energy vs performance hints are +set for those CPUs, that may lead to undesirable outcomes. To avoid such +issues it is better to set the same energy vs performance hint for all CPUs +or to pin every task potentially sensitive to them to a specific CPU.] + +.. _acpi-cpufreq: + +``intel_pstate`` vs ``acpi-cpufreq`` +==================================== + +On the majority of systems supported by ``intel_pstate``, the ACPI tables +provided by the platform firmware contain ``_PSS`` objects returning information +that can be used for CPU performance scaling (refer to the `ACPI specification`_ +for details on the ``_PSS`` objects and the format of the information returned +by them). + +The information returned by the ACPI ``_PSS`` objects is used by the +``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate`` +the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling +interface, but the set of P-states it can use is limited by the ``_PSS`` +output. + +On those systems each ``_PSS`` object returns a list of P-states supported by +the corresponding CPU which basically is a subset of the P-states range that can +be used by ``intel_pstate`` on the same system, with one exception: the whole +`turbo range `_ is represented by one item in it (the topmost one). By +convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz +than the frequency of the highest non-turbo P-state listed by it, but the +corresponding P-state representation (following the hardware specification) +returned for it matches the maximum supported turbo P-state (or is the +special value 255 meaning essentially "go as high as you can get"). + +The list of P-states returned by ``_PSS`` is reflected by the table of +available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and +scaling governors and the minimum and maximum supported frequencies reported by +it come from that list as well. In particular, given the special representation +of the turbo range described above, this means that the maximum supported +frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency +of the highest supported non-turbo P-state listed by ``_PSS`` which, of course, +affects decisions made by the scaling governors, except for ``powersave`` and +``performance``. + +For example, if a given governor attempts to select a frequency proportional to +estimated CPU load and maps the load of 100% to the maximum supported frequency +(possibly multiplied by a constant), then it will tend to choose P-states below +the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because +in that case the turbo range corresponds to a small fraction of the frequency +band it can use (1 MHz vs 1 GHz or more). In consequence, it will only go to +the turbo range for the highest loads and the other loads above 50% that might +benefit from running at turbo frequencies will be given non-turbo P-states +instead. + +One more issue related to that may appear on systems supporting the +`Configurable TDP feature `_ allowing the platform firmware to set the +turbo threshold. Namely, if that is not coordinated with the lists of P-states +returned by ``_PSS`` properly, there may be more than one item corresponding to +a turbo P-state in those lists and there may be a problem with avoiding the +turbo range (if desirable or necessary). Usually, to avoid using turbo +P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed +by ``_PSS``, but that is not sufficient when there are other turbo P-states in +the list returned by it. + +Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the +`passive mode `_, except that the number of P-states it can set +is limited to the ones listed by the ACPI ``_PSS`` objects. + + +Kernel Command Line Options for ``intel_pstate`` +================================================ + +Several kernel command line options can be used to pass early-configuration-time +parameters to ``intel_pstate`` in order to enforce specific behavior of it. All +of them have to be prepended with the ``intel_pstate=`` prefix. + +``disable`` + Do not register ``intel_pstate`` as the scaling driver even if the + processor is supported by it. + +``passive`` + Register ``intel_pstate`` in the `passive mode `_ to + start with. + + This option implies the ``no_hwp`` one described below. + +``force`` + Register ``intel_pstate`` as the scaling driver instead of + ``acpi-cpufreq`` even if the latter is preferred on the given system. + + This may prevent some platform features (such as thermal controls and + power capping) that rely on the availability of ACPI P-states + information from functioning as expected, so it should be used with + caution. + + This option does not work with processors that are not supported by + ``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling + driver is used instead of ``acpi-cpufreq``. + +``no_hwp`` + Do not enable the `hardware-managed P-states (HWP) feature + `_ even if it is supported by the processor. + +``hwp_only`` + Register ``intel_pstate`` as the scaling driver only if the + `hardware-managed P-states (HWP) feature `_ is + supported by the processor. + +``support_acpi_ppc`` + Take ACPI ``_PPC`` performance limits into account. + + If the preferred power management profile in the FADT (Fixed ACPI + Description Table) is set to "Enterprise Server" or "Performance + Server", the ACPI ``_PPC`` limits are taken into account by default + and this option has no effect. + +``per_cpu_perf_limits`` + Use per-logical-CPU P-State limits (see `Coordination of P-state + Limits`_ for details). + + +Diagnostics and Tuning +====================== + +Trace Events +------------ + +There are two static trace events that can be used for ``intel_pstate`` +diagnostics. One of them is the ``cpu_frequency`` trace event generally used +by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific +to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if +it works in the `active mode `_. + +The following sequence of shell commands can be used to enable them and see +their output (if the kernel is generally configured to support event tracing):: + + # cd /sys/kernel/debug/tracing/ + # echo 1 > events/power/pstate_sample/enable + # echo 1 > events/power/cpu_frequency/enable + # cat trace + gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476 + cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 + +If ``intel_pstate`` works in the `passive mode `_, the +``cpu_frequency`` trace event will be triggered either by the ``schedutil`` +scaling governor (for the policies it is attached to), or by the ``CPUFreq`` +core (for the policies with other scaling governors). + +``ftrace`` +---------- + +The ``ftrace`` interface can be used for low-level diagnostics of +``intel_pstate``. For example, to check how often the function to set a +P-state is called, the ``ftrace`` filter can be set to to +:c:func:`intel_pstate_set_pstate`:: + + # cd /sys/kernel/debug/tracing/ + # cat available_filter_functions | grep -i pstate + intel_pstate_set_pstate + intel_pstate_cpu_init + ... + # echo intel_pstate_set_pstate > set_ftrace_filter + # echo function > current_tracer + # cat trace | head -15 + # tracer: function + # + # entries-in-buffer/entries-written: 80/80 #P:4 + # + # _-----=> irqs-off + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / delay + # TASK-PID CPU# |||| TIMESTAMP FUNCTION + # | | | |||| | | + Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func + -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func + +Tuning Interface in ``debugfs`` +------------------------------- + +The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of +processors in the active mode `_ is based on a `PID controller`_ +whose parameters were chosen to address a number of different use cases at the +same time. However, it still is possible to fine-tune it to a specific workload +and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is +provided for this purpose. [Note that the ``pstate_snb`` directory will be +present only if the specific P-state selection algorithm matching the interface +in it actually is in use.] + +The following files present in that directory can be used to modify the PID +controller parameters at run time: + +| ``deadband`` +| ``d_gain_pct`` +| ``i_gain_pct`` +| ``p_gain_pct`` +| ``sample_rate_ms`` +| ``setpoint`` + +Note, however, that achieving desirable results this way generally requires +expert-level understanding of the power vs performance tradeoff, so extra care +is recommended when attempting to do that. + + +.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf +.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html +.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf +.. _PID controller: https://en.wikipedia.org/wiki/PID_controller diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt deleted file mode 100644 index 3fdcdfd968ba..000000000000 --- a/Documentation/cpu-freq/intel-pstate.txt +++ /dev/null @@ -1,281 +0,0 @@ -Intel P-State driver --------------------- - -This driver provides an interface to control the P-State selection for the -SandyBridge+ Intel processors. - -The following document explains P-States: -http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf -As stated in the document, P-State doesn’t exactly mean a frequency. However, for -the sake of the relationship with cpufreq, P-State and frequency are used -interchangeably. - -Understanding the cpufreq core governors and policies are important before -discussing more details about the Intel P-State driver. Based on what callbacks -a cpufreq driver provides to the cpufreq core, it can support two types of -drivers: -- with target_index() callback: In this mode, the drivers using cpufreq core -simply provide the minimum and maximum frequency limits and an additional -interface target_index() to set the current frequency. The cpufreq subsystem -has a number of scaling governors ("performance", "powersave", "ondemand", -etc.). Depending on which governor is in use, cpufreq core will call for -transitions to a specific frequency using target_index() callback. -- setpolicy() callback: In this mode, drivers do not provide target_index() -callback, so cpufreq core can't request a transition to a specific frequency. -The driver provides minimum and maximum frequency limits and callbacks to set a -policy. The policy in cpufreq sysfs is referred to as the "scaling governor". -The cpufreq core can request the driver to operate in any of the two policies: -"performance" and "powersave". The driver decides which frequency to use based -on the above policy selection considering minimum and maximum frequency limits. - -The Intel P-State driver falls under the latter category, which implements the -setpolicy() callback. This driver decides what P-State to use based on the -requested policy from the cpufreq core. If the processor is capable of -selecting its next P-State internally, then the driver will offload this -responsibility to the processor (aka HWP: Hardware P-States). If not, the -driver implements algorithms to select the next P-State. - -Since these policies are implemented in the driver, they are not same as the -cpufreq scaling governors implementation, even if they have the same name in -the cpufreq sysfs (scaling_governors). For example the "performance" policy is -similar to cpufreq’s "performance" governor, but "powersave" is completely -different than the cpufreq "powersave" governor. The strategy here is similar -to cpufreq "ondemand", where the requested P-State is related to the system load. - -Sysfs Interface - -In addition to the frequency-controlling interfaces provided by the cpufreq -core, the driver provides its own sysfs files to control the P-State selection. -These files have been added to /sys/devices/system/cpu/intel_pstate/. -Any changes made to these files are applicable to all CPUs (even in a -multi-package system, Refer to later section on placing "Per-CPU limits"). - - max_perf_pct: Limits the maximum P-State that will be requested by - the driver. It states it as a percentage of the available performance. The - available (P-State) performance may be reduced by the no_turbo - setting described below. - - min_perf_pct: Limits the minimum P-State that will be requested by - the driver. It states it as a percentage of the max (non-turbo) - performance level. - - no_turbo: Limits the driver to selecting P-State below the turbo - frequency range. - - turbo_pct: Displays the percentage of the total performance that - is supported by hardware that is in the turbo range. This number - is independent of whether turbo has been disabled or not. - - num_pstates: Displays the number of P-States that are supported - by hardware. This number is independent of whether turbo has - been disabled or not. - -For example, if a system has these parameters: - Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State) - Max non turbo ratio: 0x17 - Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio) - -Sysfs will show : - max_perf_pct:100, which corresponds to 1 core ratio - min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio - no_turbo:0, turbo is not disabled - num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1) - turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates - -Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual -Volume 3: System Programming Guide" to understand ratios. - -There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/ -that can be used for controlling the operation mode of the driver: - - status: Three settings are possible: - "off" - The driver is not in use at this time. - "active" - The driver works as a P-state governor (default). - "passive" - The driver works as a regular cpufreq one and collaborates - with the generic cpufreq governors (it sets P-states as - requested by those governors). - The current setting is returned by reads from this attribute. Writing one - of the above strings to it changes the operation mode as indicated by that - string, if possible. If HW-managed P-states (HWP) are enabled, it is not - possible to change the driver's operation mode and attempts to write to - this attribute will fail. - -cpufreq sysfs for Intel P-State - -Since this driver registers with cpufreq, cpufreq sysfs is also presented. -There are some important differences, which need to be considered. - -scaling_cur_freq: This displays the real frequency which was used during -the last sample period instead of what is requested. Some other cpufreq driver, -like acpi-cpufreq, displays what is requested (Some changes are on the -way to fix this for acpi-cpufreq driver). The same is true for frequencies -displayed at /proc/cpuinfo. - -scaling_governor: This displays current active policy. Since each CPU has a -cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this -is not possible with Intel P-States, as there is one common policy for all -CPUs. Here, the last requested policy will be applicable to all CPUs. It is -suggested that one use the cpupower utility to change policy to all CPUs at the -same time. - -scaling_setspeed: This attribute can never be used with Intel P-State. - -scaling_max_freq/scaling_min_freq: This interface can be used similarly to -the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies -are converted to nearest possible P-State, this is prone to rounding errors. -This method is not preferred to limit performance. - -affected_cpus: Not used -related_cpus: Not used - -For contemporary Intel processors, the frequency is controlled by the -processor itself and the P-State exposed to software is related to -performance levels. The idea that frequency can be set to a single -frequency is fictional for Intel Core processors. Even if the scaling -driver selects a single P-State, the actual frequency the processor -will run at is selected by the processor itself. - -Per-CPU limits - -The kernel command line option "intel_pstate=per_cpu_perf_limits" forces -the intel_pstate driver to use per-CPU performance limits. When it is set, -the sysfs control interface described above is subject to limitations. -- The following controls are not available for both read and write - /sys/devices/system/cpu/intel_pstate/max_perf_pct - /sys/devices/system/cpu/intel_pstate/min_perf_pct -- The following controls can be used to set performance limits, as far as the -architecture of the processor permits: - /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq - /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq - /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -- User can still observe turbo percent and number of P-States from - /sys/devices/system/cpu/intel_pstate/turbo_pct - /sys/devices/system/cpu/intel_pstate/num_pstates -- User can read write system wide turbo status - /sys/devices/system/cpu/no_turbo - -Support of energy performance hints -It is possible to provide hints to the HWP algorithms in the processor -to be more performance centric to more energy centric. When the driver -is using HWP, two additional cpufreq sysfs attributes are presented for -each logical CPU. -These attributes are: - - energy_performance_available_preferences - - energy_performance_preference - -To get list of supported hints: -$ cat energy_performance_available_preferences - default performance balance_performance balance_power power - -The current preference can be read or changed via cpufreq sysfs -attribute "energy_performance_preference". Reading from this attribute -will display current effective setting. User can write any of the valid -preference string to this attribute. User can always restore to power-on -default by writing "default". - -Since threads can migrate to different CPUs, this is possible that the -new CPU may have different energy performance preference than the previous -one. To avoid such issues, either threads can be pinned to specific CPUs -or set the same energy performance preference value to all CPUs. - -Tuning Intel P-State driver - -When the performance can be tuned using PID (Proportional Integral -Derivative) controller, debugfs files are provided for adjusting performance. -They are presented under: -/sys/kernel/debug/pstate_snb/ - -The PID tunable parameters are: - deadband - d_gain_pct - i_gain_pct - p_gain_pct - sample_rate_ms - setpoint - -To adjust these parameters, some understanding of driver implementation is -necessary. There are some tweeks described here, but be very careful. Adjusting -them requires expert level understanding of power and performance relationship. -These limits are only useful when the "powersave" policy is active. - --To make the system more responsive to load changes, sample_rate_ms can -be adjusted (current default is 10ms). --To make the system use higher performance, even if the load is lower, setpoint -can be adjusted to a lower number. This will also lead to faster ramp up time -to reach the maximum P-State. -If there are no derivative and integral coefficients, The next P-State will be -equal to: - current P-State - ((setpoint - current cpu load) * p_gain_pct) - -For example, if the current PID parameters are (Which are defaults for the core -processors like SandyBridge): - deadband = 0 - d_gain_pct = 0 - i_gain_pct = 0 - p_gain_pct = 20 - sample_rate_ms = 10 - setpoint = 97 - -If the current P-State = 0x08 and current load = 100, this will result in the -next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State -goes up by only 1. If during next sample interval the current load doesn't -change and still 100, then P-State goes up by one again. This process will -continue as long as the load is more than the setpoint until the maximum P-State -is reached. - -For the same load at setpoint = 60, this will result in the next P-State -= 0x08 - ((60 - 100) * 0.2) = 16 -So by changing the setpoint from 97 to 60, there is an increase of the -next P-State from 9 to 16. So this will make processor execute at higher -P-State for the same CPU load. If the load continues to be more than the -setpoint during next sample intervals, then P-State will go up again till the -maximum P-State is reached. But the ramp up time to reach the maximum P-State -will be much faster when the setpoint is 60 compared to 97. - -Debugging Intel P-State driver - -Event tracing -To debug P-State transition, the Linux event tracing interface can be used. -There are two specific events, which can be enabled (Provided the kernel -configs related to event tracing are enabled). - -# cd /sys/kernel/debug/tracing/ -# echo 1 > events/power/pstate_sample/enable -# echo 1 > events/power/cpu_frequency/enable -# cat trace -gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 - scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 - freq=2474476 -cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 - - -Using ftrace - -If function level tracing is required, the Linux ftrace interface can be used. -For example if we want to check how often a function to set a P-State is -called, we can set ftrace filter to intel_pstate_set_pstate. - -# cd /sys/kernel/debug/tracing/ -# cat available_filter_functions | grep -i pstate -intel_pstate_set_pstate -intel_pstate_cpu_init -... - -# echo intel_pstate_set_pstate > set_ftrace_filter -# echo function > current_tracer -# cat trace | head -15 -# tracer: function -# -# entries-in-buffer/entries-written: 80/80 #P:4 -# -# _-----=> irqs-off -# / _----=> need-resched -# | / _---=> hardirq/softirq -# || / _--=> preempt-depth -# ||| / delay -# TASK-PID CPU# |||| TIMESTAMP FUNCTION -# | | | |||| | | - Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func - gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func - gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func - -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func -- cgit v1.2.3-59-g8ed1b From 60d4553bdc1a0099adb544e12cafd7bbc7f1d484 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 May 2017 02:23:04 +0200 Subject: PM / wakeup: Fix up wakeup_source_report_event() Commit 8a537ece3d94 (PM / wakeup: Integrate mechanism to abort transitions in progress) modified wakeup_source_report_event() and wakeup_source_activate() to make it possible to call pm_system_wakeup() from the latter if so indicated by the caller of the former (via a new function argument added by that commit), but it overlooked the fact that in some situations wakeup_source_report_event() is called to signal a "hard" event (ie. such that should abort a system suspend in progress) after pm_stay_awake() has been called for the same wakeup source object, in which case the pm_system_wakeup() will not trigger. To work around this issue, modify wakeup_source_activate() and wakeup_source_report_event() again so that pm_system_wakeup() is called by the latter directly (if its last argument is true), in which case the additional argument does not need to be passed to wakeup_source_activate() any more, so drop it from there. Fixes: 8a537ece3d94 (PM / wakeup: Integrate mechanism to abort transitions in progress) Reported-by: David E. Box Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index f62082fdd670..9c36b27996fc 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -512,13 +512,12 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws) /** * wakup_source_activate - Mark given wakeup source as active. * @ws: Wakeup source to handle. - * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM * core of the event by incrementing the counter of of wakeup events being * processed. */ -static void wakeup_source_activate(struct wakeup_source *ws, bool hard) +static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; @@ -526,9 +525,6 @@ static void wakeup_source_activate(struct wakeup_source *ws, bool hard) "unregistered wakeup source\n")) return; - if (hard) - pm_system_wakeup(); - ws->active = true; ws->active_count++; ws->last_time = ktime_get(); @@ -554,7 +550,10 @@ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard) ws->wakeup_count++; if (!ws->active) - wakeup_source_activate(ws, hard); + wakeup_source_activate(ws); + + if (hard) + pm_system_wakeup(); } /** -- cgit v1.2.3-59-g8ed1b From 967b08c25a091867b04261fa34addedc950256f1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 May 2017 02:23:12 +0200 Subject: RTC: rtc-cmos: Fix wakeup from suspend-to-idle Commit eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle) modified the core suspend-to-idle code to filter out spurious SCI interrupts received while suspended, which requires ACPI event source handlers to report wakeup events in a way that will trigger a wakeup from suspend to idle (or abort system suspends in progress, which is equivalent). That needs to be done in the rtc-cmos driver too, which was overlooked by the above commit, so do that now. Fixes: eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle) Reported-by: David E. Box Signed-off-by: Rafael J. Wysocki --- drivers/rtc/rtc-cmos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index f4a96dbdabf2..dabe47b9be72 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -1085,7 +1085,7 @@ static u32 rtc_handler(void *context) } spin_unlock_irqrestore(&rtc_lock, flags); - pm_wakeup_event(dev, 0); + pm_wakeup_hard_event(dev); acpi_clear_event(ACPI_EVENT_RTC); acpi_disable_event(ACPI_EVENT_RTC, 0); return ACPI_INTERRUPT_HANDLED; -- cgit v1.2.3-59-g8ed1b From f5705aa8cfed142d980ecac12bee0d81b756479e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 13 May 2017 16:31:05 -0700 Subject: dax, xfs, ext4: compile out iomap-dax paths in the FS_DAX=n case Tetsuo reports: fs/built-in.o: In function `xfs_file_iomap_end': xfs_iomap.c:(.text+0xe0ef9): undefined reference to `put_dax' fs/built-in.o: In function `xfs_file_iomap_begin': xfs_iomap.c:(.text+0xe1a7f): undefined reference to `dax_get_by_host' make: *** [vmlinux] Error 1 $ grep DAX .config CONFIG_DAX=m # CONFIG_DEV_DAX is not set # CONFIG_FS_DAX is not set When FS_DAX=n we can/must throw away the dax code in filesystems. Implement 'fs_' versions of dax_get_by_host() and put_dax() that are nops in the FS_DAX=n case. Cc: Cc: Cc: Jan Kara Cc: "Theodore Ts'o" Cc: "Darrick J. Wong" Cc: Ross Zwisler Tested-by: Tony Luck Fixes: ef51042472f5 ("block, dax: move 'select DAX' from BLOCK to FS_DAX") Reported-by: Tetsuo Handa Signed-off-by: Dan Williams --- fs/ext2/inode.c | 4 ++-- fs/ext4/inode.c | 4 ++-- fs/xfs/xfs_iomap.c | 4 ++-- include/linux/dax.h | 34 +++++++++++++++++++++++++++------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 26d77f9f8c12..2dcbd5698884 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -817,7 +817,7 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->bdev = bdev; iomap->offset = (u64)first_block << blkbits; if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); else iomap->dax_dev = NULL; @@ -841,7 +841,7 @@ static int ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { - put_dax(iomap->dax_dev); + fs_put_dax(iomap->dax_dev); if (iomap->type == IOMAP_MAPPED && written < length && (flags & IOMAP_WRITE)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5834c4d76be8..1bd0bfa547f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3412,7 +3412,7 @@ retry: bdev = inode->i_sb->s_bdev; iomap->bdev = bdev; if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); else iomap->dax_dev = NULL; iomap->offset = first_block << blkbits; @@ -3447,7 +3447,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, int blkbits = inode->i_blkbits; bool truncate = false; - put_dax(iomap->dax_dev); + fs_put_dax(iomap->dax_dev); if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) return 0; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a63f61c256bd..94e5bdf7304c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1068,7 +1068,7 @@ xfs_file_iomap_begin( /* optionally associate a dax device with the iomap bdev */ bdev = iomap->bdev; if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); else iomap->dax_dev = NULL; @@ -1149,7 +1149,7 @@ xfs_file_iomap_end( unsigned flags, struct iomap *iomap) { - put_dax(iomap->dax_dev); + fs_put_dax(iomap->dax_dev); if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, length, written, iomap); diff --git a/include/linux/dax.h b/include/linux/dax.h index 00ebac854bb7..5ec1f6c47716 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -18,6 +18,20 @@ struct dax_operations { void **, pfn_t *); }; +#if IS_ENABLED(CONFIG_DAX) +struct dax_device *dax_get_by_host(const char *host); +void put_dax(struct dax_device *dax_dev); +#else +static inline struct dax_device *dax_get_by_host(const char *host) +{ + return NULL; +} + +static inline void put_dax(struct dax_device *dax_dev) +{ +} +#endif + int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int __bdev_dax_supported(struct super_block *sb, int blocksize); @@ -25,23 +39,29 @@ static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { return __bdev_dax_supported(sb, blocksize); } + +static inline struct dax_device *fs_dax_get_by_host(const char *host) +{ + return dax_get_by_host(host); +} + +static inline void fs_put_dax(struct dax_device *dax_dev) +{ + put_dax(dax_dev); +} + #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { return -EOPNOTSUPP; } -#endif -#if IS_ENABLED(CONFIG_DAX) -struct dax_device *dax_get_by_host(const char *host); -void put_dax(struct dax_device *dax_dev); -#else -static inline struct dax_device *dax_get_by_host(const char *host) +static inline struct dax_device *fs_dax_get_by_host(const char *host) { return NULL; } -static inline void put_dax(struct dax_device *dax_dev) +static inline void fs_put_dax(struct dax_device *dax_dev) { } #endif -- cgit v1.2.3-59-g8ed1b From 31d848aa1d85530770f0bdf1b61a042335d340ad Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 19 Apr 2017 09:58:23 -0700 Subject: soc: bcm: brcmstb: Correctly match 7435 SoC Remove the duplicate brcm,bcm7425-sun-top-ctrl compatible string and replace it with brcm,bcm7435-sun-top-ctrl which was intended. Fixes: bd0faf08dc7f ("soc: bcm: brcmstb: Match additional compatible strings") Reported-by: Andreas Oberritter Acked-by: Gregory Fong Signed-off-by: Florian Fainelli --- drivers/soc/bcm/brcmstb/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/bcm/brcmstb/common.c b/drivers/soc/bcm/brcmstb/common.c index b6195fdf0d00..22e98a90468c 100644 --- a/drivers/soc/bcm/brcmstb/common.c +++ b/drivers/soc/bcm/brcmstb/common.c @@ -49,7 +49,7 @@ static const struct of_device_id sun_top_ctrl_match[] = { { .compatible = "brcm,bcm7420-sun-top-ctrl", }, { .compatible = "brcm,bcm7425-sun-top-ctrl", }, { .compatible = "brcm,bcm7429-sun-top-ctrl", }, - { .compatible = "brcm,bcm7425-sun-top-ctrl", }, + { .compatible = "brcm,bcm7435-sun-top-ctrl", }, { .compatible = "brcm,brcmstb-sun-top-ctrl", }, { } }; -- cgit v1.2.3-59-g8ed1b From b383b544f2666d67446b951a9a97af239dafed5d Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 3 Apr 2017 15:11:22 +0300 Subject: net/mlx5e: Use the correct pause values for ethtool advertising Query the operational pause from firmware (PFCC register) instead of always passing zeros. Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ce7b09d72ff6..d60e681b443e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -849,6 +849,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + u32 rx_pause = 0; + u32 tx_pause = 0; u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_lp; @@ -871,11 +873,13 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); an_status = MLX5_GET(ptys_reg, out, an_status); + mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); get_supported(eth_proto_cap, link_ksettings); - get_advertising(eth_proto_admin, 0, 0, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); get_speed_duplex(netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; -- cgit v1.2.3-59-g8ed1b From e3c19503712d6360239b19c14cded56dd63c40d7 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 19 Apr 2017 14:35:15 +0300 Subject: net/mlx5e: Fix ethtool pause support and advertise reporting Pause bit should set when RX pause is on, not TX pause. Also, setting Asym_Pause is incorrect, and should be turned off. Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d60e681b443e..8209affa75c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -794,7 +794,6 @@ static void get_supported(u32 eth_proto_cap, ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); ptys2ethtool_supported_link(supported, eth_proto_cap); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); - ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause); } static void get_advertising(u32 eth_proto_cap, u8 tx_pause, @@ -804,7 +803,7 @@ static void get_advertising(u32 eth_proto_cap, u8 tx_pause, unsigned long *advertising = link_ksettings->link_modes.advertising; ptys2ethtool_adver_link(advertising, eth_proto_cap); - if (tx_pause) + if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); if (tx_pause ^ rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); -- cgit v1.2.3-59-g8ed1b From 20b6a1c78280dbeb45214c463cf9cbccb3665146 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 9 May 2017 16:40:46 +0300 Subject: net/mlx5e: Fix setup TC ndo Fail-safe support patches introduced a trivial bug, setup tc callback is doing a wrong check of the netdevice state, the fix is simply to invert the condition. Fixes: 6f9485af4020 ("net/mlx5e: Fail safe tc setup") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a61b71b6fff3..41cd22a223dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2976,7 +2976,7 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) new_channels.params = priv->channels.params; new_channels.params.num_tc = tc ? tc : 1; - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; goto out; } -- cgit v1.2.3-59-g8ed1b From 5360fd473c23f18b42722cdb13a1c6ec7acd96ff Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 4 May 2017 17:53:32 +0300 Subject: net/mlx5e: IPoIB, Only support regular RQ for now IPoIB doesn't support striding RQ at the moment, for this we need to explicitly choose non striding RQ in IPoIB init, even if the HW supports it. Fixes: 8f493ffd88ea ("net/mlx5e: IPoIB, RX steering RSS RQTs and TIRs") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c index 019c230da498..56bff3540954 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c @@ -66,6 +66,10 @@ static void mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ + mlx5e_set_rq_type_params(mdev, &priv->channels.params, MLX5_WQ_TYPE_LINKED_LIST); + priv->channels.params.lro_en = false; + mutex_init(&priv->state_lock); netdev->hw_features |= NETIF_F_SG; -- cgit v1.2.3-59-g8ed1b From 508541146af18e43072e41a31aa62fac2b01aac1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 25 Apr 2017 10:39:57 +0300 Subject: net/mlx5: Use underlay QPN from the root name space Root flow table is dynamically changed by the underlying flow steering layer, and IPoIB/ULPs have no idea what will be the root flow table in the future, hence we need a dynamic infrastructure to move Underlay QPs with the root flow table. Fixes: b3ba51498bdd ("net/mlx5: Refactor create flow table method to accept underlay QP") Signed-off-by: Erez Shitrit Signed-off-by: Maor Gottlieb Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 9 +++----- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 25 +++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 7 +++++-- include/linux/mlx5/fs.h | 4 +++- 8 files changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0099a3e397bc..2fd044b23875 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1003,7 +1003,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn); +int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 576d6787b484..53ed58320a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -800,7 +800,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; @@ -810,7 +810,6 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr.level = MLX5E_TTC_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft_attr.underlay_qpn = underlay_qpn; ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { @@ -1147,7 +1146,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv, 0); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 19e3d2fc2099..fcec7bedd3cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -40,28 +40,25 @@ #include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft) + struct mlx5_flow_table *ft, u32 underlay_qpn) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && - ft->underlay_qpn == 0) + underlay_qpn == 0) return 0; MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); if (ft->vport) { MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } - if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && - ft->underlay_qpn != 0) - MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 8fad80688536..0f98a7cf4877 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -71,7 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, unsigned int index); int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft); + struct mlx5_flow_table *ft, + u32 underlay_qpn); int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b8a176503d38..0e487e8ca634 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -650,7 +650,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (ft->level >= min_level) return 0; - err = mlx5_cmd_update_root_ft(root->dev, ft); + err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn); if (err) mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", ft->id); @@ -818,8 +818,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - ft->underlay_qpn = ft_attr->underlay_qpn; - tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); @@ -1489,7 +1487,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) new_root_ft = find_next_ft(ft); if (new_root_ft) { - int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft); + int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, + root->underlay_qpn); if (err) { mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", @@ -2062,3 +2061,21 @@ err: mlx5_cleanup_fs(dev); return err; } + +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + + root->underlay_qpn = underlay_qpn; + return 0; +} +EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); + +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + + root->underlay_qpn = 0; + return 0; +} +EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 81eafc7b9dd9..990acee6fb09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -118,7 +118,6 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; - u32 underlay_qpn; }; struct mlx5_fc_cache { @@ -195,6 +194,7 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_table *root_ft; /* Should be held when chaining flow tables */ struct mutex chain_lock; + u32 underlay_qpn; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c index 56bff3540954..cc1858752e70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c @@ -160,6 +160,8 @@ out: static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { + mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn); + mlx5_core_destroy_qp(mdev, qp); } @@ -174,6 +176,8 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } + mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); @@ -193,7 +197,6 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, @@ -209,7 +212,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1b166d2e19c5..b25e7baa273e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -109,7 +109,6 @@ struct mlx5_flow_table_attr { int max_fte; u32 level; u32 flags; - u32 underlay_qpn; }; struct mlx5_flow_table * @@ -167,4 +166,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); + #endif -- cgit v1.2.3-59-g8ed1b From 216c4e9db4c9d1d2a382b42880442dc632cd47d9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 May 2017 22:40:06 +0300 Subject: PowerCap: Fix an error code in powercap_register_zone() In the current code we accidentally return the successful result from idr_alloc() instead of a negative error pointer. The caller is looking for an error pointer and so it treats the returned value as a valid pointer. This one might be a bit serious because if it lets people get around the kernel's protection for remapping NULL. I'm not sure. Fixes: 75d2364ea0ca (PowerCap: Add class driver) Signed-off-by: Dan Carpenter Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/powercap/powercap_sys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 14bde0db8c24..5b10b50f8686 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone( power_zone->id = result; idr_init(&power_zone->idr); + result = -ENOMEM; power_zone->name = kstrdup(name, GFP_KERNEL); if (!power_zone->name) goto err_name_alloc; -- cgit v1.2.3-59-g8ed1b From 0bae5fd3330be0517fba697e6b228601d421fade Mon Sep 17 00:00:00 2001 From: Pushkar Jambhlekar Date: Thu, 11 May 2017 10:31:24 +0530 Subject: PM / hibernate: Declare variables as static Fixing sparse warnings: 'symbol not declared. Should it be static?' Signed-off-by: Pushkar Jambhlekar Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d79a38de425a..a628cccafa4a 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1422,7 +1422,7 @@ static unsigned int nr_meta_pages; * Numbers of normal and highmem page frames allocated for hibernation image * before suspending devices. */ -unsigned int alloc_normal, alloc_highmem; +static unsigned int alloc_normal, alloc_highmem; /* * Memory bitmap used for marking saveable pages (during hibernation) or * hibernation image pages (during restore) -- cgit v1.2.3-59-g8ed1b From be0408d74d9c95de1baf6c2563b6e6d1dfb17b88 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 14:12:29 +0200 Subject: cpufreq: dbx500: add a Kconfig symbol Moving the cooling code into the cpufreq driver caused a possible build failure when the cpu_thermal helper code is a loadable module or disabled: drivers/cpufreq/dbx500-cpufreq.o: In function `dbx500_cpufreq_ready': dbx500-cpufreq.c:(.text.dbx500_cpufreq_ready+0x4): undefined reference to `cpufreq_cooling_register' This adds the same dependency that we have in other cpufreq drivers, forcing the driver to be disabled when we can't possibly link it. Fixes: 19678ffb9fd6 (cpufreq: dbx500: Manage cooling device from cpufreq driver) Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Reviewed-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 9 +++++++++ drivers/cpufreq/Makefile | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 74ed7e9a7f27..2011fec2d6ad 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -71,6 +71,15 @@ config ARM_HIGHBANK_CPUFREQ If in doubt, say N. +config ARM_DB8500_CPUFREQ + tristate "ST-Ericsson DB8500 cpufreq" if COMPILE_TEST && !ARCH_U8500 + default ARCH_U8500 + depends on HAS_IOMEM + depends on !CPU_THERMAL || THERMAL + help + This adds the CPUFreq driver for ST-Ericsson Ux500 (DB8500) SoC + series. + config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index b7e78f063c4f..ab3a42cd29ef 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -53,7 +53,7 @@ obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o -obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o +obj-$(CONFIG_ARM_DB8500_CPUFREQ) += dbx500-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o -- cgit v1.2.3-59-g8ed1b From 90b4f30b6d15222a509dacf47f29efef2b22571e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 May 2017 16:30:12 +0200 Subject: hwmon: (coretemp) Handle frozen hotplug state correctly The recent conversion to the hotplug state machine missed that the original hotplug notifiers did not execute in the frozen state, which is used on suspend on resume. This does not matter on single socket machines, but on multi socket systems this breaks when the device for a non-boot socket is removed when the last CPU of that socket is brought offline. The device removal locks up the machine hard w/o any debug output. Prevent executing the hotplug callbacks when cpuhp_tasks_frozen is true. Thanks to Tommi for providing debug information patiently while I failed to spot the obvious. Fixes: e00ca5df37ad ("hwmon: (coretemp) Convert to hotplug state machine") Reported-by: Tommi Rantala Tested-by: Tommi Rantala Signed-off-by: Thomas Gleixner Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 3ac4c03ba77b..c13a4fd86b3c 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -604,6 +604,13 @@ static int coretemp_cpu_online(unsigned int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct platform_data *pdata; + /* + * Don't execute this on resume as the offline callback did + * not get executed on suspend. + */ + if (cpuhp_tasks_frozen) + return 0; + /* * CPUID.06H.EAX[0] indicates whether the CPU has thermal * sensors. We check this bit only, all the early CPUs @@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned int cpu) struct temp_data *tdata; int indx, target; + /* + * Don't execute this on suspend as the device remove locks + * up the machine. + */ + if (cpuhp_tasks_frozen) + return 0; + /* If the physical CPU device does not exist, just return */ if (!pdev) return 0; -- cgit v1.2.3-59-g8ed1b From 2fe4bff3516924a37e083e3211364abe59db1161 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 12 Apr 2017 18:31:18 -0300 Subject: ARM: dts: imx53-qsrb: Pulldown PMIC IRQ pin Currently the following errors are seen: [ 14.015056] mc13xxx 0-0008: Failed to read IRQ status: -6 [ 27.321093] mc13xxx 0-0008: Failed to read IRQ status: -6 [ 27.411681] mc13xxx 0-0008: Failed to read IRQ status: -6 [ 27.456281] mc13xxx 0-0008: Failed to read IRQ status: -6 [ 30.527106] mc13xxx 0-0008: Failed to read IRQ status: -6 [ 36.596900] mc13xxx 0-0008: Failed to read IRQ status: -6 Also when reading the interrupts via 'cat /proc/interrupts' the PMIC GPIO interrupt counter does not stop increasing. The reason for the storm of interrupts is that the PUS field of register IOMUXC_SW_PAD_CTL_PAD_CSI0_DAT5 is currently configured as: 10 : 100k pullup and the PMIC interrupt is being registered as IRQ_TYPE_LEVEL_HIGH type, which is the correct type as per the MC34708 datasheet. Use the default power on value for the IOMUX, which sets PUS field as: 00: 360k pull down This prevents the spurious PMIC interrupts from happening. Commit e1ffceb078c6 ("ARM: imx53: qsrb: fix PMIC interrupt level") correctly described the irq type as IRQ_TYPE_LEVEL_HIGH, but missed to update the IOMUX of the PMIC GPIO as pull down. Fixes: e1ffceb078c6 ("ARM: imx53: qsrb: fix PMIC interrupt level") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-qsrb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts index de2215832372..4e103a905dc9 100644 --- a/arch/arm/boot/dts/imx53-qsrb.dts +++ b/arch/arm/boot/dts/imx53-qsrb.dts @@ -23,7 +23,7 @@ imx53-qsrb { pinctrl_pmic: pmicgrp { fsl,pins = < - MX53_PAD_CSI0_DAT5__GPIO5_23 0x1e4 /* IRQ */ + MX53_PAD_CSI0_DAT5__GPIO5_23 0x1c4 /* IRQ */ >; }; }; -- cgit v1.2.3-59-g8ed1b From d8581c7c8be172dac156a19d261f988a72ce596f Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 5 May 2017 14:00:17 +0300 Subject: ARM: dts: imx6sx-sdb: Remove OPP override The board file for imx6sx-sdb overrides cpufreq operating points to use higher voltages. This is done because the board has a shared rail for VDD_ARM_IN and VDD_SOC_IN and when using LDO bypass the shared voltage needs to be a value suitable for both ARM and SOC. This only applies to LDO bypass mode, a feature not present in upstream. When LDOs are enabled the effect is to use higher voltages than necessary for no good reason. Setting these higher voltages can make some boards fail to boot with ugly semi-random crashes reminiscent of memory corruption. These failures only happen on board rev. C, rev. B is reported to still work. Signed-off-by: Leonard Crestez Fixes: 54183bd7f766 ("ARM: imx6sx-sdb: add revb board and make it default") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx-sdb.dts | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index 5bb8fd57e7f5..d71da30c9cff 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -12,23 +12,6 @@ model = "Freescale i.MX6 SoloX SDB RevB Board"; }; -&cpu0 { - operating-points = < - /* kHz uV */ - 996000 1250000 - 792000 1175000 - 396000 1175000 - 198000 1175000 - >; - fsl,soc-operating-points = < - /* ARM kHz SOC uV */ - 996000 1250000 - 792000 1175000 - 396000 1175000 - 198000 1175000 - >; -}; - &i2c1 { clock-frequency = <100000>; pinctrl-names = "default"; -- cgit v1.2.3-59-g8ed1b From e23c7f7d57831fdae444be9d507e67716ab601d4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 13:37:47 +0200 Subject: soc: imx: add PM dependency for IMX7_PM_DOMAINS The new pm domain driver causes a build failure when CONFIG_PM is not set: warning: (IMX7_PM_DOMAINS) selects PM_GENERIC_DOMAINS which has unmet direct dependencies (PM) drivers/base/power/domain_governor.c: In function 'default_suspend_ok': drivers/base/power/domain_governor.c:75:17: error: 'struct dev_pm_info' has no member named 'ignore_children' This adds a dependency to ensure that we don't attempt to build the driver without CONFIG_PM. Fixes: 03aa12629fc4 ("soc: imx: Add GPCv2 power gating driver") Signed-off-by: Arnd Bergmann Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- drivers/soc/imx/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/imx/Kconfig b/drivers/soc/imx/Kconfig index 357a5d8f8da0..a5b86a28f343 100644 --- a/drivers/soc/imx/Kconfig +++ b/drivers/soc/imx/Kconfig @@ -2,8 +2,9 @@ menu "i.MX SoC drivers" config IMX7_PM_DOMAINS bool "i.MX7 PM domains" - select PM_GENERIC_DOMAINS depends on SOC_IMX7D || (COMPILE_TEST && OF) + depends on PM + select PM_GENERIC_DOMAINS default y if SOC_IMX7D endmenu -- cgit v1.2.3-59-g8ed1b From 072792dcdfc8d5f91a26050e5665285f50afebf5 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 06:14:16 -0400 Subject: dm cache: fix incorrect 'idle_time' reset in IO tracker Some bios have no payload (eg, a FLUSH), don't reset the idle_time when these come in. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1db375f50a13..0760ba409c21 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len) static void __iot_io_end(struct io_tracker *iot, sector_t len) { + if (!len) + return; + iot->in_flight -= len; if (!iot->in_flight) iot->idle_time = jiffies; -- cgit v1.2.3-59-g8ed1b From a8cd1eba6135e086109e2b94bf96deb17456ede8 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 05:07:34 -0400 Subject: dm cache policy smq: only demote entries in bottom half of the clean multiqueue Heavy IO load may mean there are very few clean blocks in the cache, and we risk demoting entries that get hit a lot. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 72479bd61e11..a177559f2049 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1190,7 +1190,7 @@ static void queue_demotion(struct smq_policy *mq) if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed))) return; - e = q_peek(&mq->clean, mq->clean.nr_levels, true); + e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true); if (!e) { if (!clean_target_met(mq, false)) queue_writeback(mq); -- cgit v1.2.3-59-g8ed1b From 78c45607b909fb384c47c134d89b39285a6a8b45 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 05:09:38 -0400 Subject: dm cache policy smq: be more aggressive about triggering a writeback If there are no clean entries to demote we really want to writeback immediately. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index a177559f2049..5aa8f43856c5 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1192,7 +1192,7 @@ static void queue_demotion(struct smq_policy *mq) e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true); if (!e) { - if (!clean_target_met(mq, false)) + if (!clean_target_met(mq, true)) queue_writeback(mq); return; } -- cgit v1.2.3-59-g8ed1b From 4d44ec5ab751be63c5d348f13294304d87baa8c3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 05:11:06 -0400 Subject: dm cache policy smq: put newly promoted entries at the top of the multiqueue This stops entries bouncing in and out of the cache quickly. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 5aa8f43856c5..54421a846a0c 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1452,6 +1452,7 @@ static void __complete_background_work(struct smq_policy *mq, clear_pending(mq, e); if (success) { e->oblock = work->oblock; + e->level = NR_CACHE_LEVELS - 1; push(mq, e); // h, q, a } else { -- cgit v1.2.3-59-g8ed1b From 6cf4cc8f8b3b7bc9e3c04a7eab44b985d50029fc Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 07:48:18 -0400 Subject: dm cache policy smq: stop preemptively demoting blocks It causes a lot of churn if the working set's size is close to the fast device's size. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 54421a846a0c..758480a1893d 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1134,13 +1134,10 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) percent_to_target(mq, CLEAN_TARGET); } -static bool free_target_met(struct smq_policy *mq, bool idle) +static bool free_target_met(struct smq_policy *mq) { unsigned nr_free; - if (!idle) - return true; - nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated; return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >= percent_to_target(mq, FREE_TARGET); @@ -1220,7 +1217,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, * We always claim to be 'idle' to ensure some demotions happen * with continuous loads. */ - if (!free_target_met(mq, true)) + if (!free_target_met(mq)) queue_demotion(mq); return; } @@ -1421,14 +1418,10 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle, spin_lock_irqsave(&mq->lock, flags); r = btracker_issue(mq->bg_work, result); if (r == -ENODATA) { - /* find some writeback work to do */ - if (mq->migrations_allowed && !free_target_met(mq, idle)) - queue_demotion(mq); - - else if (!clean_target_met(mq, idle)) + if (!clean_target_met(mq, idle)) { queue_writeback(mq); - - r = btracker_issue(mq->bg_work, result); + r = btracker_issue(mq->bg_work, result); + } } spin_unlock_irqrestore(&mq->lock, flags); -- cgit v1.2.3-59-g8ed1b From 701e03e4e180f0cd97d4139a32e2b2d879d12da2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 08:22:31 -0400 Subject: dm cache: track all IO to the cache rather than just the origin device's IO IO tracking used to throttle writebacks when the origin device is busy. Even if all the IO is going to the fast device, writebacks can significantly degrade performance. So track all IO to gauge whether the cache is busy or not. Otherwise, synthetic IO tests (e.g. fio) that might send all IO to the fast device wouldn't cause writebacks to get throttled. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 0760ba409c21..232078e48167 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -477,7 +477,7 @@ struct cache { spinlock_t invalidation_lock; struct list_head invalidation_requests; - struct io_tracker origin_tracker; + struct io_tracker tracker; struct work_struct commit_ws; struct batcher committer; @@ -904,8 +904,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) static bool accountable_bio(struct cache *cache, struct bio *bio) { - return ((bio->bi_bdev == cache->origin_dev->bdev) && - bio_op(bio) != REQ_OP_DISCARD); + return bio_op(bio) != REQ_OP_DISCARD; } static void accounted_begin(struct cache *cache, struct bio *bio) @@ -915,7 +914,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio) if (accountable_bio(cache, bio)) { pb->len = bio_sectors(bio); - iot_io_begin(&cache->origin_tracker, pb->len); + iot_io_begin(&cache->tracker, pb->len); } } @@ -924,7 +923,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio) size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); - iot_io_end(&cache->origin_tracker, pb->len); + iot_io_end(&cache->tracker, pb->len); } static void accounted_request(struct cache *cache, struct bio *bio) @@ -1725,7 +1724,7 @@ enum busy { static enum busy spare_migration_bandwidth(struct cache *cache) { - bool idle = iot_idle_for(&cache->origin_tracker, HZ); + bool idle = iot_idle_for(&cache->tracker, HZ); sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * cache->sectors_per_block; @@ -2720,7 +2719,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) batcher_init(&cache->committer, commit_op, cache, issue_op, cache, cache->wq); - iot_init(&cache->origin_tracker); + iot_init(&cache->tracker); init_rwsem(&cache->background_work_lock); prevent_background_work(cache); @@ -2944,7 +2943,7 @@ static void cache_postsuspend(struct dm_target *ti) cancel_delayed_work(&cache->waker); flush_workqueue(cache->wq); - WARN_ON(cache->origin_tracker.in_flight); + WARN_ON(cache->tracker.in_flight); /* * If it's a flush suspend there won't be any deferred bios, so this -- cgit v1.2.3-59-g8ed1b From 49b7f768900f4084a65c3689d955b2fceac39e53 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 09:07:16 -0400 Subject: dm cache: simplify the IDLE vs BUSY state calculation Drop the MODERATE state since it wasn't buying us much. Also, in check_migrations(), prepare for the next commit ("dm cache policy smq: don't do any writebacks unless IDLE") by deferring to the policy to make the final decision on whether writebacks can be serviced. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 232078e48167..d682a0511381 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1718,7 +1718,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, enum busy { IDLE, - MODERATE, BUSY }; @@ -1728,10 +1727,10 @@ static enum busy spare_migration_bandwidth(struct cache *cache) sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * cache->sectors_per_block; - if (current_volume <= cache->migration_threshold) - return idle ? IDLE : MODERATE; + if (idle && current_volume <= cache->migration_threshold) + return IDLE; else - return idle ? MODERATE : BUSY; + return BUSY; } static void inc_hit_counter(struct cache *cache, struct bio *bio) @@ -2047,8 +2046,6 @@ static void check_migrations(struct work_struct *ws) for (;;) { b = spare_migration_bandwidth(cache); - if (b == BUSY) - break; r = policy_get_background_work(cache->policy, b == IDLE, &op); if (r == -ENODATA) -- cgit v1.2.3-59-g8ed1b From 2e63309507c818e8b631a03f02c363031c007fb7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 May 2017 09:09:04 -0400 Subject: dm cache policy smq: don't do any writebacks unless IDLE If there are no clean blocks to be demoted the writeback will be triggered at that point. Preemptively writing back can hurt high IO load scenarios. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 758480a1893d..e5eb9c9b4bc8 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) * Cache entries may not be populated. So we cannot rely on the * size of the clean queue. */ - unsigned nr_clean; - if (idle) { /* * We'd like to clean everything. @@ -1129,9 +1127,10 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) return q_size(&mq->dirty) == 0u; } - nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty); - return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >= - percent_to_target(mq, CLEAN_TARGET); + /* + * If we're busy we don't worry about cleaning at all. + */ + return true; } static bool free_target_met(struct smq_policy *mq) -- cgit v1.2.3-59-g8ed1b From 01630ab8543f21df30b0dca19087b85744f61aee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 May 2017 11:04:52 +0100 Subject: ARM: KVM: Fix tracepoint generation after move to virt/kvm/arm/ Moving most of the shared code to virt/kvm/arm had for consequence that KVM/ARM doesn't build anymore, because the code that used to define the tracepoints is now somewhere else. Fix this by defining CREATE_TRACE_POINTS in coproc.c, and clean-up trace.h as well. Fixes: 35d2d5d490e2 ("KVM: arm/arm64: Move shared files to virt/kvm/arm") Reported-by: Arnd Bergmann Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 1 + arch/arm/kvm/trace.h | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 2c14b69511e9..ac8d36da4d08 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -32,6 +32,7 @@ #include #include "../vfp/vfpinstr.h" +#define CREATE_TRACE_POINTS #include "trace.h" #include "coproc.h" diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index fc0943776db2..b0d10648c486 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -1,5 +1,5 @@ -#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KVM_H +#if !defined(_TRACE_ARM_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ARM_KVM_H #include @@ -74,10 +74,10 @@ TRACE_EVENT(kvm_hvc, __entry->vcpu_pc, __entry->r0, __entry->imm) ); -#endif /* _TRACE_KVM_H */ +#endif /* _TRACE_ARM_KVM_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH arch/arm/kvm +#define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace -- cgit v1.2.3-59-g8ed1b From 40dd46048c155b8f0683f468c950a1c107f77a7c Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 3 May 2017 10:28:54 +0200 Subject: usb: serial: option: add Telit ME910 support This patch adds support for Telit ME910 PID 0x1100. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index af67a0de6b5d..3bf61acfc26b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -281,6 +281,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 +#define TELIT_PRODUCT_ME910 0x1100 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -640,6 +641,11 @@ static const struct option_blacklist_info simcom_sim7100e_blacklist = { .reserved = BIT(5) | BIT(6), }; +static const struct option_blacklist_info telit_me910_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(1) | BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1235,6 +1241,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), + .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), -- cgit v1.2.3-59-g8ed1b From 8663effb24f9430394d3bf1ed2dac42a771421d1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 14 Apr 2017 08:48:09 -0400 Subject: sched/core: Call __schedule() from do_idle() without enabling preemption I finally got around to creating trampolines for dynamically allocated ftrace_ops with using synchronize_rcu_tasks(). For users of the ftrace function hook callbacks, like perf, that allocate the ftrace_ops descriptor via kmalloc() and friends, ftrace was not able to optimize the functions being traced to use a trampoline because they would also need to be allocated dynamically. The problem is that they cannot be freed when CONFIG_PREEMPT is set, as there's no way to tell if a task was preempted on the trampoline. That was before Paul McKenney implemented synchronize_rcu_tasks() that would make sure all tasks (except idle) have scheduled out or have entered user space. While testing this, I triggered this bug: BUG: unable to handle kernel paging request at ffffffffa0230077 ... RIP: 0010:0xffffffffa0230077 ... Call Trace: schedule+0x5/0xe0 schedule_preempt_disabled+0x18/0x30 do_idle+0x172/0x220 What happened was that the idle task was preempted on the trampoline. As synchronize_rcu_tasks() ignores the idle thread, there's nothing that lets ftrace know that the idle task was preempted on a trampoline. The idle task shouldn't need to ever enable preemption. The idle task is simply a loop that calls schedule or places the cpu into idle mode. In fact, having preemption enabled is inefficient, because it can happen when idle is just about to call schedule anyway, which would cause schedule to be called twice. Once for when the interrupt came in and was returning back to normal context, and then again in the normal path that the idle loop is running in, which would be pointless, as it had already scheduled. The only reason schedule_preempt_disable() enables preemption is to be able to call sched_submit_work(), which requires preemption enabled. As this is a nop when the task is in the RUNNING state, and idle is always in the running state, there's no reason that idle needs to enable preemption. But that means it cannot use schedule_preempt_disable() as other callers of that function require calling sched_submit_work(). Adding a new function local to kernel/sched/ that allows idle to call the scheduler without enabling preemption, fixes the synchronize_rcu_tasks() issue, as well as removes the pointless spurious schedule calls caused by interrupts happening in the brief window where preemption is enabled just before it calls schedule. Reviewed: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paul E. McKenney Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170414084809.3dacde2a@gandalf.local.home Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 25 +++++++++++++++++++++++++ kernel/sched/idle.c | 2 +- kernel/sched/sched.h | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 759f4bd52cd6..803c3bc274c4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3502,6 +3502,31 @@ asmlinkage __visible void __sched schedule(void) } EXPORT_SYMBOL(schedule); +/* + * synchronize_rcu_tasks() makes sure that no task is stuck in preempted + * state (have scheduled out non-voluntarily) by making sure that all + * tasks have either left the run queue or have gone into user space. + * As idle tasks do not do either, they must not ever be preempted + * (schedule out non-voluntarily). + * + * schedule_idle() is similar to schedule_preempt_disable() except that it + * never enables preemption because it does not call sched_submit_work(). + */ +void __sched schedule_idle(void) +{ + /* + * As this skips calling sched_submit_work(), which the idle task does + * regardless because that function is a nop when the task is in a + * TASK_RUNNING state, make sure this isn't used someplace that the + * current task can be in any other state. Note, idle is always in the + * TASK_RUNNING state. + */ + WARN_ON_ONCE(current->state); + do { + __schedule(false); + } while (need_resched()); +} + #ifdef CONFIG_CONTEXT_TRACKING asmlinkage __visible void __sched schedule_user(void) { diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 2a25a9ec2c6e..ef63adce0c9c 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -265,7 +265,7 @@ static void do_idle(void) smp_mb__after_atomic(); sched_ttwu_pending(); - schedule_preempt_disabled(); + schedule_idle(); if (unlikely(klp_patch_pending(current))) klp_update_patch_state(current); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7808ab050599..6dda2aab731e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1467,6 +1467,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) } #endif +extern void schedule_idle(void); + extern void sysrq_sched_debug_show(void); extern void sched_init_granularity(void); extern void update_max_interval(void); -- cgit v1.2.3-59-g8ed1b From bb246681b3ed0967489a7401ad528c1aaa1a4c2e Mon Sep 17 00:00:00 2001 From: Anthony Mallet Date: Fri, 5 May 2017 17:30:16 +0200 Subject: USB: serial: ftdi_sio: fix setting latency for unprivileged users Commit 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag") enables unprivileged users to set the FTDI latency timer, but there was a logic flaw that skipped sending the corresponding USB control message to the device. Specifically, the device latency timer would not be updated until next open, something which was later also inadvertently broken by commit c19db4c9e49a ("USB: ftdi_sio: set device latency timeout at port probe"). A recent commit c6dce2626606 ("USB: serial: ftdi_sio: fix extreme low-latency setting") disabled the low-latency mode by default so we now need this fix to allow unprivileged users to again enable it. Signed-off-by: Anthony Mallet [johan: amend commit message] Fixes: 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag") Fixes: c19db4c9e49a ("USB: ftdi_sio: set device latency timeout at port probe"). Cc: stable # 2.6.31 Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d38780fa8788..0e634c11abbf 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1527,9 +1527,9 @@ static int set_serial_info(struct tty_struct *tty, (new_serial.flags & ASYNC_FLAGS)); priv->custom_divisor = new_serial.custom_divisor; +check_and_exit: write_latency_timer(port); -check_and_exit: if ((old_priv.flags & ASYNC_SPD_MASK) != (priv->flags & ASYNC_SPD_MASK)) { if ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) -- cgit v1.2.3-59-g8ed1b From f83914fdfcc3ecb62a5a83eeb609ff59a9c2052d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 May 2017 14:34:37 +0200 Subject: ALSA: usb-audio: fix Amanero Combo384 quirk on big-endian hosts Add missing endianness conversion when using the USB device-descriptor bcdDevice field when applying the Amanero Combo384 (endianness!) quirk. Fixes: 3eff682d765b ("ALSA: usb-audio: Support both DSD LE/BE Amanero firmware versions") Cc: Jussi Laako Signed-off-by: Johan Hovold Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 01eff6ce6401..d7b0b0a3a2db 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1364,7 +1364,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, /* Amanero Combo384 USB interface with native DSD support */ case USB_ID(0x16d0, 0x071a): if (fp->altsetting == 2) { - switch (chip->dev->descriptor.bcdDevice) { + switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) { case 0x199: return SNDRV_PCM_FMTBIT_DSD_U32_LE; case 0x19b: -- cgit v1.2.3-59-g8ed1b From 849ff8190eb9add34c376219b5ae319de83eeb32 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Sat, 6 May 2017 17:49:08 +0800 Subject: staging/android/ion: remove useless document file After commit 9828282e33a0 ("staging: android: ion: Remove old platform support"), the document about devicetree of ion is no need anymore, so just remove it. Signed-off-by: Yisheng Xie Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/staging/ion/hi6220-ion.txt | 31 ------------- MAINTAINERS | 1 - drivers/staging/android/ion/devicetree.txt | 51 ---------------------- 3 files changed, 83 deletions(-) delete mode 100644 Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt delete mode 100644 drivers/staging/android/ion/devicetree.txt diff --git a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt b/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt deleted file mode 100644 index c59e27c632c1..000000000000 --- a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt +++ /dev/null @@ -1,31 +0,0 @@ -Hi6220 SoC ION -=================================================================== -Required properties: -- compatible : "hisilicon,hi6220-ion" -- list of the ION heaps - - heap name : maybe heap_sys_user@0 - - heap id : id should be unique in the system. - - heap base : base ddr address of the heap,0 means that - it is dynamic. - - heap size : memory size and 0 means it is dynamic. - - heap type : the heap type of the heap, please also - see the define in ion.h(drivers/staging/android/uapi/ion.h) -------------------------------------------------------------------- -Example: - hi6220-ion { - compatible = "hisilicon,hi6220-ion"; - heap_sys_user@0 { - heap-name = "sys_user"; - heap-id = <0x0>; - heap-base = <0x0>; - heap-size = <0x0>; - heap-type = "ion_system"; - }; - heap_sys_contig@0 { - heap-name = "sys_contig"; - heap-id = <0x1>; - heap-base = <0x0>; - heap-size = <0x0>; - heap-type = "ion_system_contig"; - }; - }; diff --git a/MAINTAINERS b/MAINTAINERS index f7d568b8f133..5cf8b8d0f733 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -846,7 +846,6 @@ M: Laura Abbott M: Sumit Semwal L: devel@driverdev.osuosl.org S: Supported -F: Documentation/devicetree/bindings/staging/ion/ F: drivers/staging/android/ion F: drivers/staging/android/uapi/ion.h F: drivers/staging/android/uapi/ion_test.h diff --git a/drivers/staging/android/ion/devicetree.txt b/drivers/staging/android/ion/devicetree.txt deleted file mode 100644 index 168715271f06..000000000000 --- a/drivers/staging/android/ion/devicetree.txt +++ /dev/null @@ -1,51 +0,0 @@ -Ion Memory Manager - -Ion is a memory manager that allows for sharing of buffers via dma-buf. -Ion allows for different types of allocation via an abstraction called -a 'heap'. A heap represents a specific type of memory. Each heap has -a different type. There can be multiple instances of the same heap -type. - -Specific heap instances are tied to heap IDs. Heap IDs are not to be specified -in the devicetree. - -Required properties for Ion - -- compatible: "linux,ion" PLUS a compatible property for the device - -All child nodes of a linux,ion node are interpreted as heaps - -required properties for heaps - -- compatible: compatible string for a heap type PLUS a compatible property -for the specific instance of the heap. Current heap types --- linux,ion-heap-system --- linux,ion-heap-system-contig --- linux,ion-heap-carveout --- linux,ion-heap-chunk --- linux,ion-heap-dma --- linux,ion-heap-custom - -Optional properties -- memory-region: A phandle to a memory region. Required for DMA heap type -(see reserved-memory.txt for details on the reservation) - -Example: - - ion { - compatbile = "hisilicon,ion", "linux,ion"; - - ion-system-heap { - compatbile = "hisilicon,system-heap", "linux,ion-heap-system" - }; - - ion-camera-region { - compatible = "hisilicon,camera-heap", "linux,ion-heap-dma" - memory-region = <&camera_region>; - }; - - ion-fb-region { - compatbile = "hisilicon,fb-heap", "linux,ion-heap-dma" - memory-region = <&fb_region>; - }; - } -- cgit v1.2.3-59-g8ed1b From 84817ef091f378b3f9fc86b02efb48b3fc1a3428 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 7 May 2017 16:18:27 +0300 Subject: staging: MAINTAINERS: add GBY as ccree maintainer I work for Arm on maintaining the TrustZone CryptoCell driver. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5cf8b8d0f733..da0149f2d16c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3115,6 +3115,14 @@ F: drivers/net/ieee802154/cc2520.c F: include/linux/spi/cc2520.h F: Documentation/devicetree/bindings/net/ieee802154/cc2520.txt +CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER +M: Gilad Ben-Yossef +L: linux-crypto@vger.kernel.org +L: driverdev-devel@linuxdriverproject.org +S: Supported +F: drivers/staging/ccree/ +W: https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family + CEC FRAMEWORK M: Hans Verkuil L: linux-media@vger.kernel.org -- cgit v1.2.3-59-g8ed1b From c6a9d3eaee508f53ec4f777035522565ed567692 Mon Sep 17 00:00:00 2001 From: Olivier Leveque Date: Tue, 9 May 2017 09:04:53 -0700 Subject: staging: typec: tcpci: declare private structure as static This fixes a sparse warning regarding an undeclared symbol. Since the structure tcpci_tcpc_config is private to tcpci.c, it should be declared as static. Signed-off-by: Olivier Leveque Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/tcpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/typec/tcpci.c b/drivers/staging/typec/tcpci.c index 5e5be74c7850..df72d8b01e73 100644 --- a/drivers/staging/typec/tcpci.c +++ b/drivers/staging/typec/tcpci.c @@ -425,7 +425,7 @@ static const struct regmap_config tcpci_regmap_config = { .max_register = 0x7F, /* 0x80 .. 0xFF are vendor defined */ }; -const struct tcpc_config tcpci_tcpc_config = { +static const struct tcpc_config tcpci_tcpc_config = { .type = TYPEC_PORT_DFP, .default_role = TYPEC_SINK, }; -- cgit v1.2.3-59-g8ed1b From 227383f8c28ea9e53d958801790d0e2e8f985d08 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 9 May 2017 09:04:54 -0700 Subject: staging: typec: fusb302: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the OF and I2C device ID table entries as module aliases, using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/staging/typec/fusb302/fusb302.ko | grep alias $ After this patch: $ modinfo drivers/staging/typec/fusb302/fusb302.ko | grep alias alias: of:N*T*Cfcs,fusb302C* alias: of:N*T*Cfcs,fusb302 alias: i2c:typec_fusb302 Signed-off-by: Javier Martinez Canillas Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/fusb302/fusb302.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c index 2cee9a952c9b..aa460f93a293 100644 --- a/drivers/staging/typec/fusb302/fusb302.c +++ b/drivers/staging/typec/fusb302/fusb302.c @@ -1787,11 +1787,13 @@ static const struct of_device_id fusb302_dt_match[] = { {.compatible = "fcs,fusb302"}, {}, }; +MODULE_DEVICE_TABLE(of, fusb302_dt_match); static const struct i2c_device_id fusb302_i2c_device_id[] = { {"typec_fusb302", 0}, {}, }; +MODULE_DEVICE_TABLE(i2c, fusb302_i2c_device_id); static const struct dev_pm_ops fusb302_pm_ops = { .suspend = fusb302_pm_suspend, -- cgit v1.2.3-59-g8ed1b From aac53ee4557947d778cb6a255c719e5c70963c42 Mon Sep 17 00:00:00 2001 From: Yueyao Zhu Date: Tue, 9 May 2017 09:04:55 -0700 Subject: staging: typec: fusb302: Fix chip->vbus_present init value FUSB_REG_STATUS0 & FUSB_REG_STATUS0_VBUSOK = 0x40 & 0x80 is always zero. Fix the code to what it is intended to be: check the VBUSOK bit of the value read from address FUSB_REG_STATUS0. Reported-by: Dan Carpenter Cc: Guenter Roeck Signed-off-by: Yueyao Zhu Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/fusb302/fusb302.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c index aa460f93a293..d8b50b49bb2d 100644 --- a/drivers/staging/typec/fusb302/fusb302.c +++ b/drivers/staging/typec/fusb302/fusb302.c @@ -489,7 +489,7 @@ static int tcpm_init(struct tcpc_dev *dev) ret = fusb302_i2c_read(chip, FUSB_REG_STATUS0, &data); if (ret < 0) return ret; - chip->vbus_present = !!(FUSB_REG_STATUS0 & FUSB_REG_STATUS0_VBUSOK); + chip->vbus_present = !!(data & FUSB_REG_STATUS0_VBUSOK); ret = fusb302_i2c_read(chip, FUSB_REG_DEVICE_ID, &data); if (ret < 0) return ret; -- cgit v1.2.3-59-g8ed1b From 5fec4b54d0bf6c3eeb176b624ce50d6aef4819c0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 May 2017 09:04:56 -0700 Subject: staging: typec: tcpm: Drop duplicate PD messages Per USB PD standard, we have to drop duplicate PD messages. We can not expect lower protocol layers to drop such messages, since lower layers don't know if a message was dropped somewhere else in the stack. Originally-from: Puma Hsu Cc: Yueyao Zhu Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/pd.h | 10 ++++++++++ drivers/staging/typec/tcpm.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/drivers/staging/typec/pd.h b/drivers/staging/typec/pd.h index 8d97bdb95f23..510ef7279900 100644 --- a/drivers/staging/typec/pd.h +++ b/drivers/staging/typec/pd.h @@ -92,6 +92,16 @@ static inline unsigned int pd_header_type_le(__le16 header) return pd_header_type(le16_to_cpu(header)); } +static inline unsigned int pd_header_msgid(u16 header) +{ + return (header >> PD_HEADER_ID_SHIFT) & PD_HEADER_ID_MASK; +} + +static inline unsigned int pd_header_msgid_le(__le16 header) +{ + return pd_header_msgid(le16_to_cpu(header)); +} + #define PD_MAX_PAYLOAD 7 struct pd_message { diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c index abba655ba00a..c5d8b129c4f4 100644 --- a/drivers/staging/typec/tcpm.c +++ b/drivers/staging/typec/tcpm.c @@ -238,6 +238,7 @@ struct tcpm_port { unsigned int hard_reset_count; bool pd_capable; bool explicit_contract; + unsigned int rx_msgid; /* Partner capabilities/requests */ u32 sink_request; @@ -1415,6 +1416,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, break; case SOFT_RESET_SEND: port->message_id = 0; + port->rx_msgid = -1; if (port->pwr_role == TYPEC_SOURCE) next_state = SRC_SEND_CAPABILITIES; else @@ -1503,6 +1505,22 @@ static void tcpm_pd_rx_handler(struct work_struct *work) port->attached); if (port->attached) { + enum pd_ctrl_msg_type type = pd_header_type_le(msg->header); + unsigned int msgid = pd_header_msgid_le(msg->header); + + /* + * USB PD standard, 6.6.1.2: + * "... if MessageID value in a received Message is the + * same as the stored value, the receiver shall return a + * GoodCRC Message with that MessageID value and drop + * the Message (this is a retry of an already received + * Message). Note: this shall not apply to the Soft_Reset + * Message which always has a MessageID value of zero." + */ + if (msgid == port->rx_msgid && type != PD_CTRL_SOFT_RESET) + goto done; + port->rx_msgid = msgid; + /* * If both ends believe to be DFP/host, we have a data role * mismatch. @@ -1520,6 +1538,7 @@ static void tcpm_pd_rx_handler(struct work_struct *work) } } +done: mutex_unlock(&port->lock); kfree(event); } @@ -1957,6 +1976,12 @@ static void tcpm_reset_port(struct tcpm_port *port) port->attached = false; port->pd_capable = false; + /* + * First Rx ID should be 0; set this to a sentinel of -1 so that + * we can check tcpm_pd_rx_handler() if we had seen it before. + */ + port->rx_msgid = -1; + port->tcpc->set_pd_rx(port->tcpc, false); tcpm_init_vbus(port); /* also disables charging */ tcpm_init_vconn(port); @@ -2170,6 +2195,7 @@ static void run_state_machine(struct tcpm_port *port) port->pwr_opmode = TYPEC_PWR_MODE_USB; port->caps_count = 0; port->message_id = 0; + port->rx_msgid = -1; port->explicit_contract = false; tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0); break; @@ -2329,6 +2355,7 @@ static void run_state_machine(struct tcpm_port *port) typec_set_pwr_opmode(port->typec_port, TYPEC_PWR_MODE_USB); port->pwr_opmode = TYPEC_PWR_MODE_USB; port->message_id = 0; + port->rx_msgid = -1; port->explicit_contract = false; tcpm_set_state(port, SNK_DISCOVERY, 0); break; @@ -2496,6 +2523,7 @@ static void run_state_machine(struct tcpm_port *port) /* Soft_Reset states */ case SOFT_RESET: port->message_id = 0; + port->rx_msgid = -1; tcpm_pd_send_control(port, PD_CTRL_ACCEPT); if (port->pwr_role == TYPEC_SOURCE) tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0); @@ -2504,6 +2532,7 @@ static void run_state_machine(struct tcpm_port *port) break; case SOFT_RESET_SEND: port->message_id = 0; + port->rx_msgid = -1; if (tcpm_pd_send_control(port, PD_CTRL_SOFT_RESET)) tcpm_set_state_cond(port, hard_reset_state(port), 0); else -- cgit v1.2.3-59-g8ed1b From 931693f973507b320ecbdccd0ac3e786ddaf795f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 May 2017 09:04:57 -0700 Subject: staging: typec: tcpm: Set correct flags in PD request messages We do support USB PD communication, and devices supported by this driver typically use USB power for purposes other than USB communication. Originally-from: Puma Hsu Cc: Yueyao Zhu Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/tcpm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c index c5d8b129c4f4..a385f7e2a6fd 100644 --- a/drivers/staging/typec/tcpm.c +++ b/drivers/staging/typec/tcpm.c @@ -1738,8 +1738,7 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo) } ma = min(ma, port->max_snk_ma); - /* XXX: Any other flags need to be set? */ - flags = 0; + flags = RDO_USB_COMM | RDO_NO_SUSPEND; /* Set mismatch bit if offered power is less than operating power */ mw = ma * mv / 1000; -- cgit v1.2.3-59-g8ed1b From 193a68011fdc002cc03d6e6edabf941251df5690 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 May 2017 09:04:58 -0700 Subject: staging: typec: tcpm: Respond to Discover Identity commands If the lower level driver provided a list of VDOs in its configuration data, send it to the partner as response to a Discover Identity command if in device mode (UFP). Cc: Yueyao Zhu Originally-from: Puma Hsu Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/pd_vdo.h | 4 +++- drivers/staging/typec/tcpm.c | 27 +++++++++++++++++++++++++++ drivers/staging/typec/tcpm.h | 3 +++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/staging/typec/pd_vdo.h b/drivers/staging/typec/pd_vdo.h index dba172e0e0d1..d92259f8de0a 100644 --- a/drivers/staging/typec/pd_vdo.h +++ b/drivers/staging/typec/pd_vdo.h @@ -22,6 +22,9 @@ * VDM object is minimum of VDM header + 6 additional data objects. */ +#define VDO_MAX_OBJECTS 6 +#define VDO_MAX_SIZE (VDO_MAX_OBJECTS + 1) + /* * VDM header * ---------- @@ -34,7 +37,6 @@ * <5> :: reserved (SVDM), command type (UVDM) * <4:0> :: command */ -#define VDO_MAX_SIZE 7 #define VDO(vid, type, custom) \ (((vid) << 16) | \ ((type) << 15) | \ diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c index a385f7e2a6fd..c749e980ddf9 100644 --- a/drivers/staging/typec/tcpm.c +++ b/drivers/staging/typec/tcpm.c @@ -252,6 +252,8 @@ struct tcpm_port { unsigned int nr_src_pdo; u32 snk_pdo[PDO_MAX_OBJECTS]; unsigned int nr_snk_pdo; + u32 snk_vdo[VDO_MAX_OBJECTS]; + unsigned int nr_snk_vdo; unsigned int max_snk_mv; unsigned int max_snk_ma; @@ -998,6 +1000,7 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt, struct pd_mode_data *modep; int rlen = 0; u16 svid; + int i; tcpm_log(port, "Rx VDM cmd 0x%x type %d cmd %d len %d", p0, cmd_type, cmd, cnt); @@ -1008,6 +1011,14 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt, case CMDT_INIT: switch (cmd) { case CMD_DISCOVER_IDENT: + /* 6.4.4.3.1: Only respond as UFP (device) */ + if (port->data_role == TYPEC_DEVICE && + port->nr_snk_vdo) { + for (i = 0; i < port->nr_snk_vdo; i++) + response[i + 1] + = cpu_to_le32(port->snk_vdo[i]); + rlen = port->nr_snk_vdo + 1; + } break; case CMD_DISCOVER_SVID: break; @@ -3320,6 +3331,20 @@ static int tcpm_copy_pdos(u32 *dest_pdo, const u32 *src_pdo, return nr_pdo; } +static int tcpm_copy_vdos(u32 *dest_vdo, const u32 *src_vdo, + unsigned int nr_vdo) +{ + unsigned int i; + + if (nr_vdo > VDO_MAX_OBJECTS) + nr_vdo = VDO_MAX_OBJECTS; + + for (i = 0; i < nr_vdo; i++) + dest_vdo[i] = src_vdo[i]; + + return nr_vdo; +} + void tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo, unsigned int nr_pdo) { @@ -3410,6 +3435,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) tcpc->config->nr_src_pdo); port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, tcpc->config->snk_pdo, tcpc->config->nr_snk_pdo); + port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo, + tcpc->config->nr_snk_vdo); port->max_snk_mv = tcpc->config->max_snk_mv; port->max_snk_ma = tcpc->config->max_snk_ma; diff --git a/drivers/staging/typec/tcpm.h b/drivers/staging/typec/tcpm.h index 969b365e6549..19c307d31a5a 100644 --- a/drivers/staging/typec/tcpm.h +++ b/drivers/staging/typec/tcpm.h @@ -60,6 +60,9 @@ struct tcpc_config { const u32 *snk_pdo; unsigned int nr_snk_pdo; + const u32 *snk_vdo; + unsigned int nr_snk_vdo; + unsigned int max_snk_mv; unsigned int max_snk_ma; unsigned int max_snk_mw; -- cgit v1.2.3-59-g8ed1b From 050161ea3268ad72d276bc2c327e9654048a82b2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 May 2017 09:04:59 -0700 Subject: staging: typec: tcpm: Fix Port Power Role field in PS_RDY messages PS_RDY messages sent during power swap sequences are expected to reflect the new power role. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/tcpm.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/staging/typec/tcpm.c b/drivers/staging/typec/tcpm.c index c749e980ddf9..20eb4ebcf8c3 100644 --- a/drivers/staging/typec/tcpm.c +++ b/drivers/staging/typec/tcpm.c @@ -2607,6 +2607,14 @@ static void run_state_machine(struct tcpm_port *port) break; case PR_SWAP_SRC_SNK_SOURCE_OFF: tcpm_set_cc(port, TYPEC_CC_RD); + /* + * USB-PD standard, 6.2.1.4, Port Power Role: + * "During the Power Role Swap Sequence, for the initial Source + * Port, the Port Power Role field shall be set to Sink in the + * PS_RDY Message indicating that the initial Source’s power + * supply is turned off" + */ + tcpm_set_pwr_role(port, TYPEC_SINK); if (tcpm_pd_send_control(port, PD_CTRL_PS_RDY)) { tcpm_set_state(port, ERROR_RECOVERY, 0); break; @@ -2614,7 +2622,6 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state_cond(port, SNK_UNATTACHED, PD_T_PS_SOURCE_ON); break; case PR_SWAP_SRC_SNK_SINK_ON: - tcpm_set_pwr_role(port, TYPEC_SINK); tcpm_swap_complete(port, 0); tcpm_set_state(port, SNK_STARTUP, 0); break; @@ -2626,8 +2633,15 @@ static void run_state_machine(struct tcpm_port *port) case PR_SWAP_SNK_SRC_SOURCE_ON: tcpm_set_cc(port, tcpm_rp_cc(port)); tcpm_set_vbus(port, true); - tcpm_pd_send_control(port, PD_CTRL_PS_RDY); + /* + * USB PD standard, 6.2.1.4: + * "Subsequent Messages initiated by the Policy Engine, + * such as the PS_RDY Message sent to indicate that Vbus + * is ready, will have the Port Power Role field set to + * Source." + */ tcpm_set_pwr_role(port, TYPEC_SOURCE); + tcpm_pd_send_control(port, PD_CTRL_PS_RDY); tcpm_swap_complete(port, 0); tcpm_set_state(port, SRC_STARTUP, 0); break; -- cgit v1.2.3-59-g8ed1b From cde13b5dad60471886a3bccb4f4134c647c4a9dc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:37 +0100 Subject: arm64: KVM: Do not use stack-protector to compile EL2 code We like living dangerously. Nothing explicitely forbids stack-protector to be used in the EL2 code, while distributions routinely compile their kernel with it. We're just lucky that no code actually triggers the instrumentation. Let's not try our luck for much longer, and disable stack-protector for code living at EL2. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index aaf42ae8d8c3..14c4e3b14bcb 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -2,6 +2,8 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # +ccflags-y += -fno-stack-protector + KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o -- cgit v1.2.3-59-g8ed1b From 43e24e82f35291d4c1ca78877ce1b20d3aeb78f1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 May 2017 16:57:49 +1000 Subject: powerpc/modules: If mprofile-kernel is enabled add it to vermagic On powerpc we can build the kernel with two different ABIs for mcount(), which is used by ftrace. Kernels built with one ABI do not know how to load modules built with the other ABI. The new style ABI is called "mprofile-kernel", for want of a better name. Currently if we build a module using the old style ABI, and the kernel with mprofile-kernel, when we load the module we'll oops something like: # insmod autofs4-no-mprofile-kernel.ko ftrace-powerpc: Unexpected instruction f8810028 around bl _mcount ------------[ cut here ]------------ WARNING: CPU: 6 PID: 3759 at ../kernel/trace/ftrace.c:2024 ftrace_bug+0x2b8/0x3c0 CPU: 6 PID: 3759 Comm: insmod Not tainted 4.11.0-rc3-gcc-5.4.1-00017-g5a61ef74f269 #11 ... NIP [c0000000001eaa48] ftrace_bug+0x2b8/0x3c0 LR [c0000000001eaff8] ftrace_process_locs+0x4a8/0x590 Call Trace: alloc_pages_current+0xc4/0x1d0 (unreliable) ftrace_process_locs+0x4a8/0x590 load_module+0x1c8c/0x28f0 SyS_finit_module+0x110/0x140 system_call+0x38/0xfc ... ftrace failed to modify [] 0xd000000002a31024 actual: 35:65:00:48 We can avoid this by including in the vermagic whether the kernel/module was built with mprofile-kernel. Which results in: # insmod autofs4-pg.ko autofs4: version magic '4.11.0-rc3-gcc-5.4.1-00017-g5a61ef74f269 SMP mod_unload modversions ' should be '4.11.0-rc3-gcc-5.4.1-00017-g5a61ef74f269-dirty SMP mod_unload modversions mprofile-kernel' insmod: ERROR: could not insert module autofs4-pg.ko: Invalid module format Fixes: 8c50b72a3b4f ("powerpc/ftrace: Add Kconfig & Make glue for mprofile-kernel") Signed-off-by: Michael Ellerman Acked-by: Balbir Singh Acked-by: Jessica Yu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/module.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 53885512b8d3..6c0132c7212f 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -14,6 +14,10 @@ #include +#ifdef CC_USING_MPROFILE_KERNEL +#define MODULE_ARCH_VERMAGIC "mprofile-kernel" +#endif + #ifndef __powerpc64__ /* * Thanks to Paul M for explaining this. -- cgit v1.2.3-59-g8ed1b From 501ad27c67ed0b90df465f23d33e9aed64058a47 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:38 +0100 Subject: arm: KVM: Do not use stack-protector to compile HYP code We like living dangerously. Nothing explicitely forbids stack-protector to be used in the HYP code, while distributions routinely compile their kernel with it. We're just lucky that no code actually triggers the instrumentation. Let's not try our luck for much longer, and disable stack-protector for code living at HYP. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/kvm/hyp/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 3023bb530edf..8679405b0b2b 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -2,6 +2,8 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # +ccflags-y += -fno-stack-protector + KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o -- cgit v1.2.3-59-g8ed1b From f48e91e87e67b56bef63393d1a02c6e22c1d7078 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 8 May 2017 17:16:26 +1000 Subject: powerpc/tm: Fix FP and VMX register corruption In commit dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers"), a section of code was removed that copied the current state to checkpointed state. That code should not have been removed. When an FP (Floating Point) unavailable is taken inside a transaction, we need to abort the transaction. This is because at the time of the tbegin, the FP state is bogus so the state stored in the checkpointed registers is incorrect. To fix this, we treclaim (to get the checkpointed GPRs) and then copy the thread_struct FP live state into the checkpointed state. We then trecheckpoint so that the FP state is correctly restored into the CPU. The copying of the FP registers from live to checkpointed is what was missing. This simplifies the logic slightly from the original patch. tm_reclaim_thread() will now always write the checkpointed FP state. Either the checkpointed FP state will be written as part of the actual treclaim (in tm.S), or it'll be a copy of the live state. Which one we use is based on MSR[FP] from userspace. Similarly for VMX. Fixes: dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers") Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Michael Neuling Reviewed-by: cyrilbur@gmail.com Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d645da302bf2..baae104b16c7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -864,6 +864,25 @@ static void tm_reclaim_thread(struct thread_struct *thr, if (!MSR_TM_SUSPENDED(mfmsr())) return; + /* + * If we are in a transaction and FP is off then we can't have + * used FP inside that transaction. Hence the checkpointed + * state is the same as the live state. We need to copy the + * live state to the checkpointed state so that when the + * transaction is restored, the checkpointed state is correct + * and the aborted transaction sees the correct state. We use + * ckpt_regs.msr here as that's what tm_reclaim will use to + * determine if it's going to write the checkpointed state or + * not. So either this will write the checkpointed registers, + * or reclaim will. Similarly for VMX. + */ + if ((thr->ckpt_regs.msr & MSR_FP) == 0) + memcpy(&thr->ckfp_state, &thr->fp_state, + sizeof(struct thread_fp_state)); + if ((thr->ckpt_regs.msr & MSR_VEC) == 0) + memcpy(&thr->ckvr_state, &thr->vr_state, + sizeof(struct thread_vr_state)); + giveup_all(container_of(thr, struct task_struct, thread)); tm_reclaim(thr, thr->ckpt_regs.msr, cause); -- cgit v1.2.3-59-g8ed1b From ddf42d068f8802de122bb7efdfcb3179336053f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:39 +0100 Subject: KVM: arm/arm64: vgic-v2: Do not use Active+Pending state for a HW interrupt When an interrupt is injected with the HW bit set (indicating that deactivation should be propagated to the physical distributor), special care must be taken so that we never mark the corresponding LR with the Active+Pending state (as the pending state is kept in the physycal distributor). Cc: stable@vger.kernel.org Fixes: 140b086dd197 ("KVM: arm/arm64: vgic-new: Add GICv2 world switch backend") Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index a65757aab6d3..504b4bd0d651 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -149,6 +149,13 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->hw) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active && irq_is_pending(irq)) + val &= ~GICH_LR_PENDING_BIT; } else { if (irq->config == VGIC_CONFIG_LEVEL) val |= GICH_LR_EOI; -- cgit v1.2.3-59-g8ed1b From 3d6e77ad1489650afa20da92bb589c8778baa8da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:40 +0100 Subject: KVM: arm/arm64: vgic-v3: Do not use Active+Pending state for a HW interrupt When an interrupt is injected with the HW bit set (indicating that deactivation should be propagated to the physical distributor), special care must be taken so that we never mark the corresponding LR with the Active+Pending state (as the pending state is kept in the physycal distributor). Cc: stable@vger.kernel.org Fixes: 59529f69f504 ("KVM: arm/arm64: vgic-new: Add GICv3 world switch backend") Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v3.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 8fa737edde6f..6fe3f003636a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -127,6 +127,13 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->hw) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active && irq_is_pending(irq)) + val &= ~ICH_LR_PENDING_BIT; } else { if (irq->config == VGIC_CONFIG_LEVEL) val |= ICH_LR_EOI; -- cgit v1.2.3-59-g8ed1b From 15d2bffdde6268883647c6112970f74d3e1af651 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:41 +0100 Subject: KVM: arm/arm64: vgic-v3: Use PREbits to infer the number of ICH_APxRn_EL2 registers The GICv3 documentation is extremely confusing, as it talks about the number of priorities represented by the ICH_APxRn_EL2 registers, while it should really talk about the number of preemption levels. This leads to a bug where we may access undefined ICH_APxRn_EL2 registers, since PREbits is allowed to be smaller than PRIbits. Thankfully, nobody seem to have taken this path so far... The fix is to use ICH_VTR_EL2.PREbits instead. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index bce6037cf01d..32c3295929b0 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -22,7 +22,7 @@ #include #define vtr_to_max_lr_idx(v) ((v) & 0xf) -#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) +#define vtr_to_nr_pre_bits(v) (((u32)(v) >> 26) + 1) static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) { @@ -135,13 +135,13 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { int i; - u32 nr_pri_bits; + u32 nr_pre_bits; cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); val = read_gicreg(ICH_VTR_EL2); - nr_pri_bits = vtr_to_nr_pri_bits(val); + nr_pre_bits = vtr_to_nr_pre_bits(val); for (i = 0; i < used_lrs; i++) { if (cpu_if->vgic_elrsr & (1 << i)) @@ -152,7 +152,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) __gic_v3_set_lr(0, i); } - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); @@ -162,7 +162,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); } - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); @@ -198,7 +198,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 val; - u32 nr_pri_bits; + u32 nr_pre_bits; int i; /* @@ -217,12 +217,12 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) } val = read_gicreg(ICH_VTR_EL2); - nr_pri_bits = vtr_to_nr_pri_bits(val); + nr_pre_bits = vtr_to_nr_pre_bits(val); if (used_lrs) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); @@ -232,7 +232,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); } - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); -- cgit v1.2.3-59-g8ed1b From 2c8e3f44f708a89a2c73a25a134af8c23998a2bc Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Fri, 12 May 2017 21:16:13 +0100 Subject: staging: typec: fusb302: do not free gpio from managed resource When allocating a gpio using the managed resource devm_, we can avoid freeing it manually. But even if we did it we should use devm_gpio_free. So, just remove the free of the gpio in the error path. Signed-off-by: Rui Miguel Silva Acked-by: Yueyao Zhu Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/fusb302/fusb302.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c index d8b50b49bb2d..ef5cceaa5967 100644 --- a/drivers/staging/typec/fusb302/fusb302.c +++ b/drivers/staging/typec/fusb302/fusb302.c @@ -1663,14 +1663,12 @@ static int init_gpio(struct fusb302_chip *chip) if (ret < 0) { fusb302_log(chip, "cannot set GPIO Int_N to input, ret=%d", ret); - gpio_free(chip->gpio_int_n); return ret; } ret = gpio_to_irq(chip->gpio_int_n); if (ret < 0) { fusb302_log(chip, "cannot request IRQ for GPIO Int_N, ret=%d", ret); - gpio_free(chip->gpio_int_n); return ret; } chip->gpio_int_n_irq = ret; -- cgit v1.2.3-59-g8ed1b From f03d95f59026d14219230795ac4dcda8c09b5321 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Wed, 10 May 2017 22:51:35 -0700 Subject: staging: typec: Fix sparse warnings about incorrect types Fix the following sparse warnings about incorrect type usage: fusb302.c:1028:32: warning: incorrect type in argument 1 (different base types) fusb302.c:1028:32: expected unsigned short [unsigned] [usertype] header fusb302.c:1028:32: got restricted __le16 const [usertype] header fusb302.c:1484:32: warning: incorrect type in argument 1 (different base types) fusb302.c:1484:32: expected unsigned short [unsigned] [usertype] header fusb302.c:1484:32: got restricted __le16 [usertype] header Signed-off-by: Guru Das Srinagesh Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/fusb302/fusb302.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c index ef5cceaa5967..6bd602db11be 100644 --- a/drivers/staging/typec/fusb302/fusb302.c +++ b/drivers/staging/typec/fusb302/fusb302.c @@ -1025,7 +1025,7 @@ static int fusb302_pd_send_message(struct fusb302_chip *chip, buf[pos++] = FUSB302_TKN_SYNC1; buf[pos++] = FUSB302_TKN_SYNC2; - len = pd_header_cnt(msg->header) * 4; + len = pd_header_cnt_le(msg->header) * 4; /* plug 2 for header */ len += 2; if (len > 0x1F) { @@ -1481,7 +1481,7 @@ static int fusb302_pd_read_message(struct fusb302_chip *chip, (u8 *)&msg->header); if (ret < 0) return ret; - len = pd_header_cnt(msg->header) * 4; + len = pd_header_cnt_le(msg->header) * 4; /* add 4 to length to include the CRC */ if (len > PD_MAX_PAYLOAD * 4) { fusb302_log(chip, "PD message too long %d", len); -- cgit v1.2.3-59-g8ed1b From c21376631d6325590e53ac8720312d2b02494103 Mon Sep 17 00:00:00 2001 From: Ian Chard Date: Wed, 10 May 2017 10:20:59 +0100 Subject: staging: ccree: remove extraneous spin_unlock_bh() in error handler An early error handler in send_request() tries to release a spinlock, but the lock isn't acquired until the loop below it is entered. Signed-off-by: Ian Chard Acked-by: Gilad Ben-Yossef Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ccree/ssi_request_mgr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/ccree/ssi_request_mgr.c b/drivers/staging/ccree/ssi_request_mgr.c index 522bd62c102e..8611adf3bb2e 100644 --- a/drivers/staging/ccree/ssi_request_mgr.c +++ b/drivers/staging/ccree/ssi_request_mgr.c @@ -376,7 +376,6 @@ int send_request( rc = ssi_power_mgr_runtime_get(&drvdata->plat_dev->dev); if (rc != 0) { SSI_LOG_ERR("ssi_power_mgr_runtime_get returned %x\n",rc); - spin_unlock_bh(&req_mgr_h->hw_lock); return rc; } #endif -- cgit v1.2.3-59-g8ed1b From ff92b9e3c9f85fa442c430d70bf075499e1193b7 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Thu, 4 May 2017 10:58:20 +0100 Subject: staging: vc04_services: Fix bulk cache maintenance vchiq_arm supports transfers less than one page and at arbitrary alignment, using the dma-mapping API to perform its cache maintenance (even though the VPU drives the DMA hardware). Read (DMA_FROM_DEVICE) operations use cache invalidation for speed, falling back to clean+invalidate on partial cache lines, with writes (DMA_TO_DEVICE) using flushes. If a read transfer has ends which aren't page-aligned, performing cache maintenance as if they were whole pages can lead to memory corruption since the partial cache lines at the ends (and any cache lines before or after the transfer area) will be invalidated. This bug was masked until the disabling of the cache flush in flush_dcache_page(). Honouring the requested transfer start- and end-points prevents the corruption. Fixes: cf9caf192988 ("staging: vc04_services: Replace dmac_map_area with dmac_map_sg") Signed-off-by: Phil Elwell Cc: stable # 4.10 Reported-by: Stefan Wahren Tested-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- .../interface/vchiq_arm/vchiq_2835_arm.c | 31 +++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index 988ee61fb4a7..d04db3f55519 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -502,8 +502,15 @@ create_pagelist(char __user *buf, size_t count, unsigned short type, */ sg_init_table(scatterlist, num_pages); /* Now set the pages for each scatterlist */ - for (i = 0; i < num_pages; i++) - sg_set_page(scatterlist + i, pages[i], PAGE_SIZE, 0); + for (i = 0; i < num_pages; i++) { + unsigned int len = PAGE_SIZE - offset; + + if (len > count) + len = count; + sg_set_page(scatterlist + i, pages[i], len, offset); + offset = 0; + count -= len; + } dma_buffers = dma_map_sg(g_dev, scatterlist, @@ -524,20 +531,20 @@ create_pagelist(char __user *buf, size_t count, unsigned short type, u32 addr = sg_dma_address(sg); /* Note: addrs is the address + page_count - 1 - * The firmware expects the block to be page + * The firmware expects blocks after the first to be page- * aligned and a multiple of the page size */ WARN_ON(len == 0); - WARN_ON(len & ~PAGE_MASK); - WARN_ON(addr & ~PAGE_MASK); + WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); + WARN_ON(i && (addr & ~PAGE_MASK)); if (k > 0 && - ((addrs[k - 1] & PAGE_MASK) | - ((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT) - == addr) { - addrs[k - 1] += (len >> PAGE_SHIFT); - } else { - addrs[k++] = addr | ((len >> PAGE_SHIFT) - 1); - } + ((addrs[k - 1] & PAGE_MASK) + + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) + == (addr & PAGE_MASK)) + addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT); + else + addrs[k++] = (addr & PAGE_MASK) | + (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1); } /* Partial cache lines (fragments) require special measures */ -- cgit v1.2.3-59-g8ed1b From baabd567f87be05330faa5140f72a91960e7405a Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:43 +0100 Subject: staging: rtl8192e: rtl92e_fill_tx_desc fix write to mapped out memory. The driver attempts to alter memory that is mapped to PCI device. This is because tx_fwinfo_8190pci points to skb->data Move the pci_map_single to when completed buffer is ready to be mapped with psdec is empty to drop on mapping error. Signed-off-by: Malcolm Priestley Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 4723a0bd5067..a23628f390c9 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1182,8 +1182,7 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, struct cb_desc *cb_desc, struct sk_buff *skb) { struct r8192_priv *priv = rtllib_priv(dev); - dma_addr_t mapping = pci_map_single(priv->pdev, skb->data, skb->len, - PCI_DMA_TODEVICE); + dma_addr_t mapping; struct tx_fwinfo_8190pci *pTxFwInfo; pTxFwInfo = (struct tx_fwinfo_8190pci *)skb->data; @@ -1194,8 +1193,6 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, pTxFwInfo->Short = _rtl92e_query_is_short(pTxFwInfo->TxHT, pTxFwInfo->TxRate, cb_desc); - if (pci_dma_mapping_error(priv->pdev, mapping)) - netdev_err(dev, "%s(): DMA Mapping error\n", __func__); if (cb_desc->bAMPDUEnable) { pTxFwInfo->AllowAggregation = 1; pTxFwInfo->RxMF = cb_desc->ampdu_factor; @@ -1230,6 +1227,14 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, } memset((u8 *)pdesc, 0, 12); + + mapping = pci_map_single(priv->pdev, skb->data, skb->len, + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(priv->pdev, mapping)) { + netdev_err(dev, "%s(): DMA Mapping error\n", __func__); + return; + } + pdesc->LINIP = 0; pdesc->CmdInit = 1; pdesc->Offset = sizeof(struct tx_fwinfo_8190pci) + 8; -- cgit v1.2.3-59-g8ed1b From 867510bde14e7b7fc6dd0f50b48f6753cfbd227a Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:44 +0100 Subject: staging: rtl8192e: fix 2 byte alignment of register BSSIDR. BSSIDR has two byte alignment on PCI ioremap correct the write by swapping to 16 bits first. This fixes a problem that the device associates fail because the filter is not set correctly. Signed-off-by: Malcolm Priestley Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index a23628f390c9..e03d0a3a6dcc 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -97,8 +97,9 @@ void rtl92e_set_reg(struct net_device *dev, u8 variable, u8 *val) switch (variable) { case HW_VAR_BSSID: - rtl92e_writel(dev, BSSIDR, ((u32 *)(val))[0]); - rtl92e_writew(dev, BSSIDR+2, ((u16 *)(val+2))[0]); + /* BSSIDR 2 byte alignment */ + rtl92e_writew(dev, BSSIDR, *(u16 *)val); + rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(val + 2)); break; case HW_VAR_MEDIA_STATUS: @@ -961,8 +962,8 @@ static void _rtl92e_net_update(struct net_device *dev) rtl92e_config_rate(dev, &rate_config); priv->dot11CurrentPreambleMode = PREAMBLE_AUTO; priv->basic_rate = rate_config &= 0x15f; - rtl92e_writel(dev, BSSIDR, ((u32 *)net->bssid)[0]); - rtl92e_writew(dev, BSSIDR+4, ((u16 *)net->bssid)[2]); + rtl92e_writew(dev, BSSIDR, *(u16 *)net->bssid); + rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(net->bssid + 2)); if (priv->rtllib->iw_mode == IW_MODE_ADHOC) { rtl92e_writew(dev, ATIMWND, 2); -- cgit v1.2.3-59-g8ed1b From 90be652c9f157d44b9c2803f902a8839796c090d Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:45 +0100 Subject: staging: rtl8192e: rtl92e_get_eeprom_size Fix read size of EPROM_CMD. EPROM_CMD is 2 byte aligned on PCI map so calling with rtl92e_readl will return invalid data so use rtl92e_readw. The device is unable to select the right eeprom type. Signed-off-by: Malcolm Priestley Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index e03d0a3a6dcc..1c6ed5b2a6f9 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -625,7 +625,7 @@ void rtl92e_get_eeprom_size(struct net_device *dev) struct r8192_priv *priv = rtllib_priv(dev); RT_TRACE(COMP_INIT, "===========>%s()\n", __func__); - curCR = rtl92e_readl(dev, EPROM_CMD); + curCR = rtl92e_readw(dev, EPROM_CMD); RT_TRACE(COMP_INIT, "read from Reg Cmd9346CR(%x):%x\n", EPROM_CMD, curCR); priv->epromtype = (curCR & EPROM_CMD_9356SEL) ? EEPROM_93C56 : -- cgit v1.2.3-59-g8ed1b From 95d93e271d920dfda369d4740b1cc1061d41fe7f Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:46 +0100 Subject: staging: rtl8192e: GetTs Fix invalid TID 7 warning. TID 7 is a valid value for QoS IEEE 802.11e. The switch statement that follows states 7 is valid. Remove function IsACValid and use the default case to filter invalid TIDs. Signed-off-by: Malcolm Priestley Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_TSProc.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_TSProc.c b/drivers/staging/rtl8192e/rtl819x_TSProc.c index 48bbd9e8a52f..dcc4eb691889 100644 --- a/drivers/staging/rtl8192e/rtl819x_TSProc.c +++ b/drivers/staging/rtl8192e/rtl819x_TSProc.c @@ -306,11 +306,6 @@ static void MakeTSEntry(struct ts_common_info *pTsCommonInfo, u8 *Addr, pTsCommonInfo->TClasNum = TCLAS_Num; } -static bool IsACValid(unsigned int tid) -{ - return tid < 7; -} - bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS, u8 *Addr, u8 TID, enum tr_select TxRxSelect, bool bAddNewTs) { @@ -328,12 +323,6 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS, if (ieee->current_network.qos_data.supported == 0) { UP = 0; } else { - if (!IsACValid(TID)) { - netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n", - __func__, TID); - return false; - } - switch (TID) { case 0: case 3: @@ -351,6 +340,10 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS, case 7: UP = 7; break; + default: + netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n", + __func__, TID); + return false; } } -- cgit v1.2.3-59-g8ed1b From ad0ccac76dcc92c3331f4c94c9fc54f8bf1ab20c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:41:19 +0200 Subject: USB: serial: ir-usb: fix big-endian baud-rate debug printk Add missing endianness conversion when printing the supported baud rates. Found using sparse: warning: restricted __le16 degrades to integer Fixes: e0d795e4f36c ("usb: irda: cleanup on ir-usb module") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/ir-usb.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 73956d48a0c5..f9734a96d516 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -197,6 +197,7 @@ static u8 ir_xbof_change(u8 xbof) static int ir_startup(struct usb_serial *serial) { struct usb_irda_cs_descriptor *irda_desc; + int rates; irda_desc = irda_usb_find_class_desc(serial, 0); if (!irda_desc) { @@ -205,18 +206,20 @@ static int ir_startup(struct usb_serial *serial) return -ENODEV; } + rates = le16_to_cpu(irda_desc->wBaudRate); + dev_dbg(&serial->dev->dev, "%s - Baud rates supported:%s%s%s%s%s%s%s%s%s\n", __func__, - (irda_desc->wBaudRate & USB_IRDA_BR_2400) ? " 2400" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_9600) ? " 9600" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_19200) ? " 19200" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_38400) ? " 38400" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_57600) ? " 57600" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_115200) ? " 115200" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_576000) ? " 576000" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_1152000) ? " 1152000" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_4000000) ? " 4000000" : ""); + (rates & USB_IRDA_BR_2400) ? " 2400" : "", + (rates & USB_IRDA_BR_9600) ? " 9600" : "", + (rates & USB_IRDA_BR_19200) ? " 19200" : "", + (rates & USB_IRDA_BR_38400) ? " 38400" : "", + (rates & USB_IRDA_BR_57600) ? " 57600" : "", + (rates & USB_IRDA_BR_115200) ? " 115200" : "", + (rates & USB_IRDA_BR_576000) ? " 576000" : "", + (rates & USB_IRDA_BR_1152000) ? " 1152000" : "", + (rates & USB_IRDA_BR_4000000) ? " 4000000" : ""); switch (irda_desc->bmAdditionalBOFs) { case USB_IRDA_AB_48: -- cgit v1.2.3-59-g8ed1b From 26cede343656c0bc2c33cdc783771282405c7fb2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:41:20 +0200 Subject: USB: serial: mct_u232: fix big-endian baud-rate handling Drop erroneous cpu_to_le32 when setting the baud rate, something which corrupted the divisor on big-endian hosts. Found using sparse: warning: incorrect type in argument 1 (different base types) expected unsigned int [unsigned] [usertype] val got restricted __le32 [usertype] Fixes: af2ac1a091bc ("USB: serial mct_usb232: move DMA buffers to heap") Cc: stable # 2.6.34 Reviewed-by: Greg Kroah-Hartman Acked-By: Pete Zaitcev Signed-off-by: Johan Hovold --- drivers/usb/serial/mct_u232.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index edbc81f205c2..70f346f1aa86 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -189,7 +189,7 @@ static int mct_u232_set_baud_rate(struct tty_struct *tty, return -ENOMEM; divisor = mct_u232_calculate_baud_rate(serial, value, &speed); - put_unaligned_le32(cpu_to_le32(divisor), buf); + put_unaligned_le32(divisor, buf); rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_BAUD_RATE_REQUEST, MCT_U232_SET_REQUEST_TYPE, -- cgit v1.2.3-59-g8ed1b From 6aeb75e6adfaed16e58780309613a578fe1ee90b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:41:21 +0200 Subject: USB: serial: io_ti: fix div-by-zero in set_termios Fix a division-by-zero in set_termios when debugging is enabled and a high-enough speed has been requested so that the divisor value becomes zero. Instead of just fixing the offending debug statement, cap the baud rate at the base as a zero divisor value also appears to crash the firmware. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable # 2.6.12 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/io_ti.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 87798e625d6c..6cefb9cb133d 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2336,8 +2336,11 @@ static void change_port_settings(struct tty_struct *tty, if (!baud) { /* pick a default, any default... */ baud = 9600; - } else + } else { + /* Avoid a zero divisor. */ + baud = min(baud, 461550); tty_encode_baud_rate(tty, baud, baud); + } edge_port->baud_rate = baud; config->wBaudRate = (__u16)((461550L + baud/2) / baud); -- cgit v1.2.3-59-g8ed1b From 6c0d706b563af732adb094c5bf807437e8963e84 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 3 May 2017 15:17:51 +0100 Subject: kvm: arm/arm64: Fix race in resetting stage2 PGD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In kvm_free_stage2_pgd() we check the stage2 PGD before holding the lock and proceed to take the lock if it is valid. And we unmap the page tables, followed by releasing the lock. We reset the PGD only after dropping this lock, which could cause a race condition where another thread waiting on or even holding the lock, could potentially see that the PGD is still valid and proceed to perform a stage2 operation and later encounter a NULL PGD. [223090.242280] Unable to handle kernel NULL pointer dereference at virtual address 00000040 [223090.262330] PC is at unmap_stage2_range+0x8c/0x428 [223090.262332] LR is at kvm_unmap_hva_handler+0x2c/0x3c [223090.262531] Call trace: [223090.262533] [] unmap_stage2_range+0x8c/0x428 [223090.262535] [] kvm_unmap_hva_handler+0x2c/0x3c [223090.262537] [] handle_hva_to_gpa+0xb0/0x104 [223090.262539] [] kvm_unmap_hva+0x5c/0xbc [223090.262543] [] kvm_mmu_notifier_invalidate_page+0x50/0x8c [223090.262547] [] __mmu_notifier_invalidate_page+0x5c/0x84 [223090.262551] [] try_to_unmap_one+0x1d0/0x4a0 [223090.262553] [] rmap_walk+0x1cc/0x2e0 [223090.262555] [] try_to_unmap+0x74/0xa4 [223090.262557] [] migrate_pages+0x31c/0x5ac [223090.262561] [] compact_zone+0x3fc/0x7ac [223090.262563] [] compact_zone_order+0x94/0xb0 [223090.262564] [] try_to_compact_pages+0x108/0x290 [223090.262569] [] __alloc_pages_direct_compact+0x70/0x1ac [223090.262571] [] __alloc_pages_nodemask+0x434/0x9f4 [223090.262572] [] alloc_pages_vma+0x230/0x254 [223090.262574] [] do_huge_pmd_anonymous_page+0x114/0x538 [223090.262576] [] handle_mm_fault+0xd40/0x17a4 [223090.262577] [] __get_user_pages+0x12c/0x36c [223090.262578] [] get_user_pages_unlocked+0xa4/0x1b8 [223090.262579] [] __gfn_to_pfn_memslot+0x280/0x31c [223090.262580] [] gfn_to_pfn_prot+0x4c/0x5c [223090.262582] [] kvm_handle_guest_abort+0x240/0x774 [223090.262584] [] handle_exit+0x11c/0x1ac [223090.262586] [] kvm_arch_vcpu_ioctl_run+0x31c/0x648 [223090.262587] [] kvm_vcpu_ioctl+0x378/0x768 [223090.262590] [] do_vfs_ioctl+0x324/0x5a4 [223090.262591] [] SyS_ioctl+0x90/0xa4 [223090.262595] [] el0_svc_naked+0x38/0x3c This patch moves the stage2 PGD manipulation under the lock. Reported-by: Alexander Graf Cc: Mark Rutland Cc: Marc Zyngier Cc: Paolo Bonzini Cc: Radim Krčmář Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 313ee646480f..909a1a793b31 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -829,22 +829,22 @@ void stage2_unmap_vm(struct kvm *kvm) * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all * underlying level-2 and level-3 tables before freeing the actual level-1 table * and setting the struct pointer to NULL. - * - * Note we don't need locking here as this is only called when the VM is - * destroyed, which can only be done once. */ void kvm_free_stage2_pgd(struct kvm *kvm) { - if (kvm->arch.pgd == NULL) - return; + void *pgd = NULL; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + if (kvm->arch.pgd) { + unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + pgd = kvm->arch.pgd; + kvm->arch.pgd = NULL; + } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); - kvm->arch.pgd = NULL; + if (pgd) + free_pages_exact(pgd, S2_PGD_SIZE); } static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, -- cgit v1.2.3-59-g8ed1b From a2b7cbdd2559aff06cebc28a7150f81c307a90d3 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 19 Apr 2017 11:39:20 -0700 Subject: netfilter: ctnetlink: Make some parameters integer to avoid enum mismatch Not all parameters passed to ctnetlink_parse_tuple() and ctnetlink_exp_dump_tuple() match the enum type in the signatures of these functions. Since this is intended change the argument type of to be an unsigned integer value. Signed-off-by: Matthias Kaehlcke Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dcf561b5c97a..fa752626029e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1007,9 +1007,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], - struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num, - struct nf_conntrack_zone *zone) + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -2447,7 +2446,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) + u32 type) { struct nlattr *nest_parms; -- cgit v1.2.3-59-g8ed1b From fb317002ab4419ae7e068bee6897f2d5745aa3b9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 May 2017 12:50:53 +0200 Subject: s390/virtio: change virtio_feature_desc:features type to __le32 The feature member of virtio_feature_desc contains little endian values, given that it contents will be converted with le32_to_cpu(). The "wrong" __u32 type leads to the sparse warnings below. In order to avoid them, use the correct __le32 type instead. drivers/s390/virtio/virtio_ccw.c:749:14: warning: cast to restricted __le32 drivers/s390/virtio/virtio_ccw.c:762:28: warning: cast to restricted __le32 Acked-by: Halil Pasic Acked-by: Cornelia Huck Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/virtio/virtio_ccw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 0ed209f3d8b0..c7d4ef7b4b22 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -87,7 +87,7 @@ struct vq_info_block { } __packed; struct virtio_feature_desc { - __u32 features; + __le32 features; __u8 index; } __packed; -- cgit v1.2.3-59-g8ed1b From ca6e8cdbe1865caf7b05483e1a242e72d9bc919f Mon Sep 17 00:00:00 2001 From: Ian W MORRISON Date: Mon, 8 May 2017 23:40:35 +1000 Subject: staging: rtl8723bs: remove re-positioned call to kfree in os_dep/ioctl_cfg80211.c A re-positioned call to kfree() in drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c causes a segmentation error. This patch removed the kfree() call. Fixes 6557ddfec348 ("staging: rtl8723bs: Fix various errors in os_dep/ioctl_cfg80211.c") Signed-off-by: Ian W Morrison Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 5e7a61f24f8d..36c3189fc4b7 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -3531,7 +3531,6 @@ int rtw_wdev_alloc(struct adapter *padapter, struct device *dev) pwdev_priv->power_mgmt = true; else pwdev_priv->power_mgmt = false; - kfree((u8 *)wdev); return ret; -- cgit v1.2.3-59-g8ed1b From d110a3942aca78d14929bc648aeb83ee0b245a61 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 6 May 2017 20:28:02 +0800 Subject: netfilter: don't setup nat info for confirmed ct We cannot setup nat info if the ct has been confirmed already, else, different cpu may race to handle the same ct. In extreme situation, we may hit the "BUG_ON(nf_nat_initialized(ct, maniptype))" in the nf_nat_setup_info. Also running the following commands will easily hit NF_CT_ASSERT in nf_conntrack_alter_reply: # nft flush ruleset # ping -c 2 -W 1 1.1.1.111 & # nft add table t # nft add chain t c {type nat hook postrouting priority 0 \;} # nft add rule t c snat to 4.5.6.7 WARNING: CPU: 1 PID: 10065 at net/netfilter/nf_conntrack_core.c:1472 nf_conntrack_alter_reply+0x9a/0x1a0 [nf_conntrack] [...] Call Trace: nf_nat_setup_info+0xad/0x840 [nf_nat] ? deactivate_slab+0x65d/0x6c0 nft_nat_eval+0xcd/0x100 [nft_nat] nft_do_chain+0xff/0x5d0 [nf_tables] ? mark_held_locks+0x6f/0xa0 ? __local_bh_enable_ip+0x70/0xa0 ? trace_hardirqs_on_caller+0x11f/0x190 ? ipt_do_table+0x310/0x610 ? trace_hardirqs_on+0xd/0x10 ? __local_bh_enable_ip+0x70/0xa0 ? ipt_do_table+0x32b/0x610 ? __lock_acquire+0x2ac/0x1580 ? ipt_do_table+0x32b/0x610 nft_nat_do_chain+0x65/0x80 [nft_chain_nat_ipv4] nf_nat_ipv4_fn+0x1ae/0x240 [nf_nat_ipv4] nf_nat_ipv4_out+0x4a/0xf0 [nf_nat_ipv4] nft_nat_ipv4_out+0x15/0x20 [nft_chain_nat_ipv4] nf_hook_slow+0x2c/0xf0 ip_output+0x154/0x270 So for the confirmed ct, just ignore it and return NF_ACCEPT. Fixes: 9a08ecfe74d7 ("netfilter: don't attach a nat extension by default") Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index b48d6b5aae8a..ef0be325a0c6 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct, { struct nf_conntrack_tuple curr_tuple, new_tuple; + /* Can't setup nat info for confirmed ct. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); -- cgit v1.2.3-59-g8ed1b From d91fc59cd77c719f33eda65c194ad8f95a055190 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 7 May 2017 22:01:55 +0800 Subject: netfilter: introduce nf_conntrack_helper_put helper function And convert module_put invocation to nf_conntrack_helper_put, this is prepared for the followup patch, which will add a refcnt for cthelper, so we can reject the deleting request when cthelper is in use. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 2 ++ net/netfilter/nf_conntrack_helper.c | 6 ++++++ net/netfilter/nft_ct.c | 4 ++-- net/netfilter/xt_CT.c | 6 +++--- net/openvswitch/conntrack.c | 4 ++-- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e04fa7691e5d..c1c12411103a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -79,6 +79,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper); + void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3a60efa7799b..e17006b6e434 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -181,6 +181,12 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) +{ + module_put(helper->me); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a34ceb38fc55..1678e9e75e8e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj) struct nft_ct_helper_obj *priv = nft_obj_data(obj); if (priv->helper4) - module_put(priv->helper4->me); + nf_conntrack_helper_put(priv->helper4); if (priv->helper6) - module_put(priv->helper6->me); + nf_conntrack_helper_put(priv->helper6); } static void nft_ct_helper_obj_eval(struct nft_object *obj, diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index bb7ad82dcd56..623ef37de886 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); if (help == NULL) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -263,7 +263,7 @@ out: err4: help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); err3: nf_ct_tmpl_free(ct); err2: @@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (ct) { help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); nf_ct_netns_put(par->net, par->family); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index bf602e33c40a..08679ebb3068 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); if (!help) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { if (ct_info->helper) - module_put(ct_info->helper->me); + nf_conntrack_helper_put(ct_info->helper); if (ct_info->ct) nf_ct_tmpl_free(ct_info->ct); } -- cgit v1.2.3-59-g8ed1b From 9338d7b4418e9996a7642867d8f6b482a6040ed6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 7 May 2017 22:01:56 +0800 Subject: netfilter: nfnl_cthelper: reject del request if helper obj is in use We can still delete the ct helper even if it is in use, this will cause a use-after-free error. In more detail, I mean: # nfct helper add ssdp inet udp # iptables -t raw -A OUTPUT -p udp -j CT --helper ssdp # nfct helper delete ssdp //--> oops, succeed! BUG: unable to handle kernel paging request at 000026ca IP: 0x26ca [...] Call Trace: ? ipv4_helper+0x62/0x80 [nf_conntrack_ipv4] nf_hook_slow+0x21/0xb0 ip_output+0xe9/0x100 ? ip_fragment.constprop.54+0xc0/0xc0 ip_local_out+0x33/0x40 ip_send_skb+0x16/0x80 udp_send_skb+0x84/0x240 udp_sendmsg+0x35d/0xa50 So add reference count to fix this issue, if ct helper is used by others, reject the delete request. Apply this patch: # nfct helper delete ssdp nfct v1.4.3: netlink error: Device or resource busy Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 2 ++ net/netfilter/nf_conntrack_helper.c | 6 ++++++ net/netfilter/nfnetlink_cthelper.c | 17 +++++++++++------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index c1c12411103a..c519bb5b5bb8 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -9,6 +9,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H +#include #include #include #include @@ -26,6 +27,7 @@ struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ + refcount_t refcnt; struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index e17006b6e434..7f6100ca63be 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { + module_put(h->me); + h = NULL; + } rcu_read_unlock(); @@ -183,6 +187,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { + refcount_dec(&helper->refcnt); module_put(helper->me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); @@ -423,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } } } + refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 950bf6eadc65..be678a323598 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } + ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; @@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple.dst.protonum != cur->tuple.dst.protonum)) continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); + if (refcount_dec_if_one(&cur->refcnt)) { + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); - list_del(&nlcth->list); - kfree(nlcth); + list_del(&nlcth->list); + kfree(nlcth); + } else { + ret = -EBUSY; + } } /* Make sure we return success if we flush and there is no helpers */ - return (found || j == 0) ? 0 : -ENOENT; + return (found || j == 0) ? 0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { -- cgit v1.2.3-59-g8ed1b From 324318f0248c31be8a08984146e7e4dd7cdd091d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 9 May 2017 16:17:37 -0400 Subject: netfilter: xtables: zero padding in data_to_user When looking up an iptables rule, the iptables binary compares the aligned match and target data (XT_ALIGN). In some cases this can exceed the actual data size to include padding bytes. Before commit f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers") the malloc()ed bytes were overwritten by the kernel with kzalloced contents, zeroing the padding and making the comparison succeed. After this patch, the kernel copies and clears only data, leaving the padding bytes undefined. Extend the clear operation from data size to aligned data size to include the padding bytes, if any. Padding bytes can be observed in both match and target, and the bug triggered, by issuing a rule with match icmp and target ACCEPT: iptables -t mangle -A INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT iptables -t mangle -D INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT Fixes: f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers") Reported-by: Paul Moore Reported-by: Richard Guy Briggs Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- net/bridge/netfilter/ebtables.c | 9 ++++++--- net/netfilter/x_tables.c | 9 ++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be378cf47fcc..b3044c2c62cb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m, int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size); + int usersize, int size, int aligned_size); void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9ec0c9f908fa..9c6e619f452b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name, strlcpy(name, _name, sizeof(name)); if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || - xt_data_to_user(um + entrysize, data, usersize, datasize)) + xt_data_to_user(um + entrysize, data, usersize, datasize, + XT_ALIGN(datasize))) return -EFAULT; return 0; @@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user(cm->data, m->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) + if (xt_data_to_user(cm->data, m->data, match->usersize, msize, + COMPAT_XT_ALIGN(msize))) return -EFAULT; } @@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user(cm->data, t->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize, + COMPAT_XT_ALIGN(tsize))) return -EFAULT; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8876b7da6884..d17769599c10 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -283,12 +283,13 @@ static int xt_obj_to_user(u16 __user *psize, u16 size, &U->u.user.revision, K->u.kernel.TYPE->revision) int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size) + int usersize, int size, int aligned_size) { usersize = usersize ? : size; if (copy_to_user(dst, src, usersize)) return -EFAULT; - if (usersize != size && clear_user(dst + usersize, size - usersize)) + if (usersize != aligned_size && + clear_user(dst + usersize, aligned_size - usersize)) return -EFAULT; return 0; @@ -298,7 +299,9 @@ EXPORT_SYMBOL_GPL(xt_data_to_user); #define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ - C_SIZE ? : K->u.kernel.TYPE->TYPE##size) + C_SIZE ? : K->u.kernel.TYPE->TYPE##size, \ + C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) : \ + XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) -- cgit v1.2.3-59-g8ed1b From 87e94dbc210a720a34be5c1174faee5c84be963e Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 11 May 2017 18:56:38 +0200 Subject: netfilter: synproxy: fix conntrackd interaction This patch fixes the creation of connection tracking entry from netlink when synproxy is used. It was missing the addition of the synproxy extension. This was causing kernel crashes when a conntrack entry created by conntrackd was used after the switch of traffic from active node to the passive node. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fa752626029e..9799a50bc604 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -1827,6 +1829,8 @@ ctnetlink_create_conntrack(struct net *net, nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); nf_ct_labels_ext_add(ct); + nfct_seqadj_ext_add(ct); + nfct_synproxy_ext_add(ct); /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; -- cgit v1.2.3-59-g8ed1b From fa803605eef39372e53d7813002d73a3fcf10c88 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 14 May 2017 21:35:22 +0800 Subject: netfilter: nf_tables: can't assume lock is acquired when dumping set elems When dumping the elements related to a specified set, we may invoke the nf_tables_dump_set with the NFNL_SUBSYS_NFTABLES lock not acquired. So we should use the proper rcu operation to avoid race condition, just like other nft dump operations. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 78 +++++++++++++++++++++++++++++++------------ net/netfilter/nft_set_hash.c | 2 +- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 559225029740..5f4a4d48b871 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, return nf_tables_fill_setelem(args->skb, set, elem); } +struct nft_set_dump_ctx { + const struct nft_set *set; + struct nft_ctx ctx; +}; + static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { + struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); - u8 genmask = nft_genmask_cur(net); + struct nft_af_info *afi; + struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; - struct nft_ctx ctx; - struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + bool set_found = false; struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; - int event, err; + int event; - err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, - NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy, - NULL); - if (err < 0) - return err; + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (afi != dump_ctx->ctx.afi) + continue; - err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla, genmask); - if (err < 0) - return err; + list_for_each_entry_rcu(table, &afi->tables, list) { + if (table != dump_ctx->ctx.table) + continue; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); + list_for_each_entry_rcu(set, &table->sets, list) { + if (set == dump_ctx->set) { + set_found = true; + break; + } + } + break; + } + break; + } + + if (!set_found) { + rcu_read_unlock(); + return -ENOENT; + } event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM); portid = NETLINK_CB(cb->skb).portid; @@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx.afi->family; + nfmsg->nfgen_family = afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff); + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; @@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.cb = cb; args.skb = skb; - args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - set->ops->walk(&ctx, set, &args.iter); + set->ops->walk(&dump_ctx->ctx, set, &args.iter); + rcu_read_unlock(); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); @@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; nla_put_failure: + rcu_read_unlock(); return -ENOSPC; } +static int nf_tables_dump_set_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; return netlink_dump_start(nlsk, skb, nlh, &c); } return -EOPNOTSUPP; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 8ec086b6b56b..3d3a6df4ce70 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; int err; - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC); iter->err = err; if (err) return; -- cgit v1.2.3-59-g8ed1b From 71df14b0ce094be46d105b5a3ededd83b8e779a0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 May 2017 11:17:29 +0100 Subject: netfilter: nf_tables: missing sanitization in data from userspace Do not assume userspace always sends us NFT_DATA_VALUE for bitwise and cmp expressions. Although NFT_DATA_VERDICT does not make any sense, it is still possible to handcraft a netlink message using this incorrect data type. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 19 ++++++++++++++----- net/netfilter/nft_cmp.c | 12 ++++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 877d9acd91ef..96bd4f325b0f 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.len != priv->len) - return -EINVAL; + if (d1.len != priv->len) { + err = -EINVAL; + goto err1; + } err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, tb[NFTA_BITWISE_XOR]); if (err < 0) - return err; - if (d2.len != priv->len) - return -EINVAL; + goto err1; + if (d2.len != priv->len) { + err = -EINVAL; + goto err2; + } return 0; +err2: + nft_data_uninit(&priv->xor, d2.type); +err1: + nft_data_uninit(&priv->mask, d1.type); + return err; } static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2b96effeadc1..8c9d0fb19118 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) if (err < 0) return ERR_PTR(err); + if (desc.type != NFT_DATA_VALUE) { + err = -EINVAL; + goto err1; + } + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) return &nft_cmp_fast_ops; - else - return &nft_cmp_ops; + + return &nft_cmp_ops; +err1: + nft_data_uninit(&data, desc.type); + return ERR_PTR(-EINVAL); } struct nft_expr_type nft_cmp_type __read_mostly = { -- cgit v1.2.3-59-g8ed1b From 591054469b3eef34bc097c30fae8ededddf8d796 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 May 2017 11:17:34 +0100 Subject: netfilter: nf_tables: revisit chain/object refcounting from elements Andreas reports that the following incremental update using our commit protocol doesn't work. # nft -f incremental-update.nft delete element ip filter client_to_any { 10.180.86.22 : goto CIn_1 } delete chain ip filter CIn_1 ... Error: Could not process rule: Device or resource busy The existing code is not well-integrated into the commit phase protocol, since element deletions do not result in refcount decrement from the preparation phase. This results in bogus EBUSY errors like the one above. Two new functions come with this patch: * nft_set_elem_activate() function is used from the abort path, to restore the set element refcounting on objects that occurred from the preparation phase. * nft_set_elem_deactivate() that is called from nft_del_setelem() to decrement set element refcounting on objects from the preparation phase in the commit protocol. The nft_data_uninit() has been renamed to nft_data_release() since this function does not uninitialize any data store in the data register, instead just releases the references to objects. Moreover, a new function nft_data_hold() has been introduced to be used from nft_set_elem_activate(). Reported-by: Andreas Schultz Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 82 ++++++++++++++++++++++++++++++++++----- net/netfilter/nft_bitwise.c | 4 +- net/netfilter/nft_cmp.c | 2 +- net/netfilter/nft_immediate.c | 5 ++- net/netfilter/nft_range.c | 4 +- 6 files changed, 81 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 028faec8fc27..8a8bab8d7b15 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -176,7 +176,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type); +void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5f4a4d48b871..da314be0c048 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3627,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); - nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), set->dtype); + nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) @@ -3638,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +/* Only called from commit path, nft_set_elem_deactivate() already deals with + * the refcounting from the preparation phase. + */ +static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + kfree(elem); +} + static int nft_setelem_parse_flags(const struct nft_set *set, const struct nlattr *attr, u32 *flags) { @@ -3849,9 +3861,9 @@ err4: kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&data, d2.type); + nft_data_release(&data, d2.type); err2: - nft_data_uninit(&elem.key.val, d1.type); + nft_data_release(&elem.key.val, d1.type); err1: return err; } @@ -3896,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } +/** + * nft_data_hold - hold a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and + * NFT_GOTO verdicts. This function must be called on active data objects + * from the second phase of the commit protocol. + */ +static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +{ + if (type == NFT_DATA_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + data->verdict.chain->use++; + break; + } + } +} + +static void nft_set_elem_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_hold(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use++; +} + +static void nft_set_elem_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_release(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; +} + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3961,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; + nft_set_elem_deactivate(ctx->net, set, &elem); + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -3970,7 +4031,7 @@ err4: err3: kfree(elem.priv); err2: - nft_data_uninit(&elem.key.val, desc.type); + nft_data_release(&elem.key.val, desc.type); err1: return err; } @@ -4777,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: - nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv, true); + nf_tables_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: nft_obj_destroy(nft_trans_obj(trans)); @@ -5013,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nft_set_elem_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; @@ -5498,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx, EXPORT_SYMBOL_GPL(nft_data_init); /** - * nft_data_uninit - release a nft_data item + * nft_data_release - release a nft_data item * * @data: struct nft_data to release * @type: type of data @@ -5506,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init); * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, * all others need to be released by calling this function. */ -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +void nft_data_release(const struct nft_data *data, enum nft_data_types type) { if (type < NFT_DATA_VERDICT) return; @@ -5517,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) WARN_ON(1); } } -EXPORT_SYMBOL_GPL(nft_data_uninit); +EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 96bd4f325b0f..fff8073e2a56 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -99,9 +99,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, return 0; err2: - nft_data_uninit(&priv->xor, d2.type); + nft_data_release(&priv->xor, d2.type); err1: - nft_data_uninit(&priv->mask, d1.type); + nft_data_release(&priv->mask, d1.type); return err; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 8c9d0fb19118..c2945eb3397c 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -211,7 +211,7 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_cmp_ops; err1: - nft_data_uninit(&data, desc.type); + nft_data_release(&data, desc.type); return ERR_PTR(-EINVAL); } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 728baf88295a..4717d7796927 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx, return 0; err1: - nft_data_uninit(&priv->data, desc.type); + nft_data_release(&priv->data, desc.type); return err; } @@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); + + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9edc74eedc10..cedb96c3619f 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr priv->len = desc_from.len; return 0; err2: - nft_data_uninit(&priv->data_to, desc_to.type); + nft_data_release(&priv->data_to, desc_to.type); err1: - nft_data_uninit(&priv->data_from, desc_from.type); + nft_data_release(&priv->data_from, desc_from.type); return err; } -- cgit v1.2.3-59-g8ed1b From 9b5fed0daa2a9ea3355ef06bb9e87ce800e71df6 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 28 Apr 2017 11:02:22 +0300 Subject: drm/i915/glk: Fix DSI "*ERROR* ULPS is still active" messages The sequence in glk_dsi_device_ready() enters ULPS then waits until it is *not* active to then disable it. The correct sequence according to the spec is to enter ULPS then wait until the GLK_ULPS_NOT_ACTIVE bit is zero, i.e., ULPS is active, and then disable ULPS. Fixing the condition gets rid of the following spurious error messages: [drm:glk_dsi_device_ready [i915]] *ERROR* ULPS is still active Fixes: 4644848369c0 ("drm/i915/glk: Add MIPIIO Enable/disable sequence") Cc: Deepak M Cc: Madhav Chauhan Cc: Jani Nikula Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Madhav Chauhan Link: http://patchwork.freedesktop.org/patch/msgid/20170428080222.6147-1-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 3acbec03b3c51559d01c879e9564d9c9610fe8ce) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dsi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 3ffe8b1f1d48..fc0ef492252a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) val |= (ULPS_STATE_ENTER | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - /* Wait for ULPS Not active */ + /* Wait for ULPS active */ if (intel_wait_for_register(dev_priv, - MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, - GLK_ULPS_NOT_ACTIVE, 20)) - DRM_ERROR("ULPS is still active\n"); + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20)) + DRM_ERROR("ULPS not active\n"); /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); -- cgit v1.2.3-59-g8ed1b From 668e3b014afb66ab29e134bca7c258527273ac75 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 27 Apr 2017 19:02:20 +0300 Subject: drm/i915: Fix runtime PM for LPE audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not calling pm_runtime_enable() means that runtime PM can't be enabled at all via sysfs. So we definitely need to call it from somewhere. Calling it from the driver seems like a bad idea because it would have to be paired with a pm_runtime_disable() at driver unload time, otherwise the core gets upset. Also if there's no LPE audio driver loaded then we couldn't runtime suspend i915 either. So it looks like a better plan is to call it from i915 when we register the platform device. That seems to match how pci generally does things. I cargo culted the pm_runtime_forbid() and pm_runtime_set_active() calls from pci as well. The exposed runtime PM API is massive an thorougly misleading, so I don't actually know if this is how you're supposed to use the API or not. But it seems to work. I can now runtime suspend i915 again with or without the LPE audio driver loaded, and reloading the LPE audio driver also seems to work. Note that powertop won't auto-tune runtime PM for platform devices, which is a little annoying. So I'm not sure that leaving runtime PM in "on" mode by default is the best choice here. But I've left it like that for now at least. Also remove the comment about there not being much benefit from LPE audio runtime PM. Not allowing runtime PM blocks i915 runtime PM, which will also block s0ix, and that could have a measurable impact on power consumption. Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Pierre-Louis Bossart Fixes: 0b6b524f3915 ("ALSA: x86: Don't enable runtime PM as default") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-2-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai (cherry picked from commit 183c00350ccda86781f6695840e6c5f5b22efbd1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lpe_audio.c | 5 +++++ sound/x86/intel_hdmi_audio.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 25d8e76489e4..668f00480d97 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "i915_drv.h" #include @@ -121,6 +122,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) kfree(rsc); + pm_runtime_forbid(&platdev->dev); + pm_runtime_set_active(&platdev->dev); + pm_runtime_enable(&platdev->dev); + return platdev; err: diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 664b7fe206d6..b11d3920b9a5 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1809,10 +1809,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) pdata->notify_pending = false; spin_unlock_irq(&pdata->lpe_audio_slock); - /* runtime PM isn't enabled as default, since it won't save much on - * BYT/CHT devices; user who want the runtime PM should adjust the - * power/ontrol and power/autosuspend_delay_ms sysfs entries instead - */ pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_set_active(&pdev->dev); -- cgit v1.2.3-59-g8ed1b From 82f2b4aca8fd90476fc3fd1786d107163ab17201 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 4 May 2017 21:15:30 +0300 Subject: drm/i915: Fix rawclk readout for g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out our skills in decoding the CLKCFG register weren't good enough. On this particular elk the answer we got was 400 MHz when in reality the clock was running at 266 MHz, which then caused us to program a bogus AUX clock divider that caused all AUX communication to fail. Sadly the docs are now in bit heaven, so the fix will have to be based on empirical evidence. Using another elk machine I was able to frob the FSB frequency from the BIOS and see how it affects the CLKCFG register. The machine seesm to use a frequency of 266 MHz by default, and fortunately it still boot even with the 50% CPU overclock that we get when we bump the FSB up to 400 MHz. It turns out the actual FSB frequency and the register have no real link whatsoever. The register value is based on some straps or something, but fortunately those too can be configured from the BIOS on this board, although it doesn't seem to respect the settings 100%. In the end I was able to derive the following relationship: BIOS FSB / strap | CLKCFG ------------------------- 200 | 0x2 266 | 0x0 333 | 0x4 400 | 0x4 So only the 200 and 400 MHz cases actually match how we're currently decoding that register. But as the comment next to some of the defines says, we have been just guessing anyway. So let's fix things up so that at least the 266 MHz case will work correctly as that is actually the setting used by both the buggy machine and my test machine. The fact that 333 and 400 MHz BIOS settings result in the same register value is a little disappointing, as that means we can't tell them apart. However, according to the gmch datasheet for both elk and ctg 400 Mhz is not even a supported FSB frequency, so I'm going to make the assumption that we should decode it as 333 MHz instead. Cc: stable@vger.kernel.org Cc: Tomi Sarvela Reported-by: Tomi Sarvela Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100926 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170504181530.6908-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Tested-by: Tomi Sarvela (cherry picked from commit 6f38123ecaac446312a63523b68df84ceb5a06ed) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 10 +++++++--- drivers/gpu/drm/i915/intel_cdclk.c | 6 ++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 11b12f412492..5a7c63e64381 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3051,10 +3051,14 @@ enum skl_disp_power_wells { #define CLKCFG_FSB_667 (3 << 0) /* hrawclk 166 */ #define CLKCFG_FSB_800 (2 << 0) /* hrawclk 200 */ #define CLKCFG_FSB_1067 (6 << 0) /* hrawclk 266 */ +#define CLKCFG_FSB_1067_ALT (0 << 0) /* hrawclk 266 */ #define CLKCFG_FSB_1333 (7 << 0) /* hrawclk 333 */ -/* Note, below two are guess */ -#define CLKCFG_FSB_1600 (4 << 0) /* hrawclk 400 */ -#define CLKCFG_FSB_1600_ALT (0 << 0) /* hrawclk 400 */ +/* + * Note that on at least on ELK the below value is reported for both + * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet + * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz. + */ +#define CLKCFG_FSB_1333_ALT (4 << 0) /* hrawclk 333 */ #define CLKCFG_FSB_MASK (7 << 0) #define CLKCFG_MEM_533 (1 << 4) #define CLKCFG_MEM_667 (2 << 4) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index dd3ad52b7dfe..f29a226e24d8 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1798,13 +1798,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) case CLKCFG_FSB_800: return 200000; case CLKCFG_FSB_1067: + case CLKCFG_FSB_1067_ALT: return 266667; case CLKCFG_FSB_1333: + case CLKCFG_FSB_1333_ALT: return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; default: return 133333; } -- cgit v1.2.3-59-g8ed1b From 2f720aac936dc7a301b757d3b197d86c333d59b8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 May 2017 10:14:23 +0100 Subject: drm/i915: don't do allocate_va_range again on PIN_UPDATE If a vma is already bound to a ppgtt, we incorrectly call allocate_va_range again when doing a PIN_UPDATE, which will result in over accounting within our paging structures, such that when we do unbind something we don't actually destroy the structures and end up inadvertently recycling them. In reality this probably isn't too bad, but once we start touching PDEs and PDPEs for 64K/2M/1G pages this apparent recycling will manifest into lots of really, really subtle bugs. v2: Fix the testing of vma->flags for aliasing_ppgtt_bind_vma Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170512091423.26085-1-chris@chris-wilson.co.uk (cherry picked from commit 1f23475c893a85c934143cd64865ebb9b6af383f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2aa6b97fd22f..a0563e18d753 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -195,9 +195,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); - if (ret) - return ret; + if (!(vma->flags & I915_VMA_LOCAL_BIND)) { + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, + vma->size); + if (ret) + return ret; + } vma->pages = vma->obj->mm.pages; @@ -2306,7 +2309,8 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_LOCAL_BIND) { struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - if (appgtt->base.allocate_va_range) { + if (!(vma->flags & I915_VMA_LOCAL_BIND) && + appgtt->base.allocate_va_range) { ret = appgtt->base.allocate_va_range(&appgtt->base, vma->node.start, vma->node.size); -- cgit v1.2.3-59-g8ed1b From 661e6b02b5aa82db31897f36e96324b77450fd7a Mon Sep 17 00:00:00 2001 From: Zhichao Huang Date: Thu, 11 May 2017 13:46:11 +0100 Subject: KVM: arm: plug potential guest hardware debug leakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardware debugging in guests is not intercepted currently, it means that a malicious guest can bring down the entire machine by writing to the debug registers. This patch enable trapping of all debug registers, preventing the guests to access the debug registers. This includes access to the debug mode(DBGDSCR) in the guest world all the time which could otherwise mess with the host state. Reads return 0 and writes are ignored (RAZ_WI). The result is the guest cannot detect any working hardware based debug support. As debug exceptions are still routed to the guest normal debug using software based breakpoints still works. To support debugging using hardware registers we need to implement a debug register aware world switch as well as special trapping for registers that may affect the host state. Cc: stable@vger.kernel.org Signed-off-by: Zhichao Huang Signed-off-by: Alex Bennée Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_coproc.h | 3 +- arch/arm/kvm/coproc.c | 77 ++++++++++++++++++++++++++++++--------- arch/arm/kvm/handle_exit.c | 4 +- arch/arm/kvm/hyp/switch.c | 4 +- 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h index 4917c2f7e459..e74ab0fbab79 100644 --- a/arch/arm/include/asm/kvm_coproc.h +++ b/arch/arm/include/asm/kvm_coproc.h @@ -31,7 +31,8 @@ void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index ac8d36da4d08..1403ffb1916b 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -112,12 +112,6 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - kvm_inject_undefined(vcpu); - return 1; -} - static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* @@ -533,12 +527,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, return 1; } -/** - * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access - * @vcpu: The VCPU pointer - * @run: The kvm_run struct - */ -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +static struct coproc_params decode_64bit_hsr(struct kvm_vcpu *vcpu) { struct coproc_params params; @@ -552,9 +541,38 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; params.CRm = 0; + return params; +} + +/** + * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_64bit_hsr(vcpu); + return emulate_cp15(vcpu, ¶ms); } +/** + * kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_64bit_hsr(vcpu); + + /* raz_wi cp14 */ + pm_fake(vcpu, ¶ms, NULL); + + /* handled */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; +} + static void reset_coproc_regs(struct kvm_vcpu *vcpu, const struct coproc_reg *table, size_t num) { @@ -565,12 +583,7 @@ static void reset_coproc_regs(struct kvm_vcpu *vcpu, table[i].reset(vcpu, &table[i]); } -/** - * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access - * @vcpu: The VCPU pointer - * @run: The kvm_run struct - */ -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu) { struct coproc_params params; @@ -584,9 +597,37 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7; params.Rt2 = 0; + return params; +} + +/** + * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_32bit_hsr(vcpu); return emulate_cp15(vcpu, ¶ms); } +/** + * kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_32bit_hsr(vcpu); + + /* raz_wi cp14 */ + pm_fake(vcpu, ¶ms, NULL); + + /* handled */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; +} + /****************************************************************************** * Userspace API *****************************************************************************/ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 5fd7968cdae9..f86a9aaef462 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -95,9 +95,9 @@ static exit_handle_fn arm_exit_handlers[] = { [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, - [HSR_EC_CP14_MR] = kvm_handle_cp14_access, + [HSR_EC_CP14_MR] = kvm_handle_cp14_32, [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store, - [HSR_EC_CP14_64] = kvm_handle_cp14_access, + [HSR_EC_CP14_64] = kvm_handle_cp14_64, [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, [HSR_EC_CP10_ID] = kvm_handle_cp10_id, [HSR_EC_HVC] = handle_hvc, diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index 92678b7bd046..624a510d31df 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -48,7 +48,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host) write_sysreg(HSTR_T(15), HSTR); write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); val = read_sysreg(HDCR); - write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR); + val |= HDCR_TPM | HDCR_TPMCR; /* trap performance monitors */ + val |= HDCR_TDRA | HDCR_TDOSA | HDCR_TDA; /* trap debug regs */ + write_sysreg(val, HDCR); } static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) -- cgit v1.2.3-59-g8ed1b From 9b619a8f08da9f61f166edbbe30ad05c359ec19e Mon Sep 17 00:00:00 2001 From: Zhichao Huang Date: Thu, 11 May 2017 13:46:12 +0100 Subject: KVM: arm: rename pm_fake handler to trap_raz_wi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pm_fake doesn't quite describe what the handler does (ignoring writes and returning 0 for reads). As we're about to use it (a lot) in a different context, rename it with a (admitedly cryptic) name that make sense for all users. Signed-off-by: Zhichao Huang Reviewed-by: Alex Bennee Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 1403ffb1916b..6d1d2e26dfe5 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -279,7 +279,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, * must always support PMCCNTR (the cycle counter): we just RAZ/WI for * all PM registers, which doesn't crash the guest kernel at least. */ -static bool pm_fake(struct kvm_vcpu *vcpu, +static bool trap_raz_wi(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { @@ -289,19 +289,19 @@ static bool pm_fake(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } -#define access_pmcr pm_fake -#define access_pmcntenset pm_fake -#define access_pmcntenclr pm_fake -#define access_pmovsr pm_fake -#define access_pmselr pm_fake -#define access_pmceid0 pm_fake -#define access_pmceid1 pm_fake -#define access_pmccntr pm_fake -#define access_pmxevtyper pm_fake -#define access_pmxevcntr pm_fake -#define access_pmuserenr pm_fake -#define access_pmintenset pm_fake -#define access_pmintenclr pm_fake +#define access_pmcr trap_raz_wi +#define access_pmcntenset trap_raz_wi +#define access_pmcntenclr trap_raz_wi +#define access_pmovsr trap_raz_wi +#define access_pmselr trap_raz_wi +#define access_pmceid0 trap_raz_wi +#define access_pmceid1 trap_raz_wi +#define access_pmccntr trap_raz_wi +#define access_pmxevtyper trap_raz_wi +#define access_pmxevcntr trap_raz_wi +#define access_pmuserenr trap_raz_wi +#define access_pmintenset trap_raz_wi +#define access_pmintenclr trap_raz_wi /* Architected CP15 registers. * CRn denotes the primary register number, but is copied to the CRm in the @@ -566,7 +566,7 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) struct coproc_params params = decode_64bit_hsr(vcpu); /* raz_wi cp14 */ - pm_fake(vcpu, ¶ms, NULL); + trap_raz_wi(vcpu, ¶ms, NULL); /* handled */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); @@ -621,7 +621,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) struct coproc_params params = decode_32bit_hsr(vcpu); /* raz_wi cp14 */ - pm_fake(vcpu, ¶ms, NULL); + trap_raz_wi(vcpu, ¶ms, NULL); /* handled */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); -- cgit v1.2.3-59-g8ed1b From 66c25f6e31766a9ec19c2bdc7f5f69f9c59bafd7 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 15 May 2017 10:56:06 +0200 Subject: net: stmmac: use correct pointer when printing normal descriptor ring There are two pointers in sysfs_display_ring, one that increments if using normal dma descriptors, another if using extended dma descriptors. When printing the normal dma descriptors, the wrong pointer is used, thus the printed descriptor addresses are incorrect. Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index cd8c60132390..a74c481401c4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3725,7 +3725,7 @@ static void sysfs_display_ring(void *head, int size, int extend_desc, ep++; } else { seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(ep), + i, (unsigned int)virt_to_phys(p), le32_to_cpu(p->des0), le32_to_cpu(p->des1), le32_to_cpu(p->des2), le32_to_cpu(p->des3)); p++; -- cgit v1.2.3-59-g8ed1b From 4769886baf39b6a307eb8f9e39848823ca6c5939 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 May 2017 22:43:17 +0300 Subject: kvm: nVMX: off by one in vmx_write_pml_buffer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are PML_ENTITY_NUM elements in the pml_address[] array so the > should be >= or we write beyond the end of the array when we do: pml_address[vmcs12->guest_pml_index--] = gpa; Fixes: c5f983f6e845 ("nVMX: Implement emulated Page Modification Logging") Signed-off-by: Dan Carpenter Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c6f4ad44aa95..7698e8f321bf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11213,7 +11213,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) if (!nested_cpu_has_pml(vmcs12)) return 0; - if (vmcs12->guest_pml_index > PML_ENTITY_NUM) { + if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { vmx->nested.pml_full = true; return 1; } -- cgit v1.2.3-59-g8ed1b From a575813bfe4bc15aba511a5e91e61d242bff8b9d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 11 May 2017 02:58:55 -0700 Subject: KVM: x86: Fix load damaged SSEx MXCSR register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: BUG: unable to handle kernel paging request at ffffffffc07f6a2e IP: report_bug+0x94/0x120 PGD 348e12067 P4D 348e12067 PUD 348e14067 PMD 3cbd84067 PTE 80000003f7e87161 Oops: 0003 [#1] SMP CPU: 2 PID: 7091 Comm: kvm_load_guest_ Tainted: G OE 4.11.0+ #8 task: ffff92fdfb525400 task.stack: ffffbda6c3d04000 RIP: 0010:report_bug+0x94/0x120 RSP: 0018:ffffbda6c3d07b20 EFLAGS: 00010202 do_trap+0x156/0x170 do_error_trap+0xa3/0x170 ? kvm_load_guest_fpu.part.175+0x12a/0x170 [kvm] ? mark_held_locks+0x79/0xa0 ? retint_kernel+0x10/0x10 ? trace_hardirqs_off_thunk+0x1a/0x1c do_invalid_op+0x20/0x30 invalid_op+0x1e/0x30 RIP: 0010:kvm_load_guest_fpu.part.175+0x12a/0x170 [kvm] ? kvm_load_guest_fpu.part.175+0x1c/0x170 [kvm] kvm_arch_vcpu_ioctl_run+0xed6/0x1b70 [kvm] kvm_vcpu_ioctl+0x384/0x780 [kvm] ? kvm_vcpu_ioctl+0x384/0x780 [kvm] ? sched_clock+0x13/0x20 ? __do_page_fault+0x2a0/0x550 do_vfs_ioctl+0xa4/0x700 ? up_read+0x1f/0x40 ? __do_page_fault+0x2a0/0x550 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc2 SDM mentioned that "The MXCSR has several reserved bits, and attempting to write a 1 to any of these bits will cause a general-protection exception(#GP) to be generated". The syzkaller forks' testcase overrides xsave area w/ random values and steps on the reserved bits of MXCSR register. The damaged MXCSR register values of guest will be restored to SSEx MXCSR register before vmentry. This patch fixes it by catching userspace override MXCSR register reserved bits w/ random values and bails out immediately. Reported-by: Andrey Konovalov Reviewed-by: Paolo Bonzini Cc: Paolo Bonzini Cc: Radim Krčmář Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kernel/fpu/init.c | 1 + arch/x86/kvm/x86.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index c2f8dde3255c..d5d44c452624 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -90,6 +90,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) * Boot time FPU feature detection code: */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +EXPORT_SYMBOL_GPL(mxcsr_feature_mask); static void __init fpu__init_system_mxcsr(void) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 464da936c53d..b54125b590e8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3288,11 +3288,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, } } +#define XSAVE_MXCSR_OFFSET 24 + static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { u64 xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; + u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* @@ -3300,11 +3303,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility * with old userspace. */ - if (xstate_bv & ~kvm_supported_xcr0()) + if (xstate_bv & ~kvm_supported_xcr0() || + mxcsr & ~mxcsr_feature_mask) return -EINVAL; load_xsave(vcpu, (u8 *)guest_xsave->region); } else { - if (xstate_bv & ~XFEATURE_MASK_FPSSE) + if (xstate_bv & ~XFEATURE_MASK_FPSSE || + mxcsr & ~mxcsr_feature_mask) return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state.fxsave, guest_xsave->region, sizeof(struct fxregs_state)); -- cgit v1.2.3-59-g8ed1b From fce6ac4c0508b985d497e3d9c8eff28ec8a43182 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 11 May 2017 02:58:56 -0700 Subject: KVM: VMX: Don't enable EPT A/D feature if EPT feature is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can observe eptad kvm_intel module parameter is still Y even if ept is disabled which is weird. This patch will not enable EPT A/D feature if EPT feature is disabled. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7698e8f321bf..72f78396bc09 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6504,7 +6504,7 @@ static __init int hardware_setup(void) enable_ept_ad_bits = 0; } - if (!cpu_has_vmx_ept_ad_bits()) + if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) enable_ept_ad_bits = 0; if (!cpu_has_vmx_unrestricted_guest()) -- cgit v1.2.3-59-g8ed1b From 0780516a18f87e881e42ed815f189279b0a1743c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 May 2017 13:23:29 +0200 Subject: KVM: nVMX: fix EPT permissions as reported in exit qualification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the new ept_access_test_read_only and ept_access_test_read_write testcases from vmx.flat. The problem is that gpte_access moves bits around to switch from EPT bit order (XWR) to ACC_*_MASK bit order (RWX). This results in an incorrect exit qualification. To fix this, make pt_access and pte_access operate on raw PTE values (only with NX flipped to mean "can execute") and call gpte_access at the end of the walk. This lets us use pte_access to compute the exit qualification with XWR bit order. Signed-off-by: Paolo Bonzini Reviewed-by: Xiao Guangrong Signed-off-by: Radim Krčmář --- arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 56241746abbd..b0454c7e4cff 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; - unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey; + u64 pt_access, pte_access; + unsigned index, accessed_dirty, pte_pkey; unsigned nested_access; gpa_t pte_gpa; bool have_ad; int offset; + u64 walk_nx_mask = 0; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; const int fetch_fault = access & PFERR_FETCH_MASK; @@ -302,6 +304,7 @@ retry_walk: have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); #if PTTYPE == 64 + walk_nx_mask = 1ULL << PT64_NX_SHIFT; if (walker->level == PT32E_ROOT_LEVEL) { pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); @@ -313,8 +316,6 @@ retry_walk: walker->max_level = walker->level; ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); - accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0; - /* * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging * by the MOV to CR instruction are treated as reads and do not cause the @@ -322,14 +323,14 @@ retry_walk: */ nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; - pt_access = pte_access = ACC_ALL; + pte_access = ~0; ++walker->level; do { gfn_t real_gfn; unsigned long host_addr; - pt_access &= pte_access; + pt_access = pte_access; --walker->level; index = PT_INDEX(addr, walker->level); @@ -371,6 +372,12 @@ retry_walk: trace_kvm_mmu_paging_element(pte, walker->level); + /* + * Inverting the NX it lets us AND it like other + * permission bits. + */ + pte_access = pt_access & (pte ^ walk_nx_mask); + if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; @@ -379,14 +386,16 @@ retry_walk: goto error; } - accessed_dirty &= pte; - pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); - walker->ptes[walker->level - 1] = pte; } while (!is_last_gpte(mmu, walker->level, pte)); pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); - errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access); + accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; + + /* Convert to ACC_*_MASK flags for struct guest_walker. */ + walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask); + walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask); + errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); if (unlikely(errcode)) goto error; @@ -403,7 +412,7 @@ retry_walk: walker->gfn = real_gpa >> PAGE_SHIFT; if (!write_fault) - FNAME(protect_clean_gpte)(mmu, &pte_access, pte); + FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte); else /* * On a write fault, fold the dirty bit into accessed_dirty. @@ -421,10 +430,8 @@ retry_walk: goto retry_walk; } - walker->pt_access = pt_access; - walker->pte_access = pte_access; pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __func__, (u64)pte, pte_access, pt_access); + __func__, (u64)pte, walker->pte_access, walker->pt_access); return 1; error: @@ -452,7 +459,7 @@ error: */ if (!(errcode & PFERR_RSVD_MASK)) { vcpu->arch.exit_qualification &= 0x187; - vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3; + vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; } #endif walker->fault.address = addr; -- cgit v1.2.3-59-g8ed1b From 68118e0e73aa3a6291c8b9eb1ee708e05f110cea Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Sun, 7 May 2017 07:16:30 +0200 Subject: i2c: mux: reg: put away the parent i2c adapter on probe failure It is only prudent to let go of resources that are not used. Fixes: b3fdd32799d8 ("i2c: mux: Add register-based mux i2c-mux-reg") Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-reg.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 406d5059072c..11974e3cd1e5 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -196,20 +196,25 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); mux->data.reg_size = resource_size(res); mux->data.reg = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mux->data.reg)) - return PTR_ERR(mux->data.reg); + if (IS_ERR(mux->data.reg)) { + ret = PTR_ERR(mux->data.reg); + goto err_put_parent; + } } if (mux->data.reg_size != 4 && mux->data.reg_size != 2 && mux->data.reg_size != 1) { dev_err(&pdev->dev, "Invalid register size\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_parent; } muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values, 0, 0, i2c_mux_reg_select, NULL); - if (!muxc) - return -ENOMEM; + if (!muxc) { + ret = -ENOMEM; + goto err_put_parent; + } muxc->priv = mux; platform_set_drvdata(pdev, muxc); @@ -233,6 +238,8 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) add_adapter_failed: i2c_mux_del_adapters(muxc); +err_put_parent: + i2c_put_adapter(parent); return ret; } -- cgit v1.2.3-59-g8ed1b From a36d4637e4a06be067b8e327a0b1118bb2a73cb8 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 15 May 2017 18:48:55 +0200 Subject: i2c: mux: reg: rename label to indicate what it does That maintains sanity if it is ever called from some other spot, and also makes the label names coherent. Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 11974e3cd1e5..d97031804de8 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -228,7 +228,7 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], class); if (ret) - goto add_adapter_failed; + goto err_del_mux_adapters; } dev_dbg(&pdev->dev, "%d port mux on %s adapter\n", @@ -236,7 +236,7 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) return 0; -add_adapter_failed: +err_del_mux_adapters: i2c_mux_del_adapters(muxc); err_put_parent: i2c_put_adapter(parent); -- cgit v1.2.3-59-g8ed1b From 9fce894d03a98ec8e8e8106a964644633d2772ee Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 15 May 2017 09:03:50 +0200 Subject: i2c: mux: only print failure message on error As is, a failure message is printed unconditionally, which is confusing. And noisy. Fixes: 8d4d159f25a7 ("i2c: mux: provide more info on failure in i2c_mux_add_adapter") Signed-off-by: Peter Rosin --- drivers/i2c/i2c-mux.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 26f7237558ba..9669ca4937b8 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -395,18 +395,20 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, if (force_nr) { priv->adap.nr = force_nr; ret = i2c_add_numbered_adapter(&priv->adap); - dev_err(&parent->dev, - "failed to add mux-adapter %u as bus %u (error=%d)\n", - chan_id, force_nr, ret); + if (ret < 0) { + dev_err(&parent->dev, + "failed to add mux-adapter %u as bus %u (error=%d)\n", + chan_id, force_nr, ret); + goto err_free_priv; + } } else { ret = i2c_add_adapter(&priv->adap); - dev_err(&parent->dev, - "failed to add mux-adapter %u (error=%d)\n", - chan_id, ret); - } - if (ret < 0) { - kfree(priv); - return ret; + if (ret < 0) { + dev_err(&parent->dev, + "failed to add mux-adapter %u (error=%d)\n", + chan_id, ret); + goto err_free_priv; + } } WARN(sysfs_create_link(&priv->adap.dev.kobj, &muxc->dev->kobj, @@ -422,6 +424,10 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, muxc->adapter[muxc->num_adapters++] = &priv->adap; return 0; + +err_free_priv: + kfree(priv); + return ret; } EXPORT_SYMBOL_GPL(i2c_mux_add_adapter); -- cgit v1.2.3-59-g8ed1b From 8df728e1ae614f592961e51f65d3e3212ede5a75 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 12 May 2017 13:48:41 +0100 Subject: arm64: Remove redundant mov from LL/SC cmpxchg The cmpxchg implementation introduced by commit c342f78217e8 ("arm64: cmpxchg: patch in lse instructions when supported by the CPU") performs an apparently redundant register move of [old] to [oldval] in the success case - it always uses the same register width as [oldval] was originally loaded with, and is only executed when [old] and [oldval] are known to be equal anyway. The only effect it seemingly does have is to take up a surprising amount of space in the kernel text, as removing it reveals: text data bss dec hex filename 12426658 1348614 4499749 18275021 116dacd vmlinux.o.new 12429238 1348614 4499749 18277601 116e4e1 vmlinux.o.old Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic_ll_sc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f819fdcff1ac..f5a2d09afb38 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -264,7 +264,6 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ - " mov %" #w "[oldval], %" #w "[old]\n" \ "2:" \ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ [v] "+Q" (*(unsigned long *)ptr) \ -- cgit v1.2.3-59-g8ed1b From 78a19cfdf37d19002c83c8790853c1cc10feccdc Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 2 May 2017 21:59:34 +0530 Subject: arm64: perf: Ignore exclude_hv when kernel is running in HYP commit d98ecdaca296 ("arm64: perf: Count EL2 events if the kernel is running in HYP") returns -EINVAL when perf system call perf_event_open is called with exclude_hv != exclude_kernel. This change breaks applications on VHE enabled ARMv8.1 platforms. The issue was observed with HHVM application, which calls perf_event_open with exclude_hv = 1 and exclude_kernel = 0. There is no separate hypervisor privilege level when VHE is enabled, the host kernel runs at EL2. So when VHE is enabled, we should ignore exclude_hv from the application. This behaviour is consistent with PowerPC where the exclude_hv is ignored when the hypervisor is not present and with x86 where this flag is ignored. Signed-off-by: Ganapatrao Kulkarni [will: added comment to justify the behaviour of exclude_hv] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/perf_event.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index bcc79471b38e..83a1b1ad189f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -877,15 +877,24 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, if (attr->exclude_idle) return -EPERM; - if (is_kernel_in_hyp_mode() && - attr->exclude_kernel != attr->exclude_hv) - return -EINVAL; + + /* + * If we're running in hyp mode, then we *are* the hypervisor. + * Therefore we ignore exclude_hv in this configuration, since + * there's no hypervisor to sample anyway. This is consistent + * with other architectures (x86 and Power). + */ + if (is_kernel_in_hyp_mode()) { + if (!attr->exclude_kernel) + config_base |= ARMV8_PMU_INCLUDE_EL2; + } else { + if (attr->exclude_kernel) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + if (!attr->exclude_hv) + config_base |= ARMV8_PMU_INCLUDE_EL2; + } if (attr->exclude_user) config_base |= ARMV8_PMU_EXCLUDE_EL0; - if (!is_kernel_in_hyp_mode() && attr->exclude_kernel) - config_base |= ARMV8_PMU_EXCLUDE_EL1; - if (!attr->exclude_hv) - config_base |= ARMV8_PMU_INCLUDE_EL2; /* * Install the filter into config_base as this is used to -- cgit v1.2.3-59-g8ed1b From 70957eaecc2e43308e403c80293bec3d59632412 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 11 May 2017 11:09:52 -0400 Subject: macvlan: Fix performance issues with vlan tagged packets Macvlan always turns on offload features that have sofware fallback (NETIF_GSO_SOFTWARE). This allows much higher guest-guest communications over macvtap. However, macvtap does not turn on these features for vlan tagged traffic. As a result, depending on the HW that mactap is configured on, the performance of guest-guest communication over a vlan is very inconsistent. If the HW supports TSO/UFO over vlans, then the performance will be fine. If not, the the performance will suffer greatly since the VM may continue using TSO/UFO, and will force the host segment the traffic and possibly overlow the macvtap queue. This patch adds the always on offloads to vlan_features. This makes sure that any vlan tagged traffic between 2 guest will not be segmented needlessly. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index b34eaaae03fd..346ad2ff3998 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -789,10 +789,12 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) */ static struct lock_class_key macvlan_netdev_addr_lock_key; -#define ALWAYS_ON_FEATURES \ - (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ +#define ALWAYS_ON_OFFLOADS \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \ NETIF_F_GSO_ROBUST) +#define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX) + #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ @@ -827,6 +829,7 @@ static int macvlan_init(struct net_device *dev) dev->features |= ALWAYS_ON_FEATURES; dev->hw_features |= NETIF_F_LRO; dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; + dev->vlan_features |= ALWAYS_ON_OFFLOADS; dev->gso_max_size = lowerdev->gso_max_size; dev->gso_max_segs = lowerdev->gso_max_segs; dev->hard_header_len = lowerdev->hard_header_len; -- cgit v1.2.3-59-g8ed1b From 4762010f09ac0453f613df345c5281e7f2dec510 Mon Sep 17 00:00:00 2001 From: "yuval.shaia@oracle.com" Date: Fri, 12 May 2017 09:10:51 +0300 Subject: net/mlx4_core: Use min3 to select number of MSI-X vectors Signed-off-by: Yuval Shaia Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 703205475524..83aab1e4c8c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2862,12 +2862,10 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) int port = 0; if (msi_x) { - int nreq = dev->caps.num_ports * num_online_cpus() + 1; - - nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, - nreq); - if (nreq > MAX_MSIX) - nreq = MAX_MSIX; + int nreq = min3(dev->caps.num_ports * + (int)num_online_cpus() + 1, + dev->caps.num_eqs - dev->caps.reserved_eqs, + MAX_MSIX); entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) -- cgit v1.2.3-59-g8ed1b From d19b183cdc1fa3d70d6abe2a4c369e748cd7ebb8 Mon Sep 17 00:00:00 2001 From: Douglas Caetano dos Santos Date: Fri, 12 May 2017 15:19:15 -0300 Subject: net/packet: fix missing net_device reference release When using a TX ring buffer, if an error occurs processing a control message (e.g. invalid message), the net_device reference is not released. Fixes c14ac9451c348 ("sock: enable timestamping using control messages") Signed-off-by: Douglas Caetano dos Santos Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/packet/af_packet.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f4001763134d..e3eeed19cc7a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2658,13 +2658,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } - sockc.tsflags = po->sk.sk_tsflags; - if (msg->msg_controllen) { - err = sock_cmsg_send(&po->sk, msg, &sockc); - if (unlikely(err)) - goto out; - } - err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2672,6 +2665,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + sockc.tsflags = po->sk.sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out_put; + } + if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size -- cgit v1.2.3-59-g8ed1b From 8c977f5a856a7276450ddf3a7b57b4a8815b63f9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 12 May 2017 22:54:23 +0800 Subject: mdio: mux: fix device_node_continue.cocci warnings Device node iterators put the previous value of the index variable, so an explicit put causes a double put. In particular, of_mdiobus_register can fail before doing anything interesting, so one could view it as a no-op from the reference count point of view. Generated by: scripts/coccinelle/iterators/device_node_continue.cocci CC: Jon Mason Signed-off-by: Julia Lawall Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 6943c5ece44a..599ce24c514f 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -169,7 +169,6 @@ int mdio_mux_init(struct device *dev, if (r) { mdiobus_free(cb->mii_bus); devm_kfree(dev, cb); - of_node_put(child_bus_node); } else { cb->next = pb->children; pb->children = cb; -- cgit v1.2.3-59-g8ed1b From aa4ad88cfcd4ee45f527fb982140576711e3b501 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 14 May 2017 12:21:23 +0300 Subject: qed: Fix uninitialized data in aRFS infrastructure Current memset is using incorrect type of variable, causing the upper-half of the strucutre to be left uninitialized and causing: ethernet/qlogic/qed/qed_init_fw_funcs.c: In function 'qed_set_rfs_mode_disable': ethernet/qlogic/qed/qed_init_fw_funcs.c:993:3: error: '*((void *)&ramline+4)' is used uninitialized in this function [-Werror=uninitialized] Fixes: d51e4af5c209 ("qed: aRFS infrastructure support") Reported-by: Arnd Bergmann Signed-off-by: Yuval Mintz Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index 67200c5498ab..0a8fde629991 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -983,7 +983,7 @@ void qed_set_rfs_mode_disable(struct qed_hwfn *p_hwfn, memset(&camline, 0, sizeof(union gft_cam_line_union)); qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id, camline.cam_line_mapped.camline); - memset(&ramline, 0, sizeof(union gft_cam_line_union)); + memset(&ramline, 0, sizeof(ramline)); for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++) { u32 hw_addr = PRS_REG_GFT_PROFILE_MASK_RAM; -- cgit v1.2.3-59-g8ed1b From 66eb9f86e50547ec2a8ff7a75997066a74ef584b Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 12 May 2017 17:03:39 -0700 Subject: ipv6: avoid dad-failures for addresses with NODAD Every address gets added with TENTATIVE flag even for the addresses with IFA_F_NODAD flag and dad-work is scheduled for them. During this DAD process we realize it's an address with NODAD and complete the process without sending any probe. However the TENTATIVE flags stays on the address for sometime enough to cause misinterpretation when we receive a NS. While processing NS, if the address has TENTATIVE flag, we mark it DADFAILED and endup with an address that was originally configured as NODAD with DADFAILED. We can't avoid scheduling dad_work for addresses with NODAD but we can avoid adding TENTATIVE flag to avoid this racy situation. Signed-off-by: Mahesh Bandewar Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8d297a79b568..6a4fb1e629fb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1022,7 +1022,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; - ifa->flags = flags | IFA_F_TENTATIVE; + ifa->flags = flags; + /* No need to add the TENTATIVE flag for addresses with NODAD */ + if (!(flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; ifa->valid_lft = valid_lft; ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; -- cgit v1.2.3-59-g8ed1b From 91bcdb92d39711d1adb40c26b653b7978d93eb98 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 15 May 2017 09:43:05 -0400 Subject: dm thin metadata: call precommit before saving the roots These calls were the wrong way round in __write_initial_superblock. Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 0f0251d0d337..d31d18d9727c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) if (r < 0) return r; - r = save_sm_roots(pmd); + r = dm_tm_pre_commit(pmd->tm); if (r < 0) return r; - r = dm_tm_pre_commit(pmd->tm); + r = save_sm_roots(pmd); if (r < 0) return r; -- cgit v1.2.3-59-g8ed1b From 0377a07c7a035e0d033cd8b29f0cb15244c0916a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 15 May 2017 09:45:40 -0400 Subject: dm space map disk: fix some book keeping in the disk space map When decrementing the reference count for a block, the free count wasn't being updated if the reference count went to zero. Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-space-map-disk.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index ebb280a14325..32adf6b4a9c7 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) { + int r; + uint32_t old_count; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - return sm_ll_dec(&smd->ll, b, &ev); + r = sm_ll_dec(&smd->ll, b, &ev); + if (!r && (ev == SM_FREE)) { + /* + * It's only free if it's also free in the last + * transaction. + */ + r = sm_ll_lookup(&smd->old_ll, b, &old_count); + if (!r && !old_count) + smd->nr_allocated_this_transaction--; + } + + return r; } static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) -- cgit v1.2.3-59-g8ed1b From ece0728037b15f4d31198f12b359104bcb5db4c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 May 2017 17:28:36 +0200 Subject: dm rq: add a missing break to map_request We don't want to bug when receiving a DM_MAPIO_KILL value.. Fixes: 412445ac ("dm: introduce a new DM_MAPIO_KILL return value") Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 2af27026aa2e..b639fa7246ee 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -507,6 +507,7 @@ static int map_request(struct dm_rq_target_io *tio) case DM_MAPIO_KILL: /* The target wants to complete the I/O */ dm_kill_unmapped_request(rq, -EIO); + break; default: DMWARN("unimplemented target map return value: %d", r); BUG(); -- cgit v1.2.3-59-g8ed1b From 18a482f5245cc875755090853e84283512b3e6bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 May 2017 17:28:37 +0200 Subject: dm mpath: don't return -EIO from dm_report_EIO Instead just turn the macro into a helper for the warning message. This removes an unnecessary assignment and will allow the next commit to fix a place where -EIO is the wrong return value. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 926a6bcb32c8..d55454f98b59 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -447,7 +447,7 @@ failed: * it has been invoked. */ #define dm_report_EIO(m) \ -({ \ +do { \ struct mapped_device *md = dm_table_get_md((m)->ti->table); \ \ pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \ @@ -455,8 +455,7 @@ failed: test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ dm_noflush_suspending((m)->ti)); \ - -EIO; \ -}) +} while (0) /* * Map cloned requests (request-based multipath) @@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, if (!pgpath) { if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_DELAY_REQUEUE; - return dm_report_EIO(m); /* Failed */ + dm_report_EIO(m); /* Failed */ + return -EIO; } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { if (pg_init_all_paths(m)) @@ -558,7 +558,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m if (!pgpath) { if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_REQUEUE; - return dm_report_EIO(m); + dm_report_EIO(m); + return -EIO; } mpio->pgpath = pgpath; @@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { if (error == -EIO) - error = dm_report_EIO(m); + dm_report_EIO(m); /* complete with the original error */ r = DM_ENDIO_DONE; } @@ -1524,8 +1525,10 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone, fail_path(mpio->pgpath); if (atomic_read(&m->nr_valid_paths) == 0 && - !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - return dm_report_EIO(m); + !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + dm_report_EIO(m); + return -EIO; + } /* Queue for the daemon to resubmit */ dm_bio_restore(get_bio_details_from_bio(clone), clone); -- cgit v1.2.3-59-g8ed1b From f98e0eb68008aff9824d1c4dad7276c8bab83ca5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 May 2017 17:28:38 +0200 Subject: dm mpath: multipath_clone_and_map must not return -EIO Since 412445ac ("dm: introduce a new DM_MAPIO_KILL return value"), the clone_and_map_rq methods must not return errno values, so fix it up to properly return DM_MAPIO_KILL, instead of the -EIO value that snuck in due to a conflict between two patches. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d55454f98b59..3df056b73b66 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -481,7 +481,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_DELAY_REQUEUE; dm_report_EIO(m); /* Failed */ - return -EIO; + return DM_MAPIO_KILL; } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { if (pg_init_all_paths(m)) -- cgit v1.2.3-59-g8ed1b From ca9df7ede41afd006d74fd6f09f36d909d0eaad7 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Mon, 15 May 2017 16:04:36 +0200 Subject: net: netcp: fix check of requested timestamping filter The driver doesn't support timestamping of all received packets and should return error when trying to enable the HWTSTAMP_FILTER_ALL filter. Cc: WingMan Kwok Cc: Richard Cochran Signed-off-by: Miroslav Lichvar Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 897176fc5043..dd92950a4615 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2651,7 +2651,6 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) case HWTSTAMP_FILTER_NONE: cpts_rx_enable(cpts, 0); break; - case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: -- cgit v1.2.3-59-g8ed1b From b18e5e86b44be0dca399d8e2383f97c8077392ce Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Mon, 15 May 2017 10:51:07 -0700 Subject: ldmvsw: unregistering netdev before disable hardware When running LDom binding/unbinding test, kernel may panic in ldmvsw_open(). It is more likely that because we're removing the ldc connection before unregistering the netdev in vsw_port_remove(), we set up a window of time where one process could be removing the device while another trying to UP the device. This also sometimes causes vio handshake error due to opening a device without closing it completely. We should unregister the netdev before we disable the "hardware". Orabug: 25980913, 25925306 Signed-off-by: Thomas Tai Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/ldmvsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5a90fed06260..309747c7b8ae 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -413,6 +413,7 @@ static int vsw_port_remove(struct vio_dev *vdev) del_timer_sync(&port->vio.timer); napi_disable(&port->napi); + unregister_netdev(port->dev); list_del_rcu(&port->list); @@ -427,7 +428,6 @@ static int vsw_port_remove(struct vio_dev *vdev) dev_set_drvdata(&vdev->dev, NULL); - unregister_netdev(port->dev); free_netdev(port->dev); } -- cgit v1.2.3-59-g8ed1b From 8b671f906c2debc4f2393438c4e7668936522e99 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 15 May 2017 10:51:08 -0700 Subject: ldmvsw: stop the clean timer at beginning of remove Stop the clean timer earlier to be sure there's no asynchronous interference while stopping the port. Orabug: 25748241 Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/ldmvsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 309747c7b8ae..5b56c24b6ed2 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -411,6 +411,7 @@ static int vsw_port_remove(struct vio_dev *vdev) if (port) { del_timer_sync(&port->vio.timer); + del_timer_sync(&port->clean_timer); napi_disable(&port->napi); unregister_netdev(port->dev); @@ -418,7 +419,6 @@ static int vsw_port_remove(struct vio_dev *vdev) list_del_rcu(&port->list); synchronize_rcu(); - del_timer_sync(&port->clean_timer); spin_lock_irqsave(&port->vp->lock, flags); sunvnet_port_rm_txq_common(port); spin_unlock_irqrestore(&port->vp->lock, flags); -- cgit v1.2.3-59-g8ed1b From 2d4456c73a487abe53863e10641c2f73537edf5c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 11 May 2017 10:27:35 -0500 Subject: block: xen-blkback: add null check to avoid null pointer dereference Add null check before calling xen_blkif_put() to avoid potential null pointer dereference. Addresses-Coverity-ID: 1350942 Cc: Juergen Gross Signed-off-by: Gustavo A. R. Silva Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8fe61b5dc5a6..1f3dfaa54d87 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -504,11 +504,13 @@ static int xen_blkbk_remove(struct xenbus_device *dev) dev_set_drvdata(&dev->dev, NULL); - if (be->blkif) + if (be->blkif) { xen_blkif_disconnect(be->blkif); - /* Put the reference we set in xen_blkif_alloc(). */ - xen_blkif_put(be->blkif); + /* Put the reference we set in xen_blkif_alloc(). */ + xen_blkif_put(be->blkif); + } + kfree(be->mode); kfree(be); return 0; -- cgit v1.2.3-59-g8ed1b From cde97f8492ac4425c4d0647a308e15e78cb4c218 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 8 May 2017 17:16:27 +1000 Subject: selftests/powerpc: Test TM and VMX register state Test that the VMX checkpointed register state is maintained when a VMX unavailable exception is taken during a transaction. Thanks to Breno Leitao and Gustavo Bueno Romero for the original test this is based heavily on. Signed-off-by: Michael Neuling Reviewed-by: Cyril Bur [mpe: Add to .gitignore, always build it 64-bit to fix build errors] Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 4 +- .../testing/selftests/powerpc/tm/tm-vmx-unavail.c | 118 +++++++++++++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 427621792229..2f1f7b013293 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -11,3 +11,4 @@ tm-signal-context-chk-fpu tm-signal-context-chk-gpr tm-signal-context-chk-vmx tm-signal-context-chk-vsx +tm-vmx-unavail diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 5576ee6a51f2..958c11c14acd 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -2,7 +2,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu tm-signal-context-chk-vmx tm-signal-context-chk-vsx TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS) + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \ + $(SIGNAL_CONTEXT_CHK_TESTS) include ../../lib.mk @@ -13,6 +14,7 @@ CFLAGS += -mhtm $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread +$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c new file mode 100644 index 000000000000..137185ba4937 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -0,0 +1,118 @@ +/* + * Copyright 2017, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * Original: Breno Leitao & + * Gustavo Bueno Romero + * Edited: Michael Neuling + * + * Force VMX unavailable during a transaction and see if it corrupts + * the checkpointed VMX register state after the abort. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tm.h" +#include "utils.h" + +int passed; + +void *worker(void *unused) +{ + __int128 vmx0; + uint64_t texasr; + + asm goto ( + "li 3, 1;" /* Stick non-zero value in VMX0 */ + "std 3, 0(%[vmx0_ptr]);" + "lvx 0, 0, %[vmx0_ptr];" + + /* Wait here a bit so we get scheduled out 255 times */ + "lis 3, 0x3fff;" + "1: ;" + "addi 3, 3, -1;" + "cmpdi 3, 0;" + "bne 1b;" + + /* Kernel will hopefully turn VMX off now */ + + "tbegin. ;" + "beq failure;" + + /* Cause VMX unavail. Any VMX instruction */ + "vaddcuw 0,0,0;" + + "tend. ;" + "b %l[success];" + + /* Check VMX0 sanity after abort */ + "failure: ;" + "lvx 1, 0, %[vmx0_ptr];" + "vcmpequb. 2, 0, 1;" + "bc 4, 24, %l[value_mismatch];" + "b %l[value_match];" + : + : [vmx0_ptr] "r"(&vmx0) + : "r3" + : success, value_match, value_mismatch + ); + + /* HTM aborted and VMX0 is corrupted */ +value_mismatch: + texasr = __builtin_get_texasr(); + + printf("\n\n==============\n\n"); + printf("Failure with error: %lx\n", _TEXASR_FAILURE_CODE(texasr)); + printf("Summary error : %lx\n", _TEXASR_FAILURE_SUMMARY(texasr)); + printf("TFIAR exact : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr)); + + passed = 0; + return NULL; + + /* HTM aborted but VMX0 is correct */ +value_match: +// printf("!"); + return NULL; + +success: +// printf("."); + return NULL; +} + +int tm_vmx_unavail_test() +{ + int threads; + pthread_t *thread; + + SKIP_IF(!have_htm()); + + passed = 1; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * 4; + thread = malloc(sizeof(pthread_t)*threads); + if (!thread) + return EXIT_FAILURE; + + for (uint64_t i = 0; i < threads; i++) + pthread_create(&thread[i], NULL, &worker, NULL); + + for (uint64_t i = 0; i < threads; i++) + pthread_join(thread[i], NULL); + + free(thread); + + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} + + +int main(int argc, char **argv) +{ + return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test"); +} -- cgit v1.2.3-59-g8ed1b From bbb075ddf7d58762040ca413ad82e9974713def3 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 12 May 2017 14:52:06 +0530 Subject: powerpc/powernv: Set NAPSTATELOST after recovering paca on P9 DD1 Commit 17ed4c8f81da ("powerpc/powernv: Recover correct PACA on wakeup from a stop on P9 DD1") promises to set the NAPSTATELOST bit in paca after recovering the correct paca for the thread waking up from stop1 on DD1, so that the GPRs can be correctly restored on the stop exit path. However, it loads the value 1 into r3, but stores the value in r0 into NAPSTATELOST(r13). Fix this by correctly set the NAPSTATELOST bit in paca after recovering the paca on POWER9 DD1. Fixes: 17ed4c8f81da ("powerpc/powernv: Recover correct PACA on wakeup from a stop on P9 DD1") Signed-off-by: Gautham R. Shenoy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 07d4e0ad60db..4898d676dcae 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -416,7 +416,7 @@ power9_dd1_recover_paca: * which needs to be restored from the stack. */ li r3, 1 - stb r0,PACA_NAPSTATELOST(r13) + stb r3,PACA_NAPSTATELOST(r13) blr /* -- cgit v1.2.3-59-g8ed1b From d04c02f8aa96d82e4cbe783f85a820aae820e746 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 15 May 2017 23:40:05 +0530 Subject: powerpc/kprobes: Fix handling of instruction emulation on probe re-entry Commit 22d8b3dec214c ("powerpc/kprobes: Emulate instructions on kprobe handler re-entry") enabled emulating instructions on kprobe re-entry, rather than single-stepping always. However, we didn't update the single stepping code to only be run if the emulation fails. Also, we missed re-enabling preemption if the instruction emulation was successful. Fix those issues. Fixes: 22d8b3dec214c ("powerpc/kprobes: Emulate instructions on kprobe handler re-entry") Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 160ae0fa7d0d..fc4343514bed 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -305,16 +305,17 @@ int kprobe_handler(struct pt_regs *regs) save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); kprobes_inc_nmissed_count(p); - prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; if (p->ainsn.boostable >= 0) { ret = try_to_emulate(p, regs); if (ret > 0) { restore_previous_kprobe(kcb); + preempt_enable_no_resched(); return 1; } } + prepare_singlestep(p, regs); return 1; } else { if (*addr != BREAKPOINT_INSTRUCTION) { -- cgit v1.2.3-59-g8ed1b From fa16b69f1299004b60b625f181143500a246e5cb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 May 2017 09:11:33 +0200 Subject: ALSA: hda - No loopback on ALC299 codec ALC299 has no loopback mixer, but the driver still tries to add a beep control over the mixer NID which leads to the error at accessing it. This patch fixes it by properly declaring mixer_nid=0 for this codec. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195775 Fixes: 28f1f9b26cee ("ALSA: hda/realtek - Add new codec ID ALC299") Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 58df440013c5..9c22ad694534 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6465,8 +6465,11 @@ static int patch_alc269(struct hda_codec *codec) break; case 0x10ec0225: case 0x10ec0295: + spec->codec_variant = ALC269_TYPE_ALC225; + break; case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; + spec->gen.mixer_nid = 0; /* no loopback on ALC299 */ break; case 0x10ec0234: case 0x10ec0274: -- cgit v1.2.3-59-g8ed1b From c953d63548207a085abcb12a15fefc8a11ffdf0a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 16 May 2017 09:30:18 +0800 Subject: ebtables: arpreply: Add the standard target sanity check The info->target comes from userspace and it would be used directly. So we need to add the sanity check to make sure it is a valid standard target, although the ebtables tool has already checked it. Kernel needs to validate anything coming from userspace. If the target is set as an evil value, it would break the ebtables and cause a panic. Because the non-standard target is treated as one offset. Now add one helper function ebt_invalid_target, and we would replace the macro INVALID_TARGET later. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 +++++ net/bridge/netfilter/ebt_arpreply.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a30efb437e6d..e0cbf17af780 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 5929309beaa1..db85230e49c3 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; } -- cgit v1.2.3-59-g8ed1b From 2952a6070e07ebdd5896f1f5b861acad677caded Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 16 May 2017 10:34:54 +0100 Subject: kvm: arm/arm64: Force reading uncached stage2 PGD Make sure we don't use a cached value of the KVM stage2 PGD while resetting the PGD. Cc: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 909a1a793b31..704e35f312a4 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -837,7 +837,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - pgd = kvm->arch.pgd; + pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; } spin_unlock(&kvm->mmu_lock); -- cgit v1.2.3-59-g8ed1b From 0c428a6a9256fcd66817e12db32a50b405ed2e5c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 16 May 2017 10:34:55 +0100 Subject: kvm: arm/arm64: Fix use after free of stage2 page table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We yield the kvm->mmu_lock occassionaly while performing an operation (e.g, unmap or permission changes) on a large area of stage2 mappings. However this could possibly cause another thread to clear and free up the stage2 page tables while we were waiting for regaining the lock and thus the original thread could end up in accessing memory that was freed. This patch fixes the problem by making sure that the stage2 pagetable is still valid after we regain the lock. The fact that mmu_notifer->release() could be called twice (via __mmu_notifier_release and mmu_notifier_unregsister) enhances the possibility of hitting this race where there are two threads trying to unmap the entire guest shadow pages. While at it, cleanup the redudant checks around cond_resched_lock in stage2_wp_range(), as cond_resched_lock already does the same checks. Cc: Mark Rutland Cc: Radim Krčmář Cc: andreyknvl@google.com Cc: Paolo Bonzini Cc: stable@vger.kernel.org Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 704e35f312a4..a2d63247d1bb 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { + /* + * Make sure the page table is still active, as another thread + * could have possibly freed the page table, while we released + * the lock. + */ + if (!READ_ONCE(kvm->arch.pgd)) + break; next = stage2_pgd_addr_end(addr, end); if (!stage2_pgd_none(*pgd)) unmap_stage2_puds(kvm, pgd, addr, next); @@ -1170,11 +1177,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) * large. Otherwise, we may see kernel panics with * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, * CONFIG_LOCKDEP. Additionally, holding the lock too long - * will also starve other vCPUs. + * will also starve other vCPUs. We have to also make sure + * that the page tables are not freed while we released + * the lock. */ - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) - cond_resched_lock(&kvm->mmu_lock); - + cond_resched_lock(&kvm->mmu_lock); + if (!READ_ONCE(kvm->arch.pgd)) + break; next = stage2_pgd_addr_end(addr, end); if (stage2_pgd_present(*pgd)) stage2_wp_puds(pgd, addr, next); -- cgit v1.2.3-59-g8ed1b From 682179592e48fa66056fbad1a86604be4992f885 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 1 Apr 2016 17:13:13 +0300 Subject: usb: dwc3: pci: add Intel Cannonlake PCI IDs Intel Cannonlake PCH has the same DWC3 than Intel Sunrisepoint. Add the new IDs to the supported devices. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index a15ec71d0423..84a2cebfc712 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -39,6 +39,8 @@ #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 #define PCI_DEVICE_ID_INTEL_GLK 0x31aa +#define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee +#define PCI_DEVICE_ID_INTEL_CNPH 0xa36e #define PCI_INTEL_BXT_DSM_UUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 @@ -270,6 +272,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPLP), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPH), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; -- cgit v1.2.3-59-g8ed1b From f1d6826cae30e97e37a1f2481d7e1dc4faa09ce1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 21 Apr 2017 15:58:08 +0300 Subject: usb: dwc3: gadget: Fix ISO transfer performance Commit 08a36b543803 ("usb: dwc3: gadget: simplify __dwc3_gadget_ep_queue()") caused a small change in the way ISO transfer is handled in the case when XferInProgress event happens on Isoc EP with an active transfer. This caused a performance degradation of 50%. e.g. using g_webcam on DUT and luvcview on host the video frame rate dropped from 16fps to 8fps @high-speed. Make the ISO transfer handling equivalent to that prior to that commit to get back the original ISO performance numbers. Fixes: 08a36b543803 ("usb: dwc3: gadget: simplify __dwc3_gadget_ep_queue()") Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6f6f0b3be3ad..3d6180805eb8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1261,14 +1261,24 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) __dwc3_gadget_start_isoc(dwc, dep, cur_uf); dep->flags &= ~DWC3_EP_PENDING_REQUEST; } + return 0; } - return 0; + + if ((dep->flags & DWC3_EP_BUSY) && + !(dep->flags & DWC3_EP_MISSED_ISOC)) { + WARN_ON_ONCE(!dep->resource_index); + ret = __dwc3_gadget_kick_transfer(dep, + dep->resource_index); + } + + goto out; } if (!dwc3_calc_trbs_left(dep)) return 0; ret = __dwc3_gadget_kick_transfer(dep, 0); +out: if (ret == -EBUSY) ret = 0; -- cgit v1.2.3-59-g8ed1b From d325a1de49d61ee11aca58a529571c91ecea7879 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 11 May 2017 17:26:47 -0700 Subject: usb: dwc3: gadget: Prevent losing events in event cache The dwc3 driver can overwite its previous events if its top-half IRQ handler (TH) gets invoked again before processing the events in the cache. We see this as a hang in the file transfer and the host will attempt to reset the device. TH gets the event count and deasserts the interrupt line by writing DWC3_GEVNTSIZ_INTMASK to DWC3_GEVNTSIZ. If there's a new event coming between reading the event count and interrupt deassertion, dwc3 will lose previous pending events. More generally, we will see 0 event count, which should not affect anything. This shouldn't be possible in the current dwc3 implementation. However, through testing and reading the PCIe trace, the TH occasionally still gets invoked one more time after HW interrupt deassertion. (With PCIe legacy interrupts, TH is called repeatedly as long as the interrupt line is asserted). We suspect that there is a small detection delay in the SW. To avoid this issue, Check DWC3_EVENT_PENDING flag to determine if the events are processed in the bottom-half IRQ handler. If not, return IRQ_HANDLED and don't process new event. Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3d6180805eb8..aea9a5b948b4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3036,6 +3036,15 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) return IRQ_HANDLED; } + /* + * With PCIe legacy interrupt, test shows that top-half irq handler can + * be called again after HW interrupt deassertion. Check if bottom-half + * irq event handler completes before caching new event to prevent + * losing events. + */ + if (evt->flags & DWC3_EVENT_PENDING) + return IRQ_HANDLED; + count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0)); count &= DWC3_GEVNTCOUNT_MASK; if (!count) -- cgit v1.2.3-59-g8ed1b From 844cf8a9d51f1d1e116d0bb18c353a2a94c70e79 Mon Sep 17 00:00:00 2001 From: Bogdan Mirea Date: Sun, 30 Apr 2017 13:21:26 +0300 Subject: usb: gadget: gserial: check if console kthread exists Check for bad pointer that may result because of kthread_create failure. This check is needed since the gserial setup callback function (gs_console_setup()) is only freeing the info->con_buf in case of kthread_create failure which will result into bad info->console_thread pointer. Without checking info->console_thread pointer validity in the gserial_console_exit() function, before calling kthread_stop(), the rmmod will generate Kernel Oops. Signed-off-by: Bogdan Mirea Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/u_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 000677c991b0..9b0805f55ad7 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1256,7 +1256,7 @@ static void gserial_console_exit(void) struct gscons_info *info = &gscons_info; unregister_console(&gserial_cons); - if (info->console_thread != NULL) + if (!IS_ERR_OR_NULL(info->console_thread)) kthread_stop(info->console_thread); gs_buf_free(&info->con_buf); } -- cgit v1.2.3-59-g8ed1b From b7f73850bb4fac1e2209a4dd5e636d39be92f42c Mon Sep 17 00:00:00 2001 From: William Wu Date: Tue, 25 Apr 2017 17:45:48 +0800 Subject: usb: gadget: f_fs: avoid out of bounds access on comp_desc Companion descriptor is only used for SuperSpeed endpoints, if the endpoints are HighSpeed or FullSpeed, the Companion descriptor will not allocated, so we can only access it if gadget is SuperSpeed. I can reproduce this issue on Rockchip platform rk3368 SoC which supports USB 2.0, and use functionfs for ADB. Kernel build with CONFIG_KASAN=y and CONFIG_SLUB_DEBUG=y report the following BUG: ================================================================== BUG: KASAN: slab-out-of-bounds in ffs_func_set_alt+0x224/0x3a0 at addr ffffffc0601f6509 Read of size 1 by task swapper/0/0 ============================================================================ BUG kmalloc-256 (Not tainted): kasan: bad access detected ---------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in ffs_func_bind+0x52c/0x99c age=1275 cpu=0 pid=1 alloc_debug_processing+0x128/0x17c ___slab_alloc.constprop.58+0x50c/0x610 __slab_alloc.isra.55.constprop.57+0x24/0x34 __kmalloc+0xe0/0x250 ffs_func_bind+0x52c/0x99c usb_add_function+0xd8/0x1d4 configfs_composite_bind+0x48c/0x570 udc_bind_to_driver+0x6c/0x170 usb_udc_attach_driver+0xa4/0xd0 gadget_dev_desc_UDC_store+0xcc/0x118 configfs_write_file+0x1a0/0x1f8 __vfs_write+0x64/0x174 vfs_write+0xe4/0x200 SyS_write+0x68/0xc8 el0_svc_naked+0x24/0x28 INFO: Freed in inode_doinit_with_dentry+0x3f0/0x7c4 age=1275 cpu=7 pid=247 ... Call trace: [] dump_backtrace+0x0/0x230 [] show_stack+0x14/0x1c [] dump_stack+0xa0/0xc8 [] print_trailer+0x188/0x198 [] object_err+0x3c/0x4c [] kasan_report+0x324/0x4dc [] __asan_load1+0x24/0x50 [] ffs_func_set_alt+0x224/0x3a0 [] composite_setup+0xdcc/0x1ac8 [] android_setup+0x124/0x1a0 [] _setup+0x54/0x74 [] handle_ep0+0x3288/0x4390 [] dwc_otg_pcd_handle_out_ep_intr+0x14dc/0x2ae4 [] dwc_otg_pcd_handle_intr+0x1ec/0x298 [] dwc_otg_pcd_irq+0x10/0x20 [] handle_irq_event_percpu+0x124/0x3ac [] handle_irq_event+0x60/0xa0 [] handle_fasteoi_irq+0x10c/0x1d4 [] generic_handle_irq+0x30/0x40 [] __handle_domain_irq+0xac/0xdc [] gic_handle_irq+0x64/0xa4 ... Memory state around the buggy address: ffffffc0601f6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffffffc0601f6480: 00 00 00 00 00 00 00 00 00 00 06 fc fc fc fc fc >ffffffc0601f6500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffffc0601f6580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc0601f6600: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00 ================================================================== Signed-off-by: William Wu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 71dd27c0d7f2..47dda3450abd 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1858,12 +1858,12 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; - comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + - USB_DT_ENDPOINT_SIZE); - ep->ep->maxburst = comp_desc->bMaxBurst + 1; - - if (needs_comp_desc) + if (needs_comp_desc) { + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; ep->ep->comp_desc = comp_desc; + } ret = usb_ep_enable(ep->ep); if (likely(!ret)) { -- cgit v1.2.3-59-g8ed1b From 018047a1dba7636e1f7fdae2cc290a528991d648 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 23 Apr 2017 13:55:13 +0800 Subject: usb: dwc3: keystone: check return value Function devm_clk_get() returns an ERR_PTR when it fails. However, in function kdwc3_probe(), its return value is not checked, which may result in a bad memory access bug. This patch fixes the bug. Signed-off-by: Pan Bian Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-keystone.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c index 72664700b8a2..12ee23f53cdd 100644 --- a/drivers/usb/dwc3/dwc3-keystone.c +++ b/drivers/usb/dwc3/dwc3-keystone.c @@ -107,6 +107,10 @@ static int kdwc3_probe(struct platform_device *pdev) return PTR_ERR(kdwc->usbss); kdwc->clk = devm_clk_get(kdwc->dev, "usb"); + if (IS_ERR(kdwc->clk)) { + dev_err(kdwc->dev, "unable to get usb clock\n"); + return PTR_ERR(kdwc->clk); + } error = clk_prepare_enable(kdwc->clk); if (error < 0) { -- cgit v1.2.3-59-g8ed1b From f0d39a179b9cd38c739acfc4f92720a0c79e1d66 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Fri, 12 May 2017 21:16:14 +0100 Subject: staging: typec: fusb302: reset i2c_busy state in error Fix reset of i2c_busy flag if an error occurs during the i2c block read. Signed-off-by: Rui Miguel Silva Acked-by: Yueyao Zhu Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/fusb302/fusb302.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c index 6bd602db11be..7b0b9e51016d 100644 --- a/drivers/staging/typec/fusb302/fusb302.c +++ b/drivers/staging/typec/fusb302/fusb302.c @@ -365,13 +365,15 @@ static int fusb302_i2c_block_read(struct fusb302_chip *chip, u8 address, if (ret < 0) { fusb302_log(chip, "cannot block read 0x%02x, len=%d, ret=%d", address, length, ret); - return ret; + goto done; } if (ret != length) { fusb302_log(chip, "only read %d/%d bytes from 0x%02x", ret, length, address); - return -EIO; + ret = -EIO; } + +done: atomic_set(&chip->i2c_busy, 0); return ret; -- cgit v1.2.3-59-g8ed1b From 50b7c322cfc8b44a36b1373dab2177db23e3282c Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Fri, 12 May 2017 21:16:15 +0100 Subject: staging: typec: fusb302: refactor resume retry mechanism The i2c functions need to test the pm_suspend state and do, if needed, some retry before i2c operations. This code was repeated 4x. To isolate this, create a new function to check suspend state and call it in every need place. As at it, move the error message from pr_err to dev_err. Signed-off-by: Rui Miguel Silva Acked-by: Yueyao Zhu Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/fusb302/fusb302.c | 70 +++++++++++++++++---------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/staging/typec/fusb302/fusb302.c b/drivers/staging/typec/fusb302/fusb302.c index 7b0b9e51016d..4a356e509fe4 100644 --- a/drivers/staging/typec/fusb302/fusb302.c +++ b/drivers/staging/typec/fusb302/fusb302.c @@ -264,22 +264,36 @@ static void fusb302_debugfs_exit(const struct fusb302_chip *chip) { } #define FUSB302_RESUME_RETRY 10 #define FUSB302_RESUME_RETRY_SLEEP 50 -static int fusb302_i2c_write(struct fusb302_chip *chip, - u8 address, u8 data) + +static bool fusb302_is_suspended(struct fusb302_chip *chip) { int retry_cnt; - int ret = 0; - atomic_set(&chip->i2c_busy, 1); for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) { if (atomic_read(&chip->pm_suspend)) { - pr_err("fusb302_i2c: pm suspend, retry %d/%d\n", - retry_cnt + 1, FUSB302_RESUME_RETRY); + dev_err(chip->dev, "i2c: pm suspend, retry %d/%d\n", + retry_cnt + 1, FUSB302_RESUME_RETRY); msleep(FUSB302_RESUME_RETRY_SLEEP); } else { - break; + return false; } } + + return true; +} + +static int fusb302_i2c_write(struct fusb302_chip *chip, + u8 address, u8 data) +{ + int ret = 0; + + atomic_set(&chip->i2c_busy, 1); + + if (fusb302_is_suspended(chip)) { + atomic_set(&chip->i2c_busy, 0); + return -ETIMEDOUT; + } + ret = i2c_smbus_write_byte_data(chip->i2c_client, address, data); if (ret < 0) fusb302_log(chip, "cannot write 0x%02x to 0x%02x, ret=%d", @@ -292,21 +306,17 @@ static int fusb302_i2c_write(struct fusb302_chip *chip, static int fusb302_i2c_block_write(struct fusb302_chip *chip, u8 address, u8 length, const u8 *data) { - int retry_cnt; int ret = 0; if (length <= 0) return ret; atomic_set(&chip->i2c_busy, 1); - for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) { - if (atomic_read(&chip->pm_suspend)) { - pr_err("fusb302_i2c: pm suspend, retry %d/%d\n", - retry_cnt + 1, FUSB302_RESUME_RETRY); - msleep(FUSB302_RESUME_RETRY_SLEEP); - } else { - break; - } + + if (fusb302_is_suspended(chip)) { + atomic_set(&chip->i2c_busy, 0); + return -ETIMEDOUT; } + ret = i2c_smbus_write_i2c_block_data(chip->i2c_client, address, length, data); if (ret < 0) @@ -320,19 +330,15 @@ static int fusb302_i2c_block_write(struct fusb302_chip *chip, u8 address, static int fusb302_i2c_read(struct fusb302_chip *chip, u8 address, u8 *data) { - int retry_cnt; int ret = 0; atomic_set(&chip->i2c_busy, 1); - for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) { - if (atomic_read(&chip->pm_suspend)) { - pr_err("fusb302_i2c: pm suspend, retry %d/%d\n", - retry_cnt + 1, FUSB302_RESUME_RETRY); - msleep(FUSB302_RESUME_RETRY_SLEEP); - } else { - break; - } + + if (fusb302_is_suspended(chip)) { + atomic_set(&chip->i2c_busy, 0); + return -ETIMEDOUT; } + ret = i2c_smbus_read_byte_data(chip->i2c_client, address); *data = (u8)ret; if (ret < 0) @@ -345,21 +351,17 @@ static int fusb302_i2c_read(struct fusb302_chip *chip, static int fusb302_i2c_block_read(struct fusb302_chip *chip, u8 address, u8 length, u8 *data) { - int retry_cnt; int ret = 0; if (length <= 0) return ret; atomic_set(&chip->i2c_busy, 1); - for (retry_cnt = 0; retry_cnt < FUSB302_RESUME_RETRY; retry_cnt++) { - if (atomic_read(&chip->pm_suspend)) { - pr_err("fusb302_i2c: pm suspend, retry %d/%d\n", - retry_cnt + 1, FUSB302_RESUME_RETRY); - msleep(FUSB302_RESUME_RETRY_SLEEP); - } else { - break; - } + + if (fusb302_is_suspended(chip)) { + atomic_set(&chip->i2c_busy, 0); + return -ETIMEDOUT; } + ret = i2c_smbus_read_i2c_block_data(chip->i2c_client, address, length, data); if (ret < 0) { -- cgit v1.2.3-59-g8ed1b From b72d7451209a0bad4264f5f4cb389e7f71cc5ad4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2017 13:30:05 +0200 Subject: staging: fsl-dpaa2/eth: add ETHERNET dependency The new driver cannot link correctly when the netdevice infrastructure is disabled: ERROR: "netdev_info" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined! ERROR: "skb_to_sgvec" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined! ERROR: "napi_disable" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined! ERROR: "napi_schedule_prep" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined! ERROR: "__napi_schedule_irqoff" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined! ERROR: "netif_carrier_on" [drivers/staging/fsl-dpaa2/ethernet/fsl-dpaa2-eth.ko] undefined! This adds a dependency on NETDEVICES and ETHERNET. Fixes: 0352d1d85201 ("staging: fsl-dpaa2/eth: Add APIs for DPNI objects") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-dpaa2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig index 2e325cb747ae..730fd6d4db33 100644 --- a/drivers/staging/fsl-dpaa2/Kconfig +++ b/drivers/staging/fsl-dpaa2/Kconfig @@ -12,6 +12,7 @@ config FSL_DPAA2 config FSL_DPAA2_ETH tristate "Freescale DPAA2 Ethernet" depends on FSL_DPAA2 && FSL_MC_DPIO + depends on NETDEVICES && ETHERNET ---help--- Ethernet driver for Freescale DPAA2 SoCs, using the Freescale MC bus driver -- cgit v1.2.3-59-g8ed1b From 2c4569ca26986d18243f282dd727da27e9adae4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 May 2017 13:54:11 +0200 Subject: genirq: Fix chained interrupt data ordering irq_set_chained_handler_and_data() sets up the chained interrupt and then stores the handler data. That's racy against an immediate interrupt which gets handled before the store of the handler data happened. The handler will dereference a NULL pointer and crash. Cure it by storing handler data before installing the chained handler. Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 686be4b73018..c94da688ee9b 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -880,8 +880,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, if (!desc) return; - __irq_do_set_handler(desc, handle, 1, NULL); desc->irq_common_data.handler_data = data; + __irq_do_set_handler(desc, handle, 1, NULL); irq_put_desc_busunlock(desc, flags); } -- cgit v1.2.3-59-g8ed1b From 7c62de5f3fc92291decc0dac5f36949bdc3fb575 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 13 Apr 2017 10:21:21 +0530 Subject: ARM: dts: dra7: Add power hold and power controller properties to palmas Add power hold and power controller properties to palmas node. This is needed to shutdown pmic correctly on boards with powerhold set. Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7-evm.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index 4bc4b575c99b..31a9e061ddd0 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -204,6 +204,8 @@ tps659038: tps659038@58 { compatible = "ti,tps659038"; reg = <0x58>; + ti,palmas-override-powerhold; + ti,system-power-controller; tps659038_pmic { compatible = "ti,tps659038-pmic"; -- cgit v1.2.3-59-g8ed1b From 910958b65cf638347e6c8f65d1f749d6a4cfb7e3 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 28 Apr 2017 12:27:35 +0200 Subject: ARM: dts: gta04: fix polarity of clocks for mcbsp4 The clock polarity setting of the mcbsp connected to the modem was wrong so almost only noise was received. With this patch it is also the same as it was on earlier non-dt kernels where it was working properly Signed-off-by: Andreas Kemnade Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-gta04.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index b3a8b1f24499..9ec737069369 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -55,7 +55,8 @@ simple-audio-card,bitclock-master = <&telephony_link_master>; simple-audio-card,frame-master = <&telephony_link_master>; simple-audio-card,format = "i2s"; - + simple-audio-card,bitclock-inversion; + simple-audio-card,frame-inversion; simple-audio-card,cpu { sound-dai = <&mcbsp4>; }; -- cgit v1.2.3-59-g8ed1b From f0b8dca8336b112d23be9fa9c0d38cc4eb733344 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 16 May 2017 08:10:07 -0700 Subject: ARM: dts: omap4: enable CEC pin for Pandaboard A4 and ES The CEC pin was always pulled up, making it impossible to use it. Change to PIN_INPUT so it can be used by the new CEC support. Signed-off-by: Hans Verkuil Reviewed-by: Tomi Valkeinen --- arch/arm/boot/dts/omap4-panda-a4.dts | 2 +- arch/arm/boot/dts/omap4-panda-es.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap4-panda-a4.dts b/arch/arm/boot/dts/omap4-panda-a4.dts index 78d363177762..f1a6476af371 100644 --- a/arch/arm/boot/dts/omap4-panda-a4.dts +++ b/arch/arm/boot/dts/omap4-panda-a4.dts @@ -13,7 +13,7 @@ /* Pandaboard Rev A4+ have external pullups on SCL & SDA */ &dss_hdmi_pins { pinctrl-single,pins = < - OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0) /* hdmi_cec.hdmi_cec */ + OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0) /* hdmi_cec.hdmi_cec */ OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0) /* hdmi_scl.hdmi_scl */ OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0) /* hdmi_sda.hdmi_sda */ >; diff --git a/arch/arm/boot/dts/omap4-panda-es.dts b/arch/arm/boot/dts/omap4-panda-es.dts index 119f8e657edc..940fe4f7c5f6 100644 --- a/arch/arm/boot/dts/omap4-panda-es.dts +++ b/arch/arm/boot/dts/omap4-panda-es.dts @@ -34,7 +34,7 @@ /* PandaboardES has external pullups on SCL & SDA */ &dss_hdmi_pins { pinctrl-single,pins = < - OMAP4_IOPAD(0x09a, PIN_INPUT_PULLUP | MUX_MODE0) /* hdmi_cec.hdmi_cec */ + OMAP4_IOPAD(0x09a, PIN_INPUT | MUX_MODE0) /* hdmi_cec.hdmi_cec */ OMAP4_IOPAD(0x09c, PIN_INPUT | MUX_MODE0) /* hdmi_scl.hdmi_scl */ OMAP4_IOPAD(0x09e, PIN_INPUT | MUX_MODE0) /* hdmi_sda.hdmi_sda */ >; -- cgit v1.2.3-59-g8ed1b From 56322e123235370f1449c7444e311cce857d12f5 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 11 May 2017 12:21:19 -0500 Subject: ARM: dts: LogicPD Torpedo: Fix camera pin mux Fix commit 05c4ffc3a266 ("ARM: dts: LogicPD Torpedo: Add MT9P031 Support") In the previous commit, I indicated that the only testing was done by showing the camera showed up when probing. This patch fixes an incorrect pin muxing on cam_d0, cam_d1 and cam_d2. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 08cce17a25a0..43e9364083de 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -249,9 +249,9 @@ OMAP3_CORE1_IOPAD(0x2110, PIN_INPUT | MUX_MODE0) /* cam_xclka.cam_xclka */ OMAP3_CORE1_IOPAD(0x2112, PIN_INPUT | MUX_MODE0) /* cam_pclk.cam_pclk */ - OMAP3_CORE1_IOPAD(0x2114, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */ - OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */ - OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */ + OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */ + OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */ + OMAP3_CORE1_IOPAD(0x211a, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */ OMAP3_CORE1_IOPAD(0x211c, PIN_INPUT | MUX_MODE0) /* cam_d3.cam_d3 */ OMAP3_CORE1_IOPAD(0x211e, PIN_INPUT | MUX_MODE0) /* cam_d4.cam_d4 */ OMAP3_CORE1_IOPAD(0x2120, PIN_INPUT | MUX_MODE0) /* cam_d5.cam_d5 */ -- cgit v1.2.3-59-g8ed1b From aff523fb82696f59b197ea79ea3652c216dbe4c0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 5 May 2017 15:37:06 -0700 Subject: memory: omap-gpmc: Fix debug output for access width The width needs to be configured in bytes with 1 meaning 8-bit access and 2 meaning 16-bit access. Cc: Peter Ujfalusi Acked-by: Roger Quadros Signed-off-by: Tony Lindgren --- drivers/memory/omap-gpmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index bf0fe0137dfe..6d1b4b707cc2 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -512,7 +512,7 @@ static void gpmc_cs_show_timings(int cs, const char *desc) pr_info("gpmc cs%i access configuration:\n", cs); GPMC_GET_RAW_BOOL(GPMC_CS_CONFIG1, 4, 4, "time-para-granularity"); GPMC_GET_RAW(GPMC_CS_CONFIG1, 8, 9, "mux-add-data"); - GPMC_GET_RAW_MAX(GPMC_CS_CONFIG1, 12, 13, + GPMC_GET_RAW_SHIFT_MAX(GPMC_CS_CONFIG1, 12, 13, 1, GPMC_CONFIG1_DEVICESIZE_MAX, "device-width"); GPMC_GET_RAW(GPMC_CS_CONFIG1, 16, 17, "wait-pin"); GPMC_GET_RAW_BOOL(GPMC_CS_CONFIG1, 21, 21, "wait-on-write"); -- cgit v1.2.3-59-g8ed1b From 0daaecacb83bc6b656a56393ab77a31c28139bc7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 12 May 2017 10:44:08 -0700 Subject: xfs: fix indlen accounting error on partial delalloc conversion The delalloc -> real block conversion path uses an incorrect calculation in the case where the middle part of a delalloc extent is being converted. This is documented as a rare situation because XFS generally attempts to maximize contiguity by converting as much of a delalloc extent as possible. If this situation does occur, the indlen reservation for the two new delalloc extents left behind by the conversion of the middle range is calculated and compared with the original reservation. If more blocks are required, the delta is allocated from the global block pool. This delta value can be characterized as the difference between the new total requirement (temp + temp2) and the currently available reservation minus those blocks that have already been allocated (startblockval(PREV.br_startblock) - allocated). The problem is that the current code does not account for previously allocated blocks correctly. It subtracts the current allocation count from the (new - old) delta rather than the old indlen reservation. This means that more indlen blocks than have been allocated end up stashed in the remaining extents and free space accounting is broken as a result. Fix up the calculation to subtract the allocated block count from the original extent indlen and thus correctly allocate the reservation delta based on the difference between the new total requirement and the unused blocks from the original reservation. Also remove a bogus assert that contradicts the fact that the new indlen reservation can be larger than the original indlen reservation. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f02eb7673392..8adb91b05588 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2065,8 +2065,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2123,7 +2125,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); -- cgit v1.2.3-59-g8ed1b From 6eadbf4c8ba816c10d1c97bed9aa861d9fd17809 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 12 May 2017 10:44:08 -0700 Subject: xfs: BMAPX shouldn't barf on inline-format directories When we're fulfilling a BMAPX request, jump out early if the data fork is in local format. This prevents us from hitting a debugging check in bmapi_read and barfing errors back to userspace. The on-disk extent count check later isn't sufficient for IF_DELALLOC mode because da extents are in memory and not on disk. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_bmap_util.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2b954308a1d6..2e8851ee6759 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -582,9 +582,13 @@ xfs_getbmap( } break; default: + /* Local format data forks report no extents. */ + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + bmv->bmv_entries = 0; + return 0; + } if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) return -EINVAL; if (xfs_get_extsz_hint(ip) || -- cgit v1.2.3-59-g8ed1b From 6e747506dde195d3d05fe2bb8ef78aceba28a5e3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 12 May 2017 10:44:11 -0700 Subject: xfs: fix warnings about unused stack variables Reduce stack usage and get rid of compiler warnings by eliminating unused variables. Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_bmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8adb91b05588..a7048eafa8e6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1280,7 +1280,6 @@ xfs_bmap_read_extents( xfs_bmbt_rec_t *frp; xfs_fsblock_t nextbno; xfs_extnum_t num_recs; - xfs_extnum_t start; num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { @@ -1303,7 +1302,6 @@ xfs_bmap_read_extents( * Copy records into the extent records. */ frp = XFS_BMBT_REC_ADDR(mp, block, 1); - start = i; for (j = 0; j < num_recs; j++, i++, frp++) { xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); trp->l0 = be64_to_cpu(frp->l0); -- cgit v1.2.3-59-g8ed1b From 892d2a5f705723b2cb488bfb38bcbdcf83273184 Mon Sep 17 00:00:00 2001 From: Zorro Lang Date: Mon, 15 May 2017 08:40:02 -0700 Subject: xfs: bad assertion for delalloc an extent that start at i_size By run fsstress long enough time enough in RHEL-7, I find an assertion failure (harder to reproduce on linux-4.11, but problem is still there): XFS: Assertion failed: (iflags & BMV_IF_DELALLOC) != 0, file: fs/xfs/xfs_bmap_util.c The assertion is in xfs_getbmap() funciton: if (map[i].br_startblock == DELAYSTARTBLOCK && --> map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); When map[i].br_startoff == XFS_B_TO_FSB(mp, XFS_ISIZE(ip)), the startoff is just at EOF. But we only need to make sure delalloc extents that are within EOF, not include EOF. Signed-off-by: Zorro Lang Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2e8851ee6759..9e3cc2146d5b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -716,7 +716,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && -- cgit v1.2.3-59-g8ed1b From bafbb9c73241760023d8981191ddd30bb1c6dbac Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 15 May 2017 17:05:47 -0400 Subject: tcp: eliminate negative reordering in tcp_clean_rtx_queue tcp_ack() can call tcp_fragment() which may dededuct the value tp->fackets_out when MSS changes. When prior_fackets is larger than tp->fackets_out, tcp_clean_rtx_queue() can invoke tcp_update_reordering() with negative values. This results in absurd tp->reodering values higher than sysctl_tcp_max_reordering. Note that tcp_update_reordering indeeds sets tp->reordering to min(sysctl_tcp_max_reordering, metric), but because the comparison is signed, a negative metric always wins. Fixes: c7caf8d3ed7a ("[TCP]: Fix reord detection due to snd_una covered holes") Reported-by: Rebecca Isaacs Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06e2dbc2b4a2..174d4376baa5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3190,7 +3190,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets) + if (reord < prior_fackets && reord <= tp->fackets_out) tcp_update_reordering(sk, tp->fackets_out - reord, 0); delta = tcp_is_fack(tp) ? pkts_acked : -- cgit v1.2.3-59-g8ed1b From bcfc7d33110b0f33069d74138eeb7ca9acbb3c85 Mon Sep 17 00:00:00 2001 From: Thomas Winter Date: Tue, 16 May 2017 10:14:44 +1200 Subject: ipmr: vrf: Find VIFs using the actual device The skb->dev that is passed into ip_mr_input is the loX device for VRFs. When we lookup a vif for this dev, none is found as we do not create vifs for loopbacks. Instead lookup a vif for the actual device that the packet was received on, eg the vlan. Signed-off-by: Thomas Winter cc: David Ahern cc: Nikolay Aleksandrov cc: roopa Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3a02d52ed50e..551de4d023a8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1980,6 +1980,20 @@ int ip_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; + struct net_device *dev; + + /* skb->dev passed in is the loX master dev for vrfs. + * As there are no vifs associated with loopback devices, + * get the proper interface that does have a vif associated with it. + */ + dev = skb->dev; + if (netif_is_l3_master(skb->dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); + if (!dev) { + kfree_skb(skb); + return -ENODEV; + } + } /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. @@ -2017,7 +2031,7 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); if (!cache) { - int vif = ipmr_find_vif(mrt, skb->dev); + int vif = ipmr_find_vif(mrt, dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, @@ -2037,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(&mrt_lock); - vif = ipmr_find_vif(mrt, skb->dev); + vif = ipmr_find_vif(mrt, dev); if (vif >= 0) { int err2 = ipmr_cache_unresolved(mrt, vif, skb); read_unlock(&mrt_lock); -- cgit v1.2.3-59-g8ed1b From 6f61dd3aa35179043f1fcdb0965c5d56278ab724 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 12 May 2017 14:52:34 -0700 Subject: efi-pstore: Fix read iter after pstore API refactor During the internal pstore API refactoring, the EFI vars read entry was accidentally made to update a stack variable instead of the pstore private data pointer. This corrects the problem (and removes the now needless argument). Fixes: 125cc42baf8a ("pstore: Replace arguments for read() API") Signed-off-by: Kees Cook --- drivers/firmware/efi/efi-pstore.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 93d8cdbe7ef4..44148fd4c9f2 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -155,19 +155,14 @@ static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos, * efi_pstore_sysfs_entry_iter * * @record: pstore record to pass to callback - * @pos: entry to begin iterating from * * You MUST call efivar_enter_iter_begin() before this function, and * efivar_entry_iter_end() afterwards. * - * It is possible to begin iteration from an arbitrary entry within - * the list by passing @pos. @pos is updated on return to point to - * the next entry of the last one passed to efi_pstore_read_func(). - * To begin iterating from the beginning of the list @pos must be %NULL. */ -static int efi_pstore_sysfs_entry_iter(struct pstore_record *record, - struct efivar_entry **pos) +static int efi_pstore_sysfs_entry_iter(struct pstore_record *record) { + struct efivar_entry **pos = (struct efivar_entry **)&record->psi->data; struct efivar_entry *entry, *n; struct list_head *head = &efivar_sysfs_list; int size = 0; @@ -218,7 +213,6 @@ static int efi_pstore_sysfs_entry_iter(struct pstore_record *record, */ static ssize_t efi_pstore_read(struct pstore_record *record) { - struct efivar_entry *entry = (struct efivar_entry *)record->psi->data; ssize_t size; record->buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL); @@ -229,7 +223,7 @@ static ssize_t efi_pstore_read(struct pstore_record *record) size = -EINTR; goto out; } - size = efi_pstore_sysfs_entry_iter(record, &entry); + size = efi_pstore_sysfs_entry_iter(record); efivar_entry_iter_end(); out: -- cgit v1.2.3-59-g8ed1b From 263eec9b2a82e8697d064709414914b5b10ac538 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 15 May 2017 17:33:37 +0200 Subject: smc: switch to usage of IB_PD_UNSAFE_GLOBAL_RKEY Currently, SMC enables remote access to physical memory when a user has successfully configured and established an SMC-connection until ten minutes after the last SMC connection is closed. Because this is considered a security risk, drivers are supposed to use IB_PD_UNSAFE_GLOBAL_RKEY in such a case. This patch changes the current SMC code to use IB_PD_UNSAFE_GLOBAL_RKEY. This improves user awareness, but does not remove the security risk itself. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 4 ++-- net/smc/smc_core.c | 16 +++------------- net/smc/smc_core.h | 2 +- net/smc/smc_ib.c | 21 ++------------------- net/smc/smc_ib.h | 2 -- 5 files changed, 8 insertions(+), 37 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index e41f594a1e1d..03ec058d18df 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); @@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 65020e93ff21..3ac09a629ea1 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc) rmb_desc = NULL; continue; /* if mapping failed, try smaller one */ } - rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd, - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE, - &rmb_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) { - smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, rmb_desc, - DMA_FROM_DEVICE); - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); - rmb_desc = NULL; - continue; - } + rmb_desc->rkey[SMC_SINGLE_LINK] = + lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; rmb_desc->used = 1; write_lock_bh(&lgr->rmbs_lock); list_add(&rmb_desc->list, @@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && + (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && test_bit(i, lgr->rtokens_used_mask)) { conn->rtoken_idx = i; return 0; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 27eb38056a27..b013cb43a327 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -93,7 +93,7 @@ struct smc_buf_desc { u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; /* mapped address of buffer */ void *cpu_addr; /* virtual address of buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + u32 rkey[SMC_LINKS_PER_LGR_MAX]; /* for rmb only: * rkey provided to peer */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index cb69ab977cd7..b31715505a35 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET; /* unique system * identifier */ -int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct ib_mr **mr) -{ - int rc; - - if (*mr) - return 0; /* already done */ - - /* obtain unique key - - * next invocation of get_dma_mr returns a different key! - */ - *mr = pd->device->get_dma_mr(pd, access_flags); - rc = PTR_ERR_OR_ZERO(*mr); - if (IS_ERR(*mr)) - *mr = NULL; - return rc; -} - static int smc_ib_modify_qp_init(struct smc_link *lnk) { struct ib_qp_attr qp_attr; @@ -210,7 +192,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; - lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); + lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, + IB_PD_UNSAFE_GLOBAL_RKEY); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 7e1f0e24d177..b567152a526d 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -61,8 +61,6 @@ void smc_ib_dealloc_protection_domain(struct smc_link *lnk); int smc_ib_create_protection_domain(struct smc_link *lnk); void smc_ib_destroy_queue_pair(struct smc_link *lnk); int smc_ib_create_queue_pair(struct smc_link *lnk); -int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct ib_mr **mr); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); -- cgit v1.2.3-59-g8ed1b From 19a0f7e37c0761a0a1cbf550705a6063c9675223 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 May 2017 09:51:38 +0300 Subject: net/smc: Add warning about remote memory exposure The driver explicitly bypasses APIs to register all memory once a connection is made, and thus allows remote access to memory. Signed-off-by: Christoph Hellwig Signed-off-by: Leon Romanovsky Acked-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/smc/Kconfig b/net/smc/Kconfig index c717ef0896aa..33954852f3f8 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -8,6 +8,10 @@ config SMC The Linux implementation of the SMC-R solution is designed as a separate socket family SMC. + Warning: SMC will expose all memory for remote reads and writes + once a connection is established. Don't enable this option except + for tightly controlled lab environment. + Select this option if you want to run SMC socket applications config SMC_DIAG -- cgit v1.2.3-59-g8ed1b From f6c5775ff0bfa62b072face6bf1d40f659f194b2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 May 2017 23:19:17 -0700 Subject: net: Improve handling of failures on link and route dumps In general, rtnetlink dumps do not anticipate failure to dump a single object (e.g., link or route) on a single pass. As both route and link objects have grown via more attributes, that is no longer a given. netlink dumps can handle a failure if the dump function returns an error; specifically, netlink_dump adds the return code to the response if it is <= 0 so userspace is notified of the failure. The missing piece is the rtnetlink dump functions returning the error. Fix route and link dump functions to return the errors if no object is added to an skb (detected by skb->len != 0). IPv6 route dumps (rt6_dump_route) already return the error; this patch updates IPv4 and link dumps. Other dump functions may need to be ajusted as well. Reported-by: Jan Moskyto Matejka Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 36 ++++++++++++++++++++++++------------ net/ipv4/fib_frontend.c | 15 +++++++++++---- net/ipv4/fib_trie.c | 26 ++++++++++++++------------ 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7f82c3450b1..49a279a7cc15 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1627,13 +1627,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, flags, ext_filter_mask); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err < 0) - goto out; + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: @@ -1641,10 +1641,12 @@ cont: } } out: + err = skb->len; +out_err: cb->args[1] = idx; cb->args[0] = h; - return skb->len; + return err; } int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, @@ -3453,8 +3455,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) err = br_dev->netdev_ops->ndo_bridge_getlink( skb, portid, seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } @@ -3465,16 +3471,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } } + err = skb->len; +out_err: rcu_read_unlock(); cb->args[0] = idx; - return skb->len; + return err; } static inline size_t bridge_nlmsg_size(void) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 39bd1edee676..83e3ed258467 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -763,7 +763,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0; + int dumped = 0, err; if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) @@ -783,20 +783,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - if (fib_table_dump(tb, skb, cb) < 0) - goto out; + err = fib_table_dump(tb, skb, cb); + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } dumped = 1; next: e++; } } out: + err = skb->len; +out_err: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; - return skb->len; + return err; } /* Prepare and feed intra-kernel routing request. diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1201409ba1dc..51182ff2b441 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1983,6 +1983,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + int err; + if (i < s_i) { i++; continue; @@ -1993,17 +1995,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - xkey, - KEYLENGTH - fa->fa_slen, - fa->fa_tos, - fa->fa_info, NLM_F_MULTI) < 0) { + err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + tb->tb_id, fa->fa_type, + xkey, KEYLENGTH - fa->fa_slen, + fa->fa_tos, fa->fa_info, NLM_F_MULTI); + if (err < 0) { cb->args[4] = i; - return -1; + return err; } i++; } @@ -2025,10 +2024,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, t_key key = cb->args[3]; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { + int err; + + err = fn_trie_dump_leaf(l, tb, skb, cb); + if (err < 0) { cb->args[3] = key; cb->args[2] = count; - return -1; + return err; } ++count; -- cgit v1.2.3-59-g8ed1b From 13840d38016203f0095cd547b90352812d24b787 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 30 Apr 2017 17:32:28 -0400 Subject: dm bufio: make the parameter "retain_bytes" unsigned long Change the type of the parameter "retain_bytes" from unsigned to unsigned long, so that on 64-bit machines the user can set more than 4GiB of data to be retained. Also, change the type of the variable "count" in the function "__evict_old_buffers" to unsigned long. The assignment "count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];" could result in unsigned long to unsigned overflow and that could result in buffers not being freed when they should. While at it, avoid division in get_retain_buffers(). Division is slow, we can change it to shift because we have precalculated the log2 of block size. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5db11a405129..cd8139593ccd 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock); * Buffers are freed after this timeout */ static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; -static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; +static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; static unsigned long dm_bufio_peak_allocated; static unsigned long dm_bufio_allocated_kmem_cache; @@ -1558,10 +1558,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) return true; } -static unsigned get_retain_buffers(struct dm_bufio_client *c) +static unsigned long get_retain_buffers(struct dm_bufio_client *c) { - unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); - return retain_bytes / c->block_size; + unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); + return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT); } static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, @@ -1571,7 +1571,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, struct dm_buffer *b, *tmp; unsigned long freed = 0; unsigned long count = nr_to_scan; - unsigned retain_target = get_retain_buffers(c); + unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { @@ -1794,8 +1794,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz) static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) { struct dm_buffer *b, *tmp; - unsigned retain_target = get_retain_buffers(c); - unsigned count; + unsigned long retain_target = get_retain_buffers(c); + unsigned long count; LIST_HEAD(write_list); dm_bufio_lock(c); @@ -1955,7 +1955,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); -module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR); +module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory"); module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); -- cgit v1.2.3-59-g8ed1b From b401ee0b85a53e89739ff68a5b1a0667d664afc9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 18 Apr 2017 12:41:18 +0200 Subject: KVM: x86: lower default for halt_poll_ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some fio benchmarks, halt_poll_ns=400000 caused CPU utilization to increase heavily even in cases where the performance improvement was small. In particular, bandwidth divided by CPU usage was as much as 60% lower. To some extent this is the expected effect of the patch, and the additional CPU utilization is only visible when running the benchmarks. However, halving the threshold also halves the extra CPU utilization (from +30-130% to +20-70%) and has no negative effect on performance. Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9c761fea0c98..695605eb1dfb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,7 +43,7 @@ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) -#define KVM_HALT_POLL_NS_DEFAULT 400000 +#define KVM_HALT_POLL_NS_DEFAULT 200000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS -- cgit v1.2.3-59-g8ed1b From ea9a46e1c49251331dbfda19ced7114337966178 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 12 May 2017 10:44:10 -0700 Subject: xfs: only return detailed fsmap info if the caller has CAP_SYS_ADMIN There were a number of handwaving complaints that one could "possibly" use inode numbers and extent maps to fingerprint a filesystem hosting multiple containers and somehow use the information to guess at the contents of other containers and attack them. Despite the total lack of any demonstration that this is actually possible, it's easier to restrict access now and broaden it later, so use the rmapbt fsmap backends only if the caller has CAP_SYS_ADMIN. Unprivileged users will just have to make do with only getting the free space and static metadata placement information. Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino --- fs/xfs/xfs_fsmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 3683819887a5..814ed729881d 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -828,6 +828,7 @@ xfs_getfsmap( struct xfs_fsmap dkeys[2]; /* per-dev keys */ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; struct xfs_getfsmap_info info = { NULL }; + bool use_rmap; int i; int error = 0; @@ -837,12 +838,14 @@ xfs_getfsmap( !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) return -EINVAL; + use_rmap = capable(CAP_SYS_ADMIN) && + xfs_sb_version_hasrmapbt(&mp->m_sb); head->fmh_entries = 0; /* Set up our device handlers. */ memset(handlers, 0, sizeof(handlers)); handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (use_rmap) handlers[0].fn = xfs_getfsmap_datadev_rmapbt; else handlers[0].fn = xfs_getfsmap_datadev_bnobt; -- cgit v1.2.3-59-g8ed1b From 2432a3fb5cff026005aaad24e42226402f7fd0aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2017 13:27:49 +0200 Subject: mlx5e: add CONFIG_INET dependency We now reference the arp_tbl, which requires IPv4 support to be enabled in the kernel, otherwise we get a link error: drivers/net/built-in.o: In function `mlx5e_tc_update_neigh_used_value': (.text+0x16afec): undefined reference to `arp_tbl' drivers/net/built-in.o: In function `mlx5e_rep_neigh_init': en_rep.c:(.text+0x16c16d): undefined reference to `arp_tbl' drivers/net/built-in.o: In function `mlx5e_rep_netevent_event': en_rep.c:(.text+0x16cbb5): undefined reference to `arp_tbl' This adds a Kconfig dependency for it. Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index fc52d742b7f7..27251a78075c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -13,7 +13,7 @@ config MLX5_CORE config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" - depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m imply PTP_1588_CLOCK default n -- cgit v1.2.3-59-g8ed1b From 3e21f4af170bebf47c187c1ff8bf155583c9f3b1 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 16 May 2017 19:18:55 +0200 Subject: char: lp: fix possible integer overflow in lp_setup() The lp_setup() code doesn't apply any bounds checking when passing "lp=none", and only in this case, resulting in an overflow of the parport_nr[] array. All versions in Git history are affected. Reported-By: Roee Hay Cc: Ben Hutchings Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/char/lp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 565e4cf04a02..8249762192d5 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -859,7 +859,11 @@ static int __init lp_setup (char *str) } else if (!strcmp(str, "auto")) { parport_nr[0] = LP_PARPORT_AUTO; } else if (!strcmp(str, "none")) { - parport_nr[parport_ptr++] = LP_PARPORT_NONE; + if (parport_ptr < LP_NO) + parport_nr[parport_ptr++] = LP_PARPORT_NONE; + else + printk(KERN_INFO "lp: too many ports, %s ignored.\n", + str); } else if (!strcmp(str, "reset")) { reset = 1; } -- cgit v1.2.3-59-g8ed1b From a20cfc1cde76047657045fc5976834f57422a8c5 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 9 May 2017 13:52:02 +0200 Subject: misc: pci_endpoint_test: select CRC32 There is the following link error with CONFIG_PCI_ENDPOINT_TEST=y and CONFIG_CRC32=m: drivers/built-in.o: In function 'pci_endpoint_test_ioctl': pci_endpoint_test.c:(.text+0xf1251): undefined reference to 'crc32_le' pci_endpoint_test.c:(.text+0xf1322): undefined reference to 'crc32_le' pci_endpoint_test.c:(.text+0xf13b2): undefined reference to 'crc32_le' pci_endpoint_test.c:(.text+0xf141e): undefined reference to 'crc32_le' Fix this by selecting CRC32 in the PCI_ENDPOINT_TEST kconfig entry. Fixes: 2c156ac71c6b ("misc: Add host side PCI driver for PCI test function device") Signed-off-by: Tobias Regnery Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 2cba76e6fa3c..07bbd4cc1852 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -492,6 +492,7 @@ config ASPEED_LPC_CTRL config PCI_ENDPOINT_TEST depends on PCI + select CRC32 tristate "PCI Endpoint Test driver" ---help--- Enable this configuration option to enable the host side test driver -- cgit v1.2.3-59-g8ed1b From 0d83539092ddb1ab79b4d65bccb866bf07ea2ccd Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 9 May 2017 18:58:24 -0500 Subject: uio: fix incorrect memory leak cleanup Commit 75f0aef6220d ("uio: fix memory leak") has fixed up some memory leaks during the failure paths of the addition of uio attributes, but still is not correct entirely. A kobject_uevent() failure still needs a kobject_put() and the kobject container structure allocation failure before the kobject_init() doesn't need a kobject_put(). Fix this properly. Fixes: 75f0aef6220d ("uio: fix memory leak") Signed-off-by: Suman Anna Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 1c196f87e9d9..ff04b7f8549f 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -279,7 +279,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { ret = -ENOMEM; - goto err_map_kobj; + goto err_map; } kobject_init(&map->kobj, &map_attr_type); map->mem = mem; @@ -289,7 +289,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) goto err_map_kobj; ret = kobject_uevent(&map->kobj, KOBJ_ADD); if (ret) - goto err_map; + goto err_map_kobj; } for (pi = 0; pi < MAX_UIO_PORT_REGIONS; pi++) { @@ -308,7 +308,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) portio = kzalloc(sizeof(*portio), GFP_KERNEL); if (!portio) { ret = -ENOMEM; - goto err_portio_kobj; + goto err_portio; } kobject_init(&portio->kobj, &portio_attr_type); portio->port = port; @@ -319,7 +319,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) goto err_portio_kobj; ret = kobject_uevent(&portio->kobj, KOBJ_ADD); if (ret) - goto err_portio; + goto err_portio_kobj; } return 0; -- cgit v1.2.3-59-g8ed1b From 88ad60c23a394b2f8bf1e570c756f415435d1d35 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 16 May 2017 14:07:24 +0200 Subject: i2c: mv64xxx: don't override deferred probing when getting irq There is no reason to use platform_get_irq() for non-DT probing and irq_of_parse_and_map() for DT probing. Indeed, platform_get_irq() works fine for both. In addition, using platform_get_irq() properly returns -EPROBE_DEFER when the interrupt controller is not yet available, so instead of inventing our own error code (-ENXIO), return the one provided by platform_get_irq(). Signed-off-by: Thomas Petazzoni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mv64xxx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index cf737ec8563b..5c4db65c5019 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -819,7 +819,6 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, rc = -EINVAL; goto out; } - drv_data->irq = irq_of_parse_and_map(np, 0); drv_data->rstc = devm_reset_control_get_optional(dev, NULL); if (IS_ERR(drv_data->rstc)) { @@ -902,10 +901,11 @@ mv64xxx_i2c_probe(struct platform_device *pd) if (!IS_ERR(drv_data->clk)) clk_prepare_enable(drv_data->clk); + drv_data->irq = platform_get_irq(pd, 0); + if (pdata) { drv_data->freq_m = pdata->freq_m; drv_data->freq_n = pdata->freq_n; - drv_data->irq = platform_get_irq(pd, 0); drv_data->adapter.timeout = msecs_to_jiffies(pdata->timeout); drv_data->offload_enabled = false; memcpy(&drv_data->reg_offsets, &mv64xxx_i2c_regs_mv64xxx, sizeof(drv_data->reg_offsets)); @@ -915,7 +915,7 @@ mv64xxx_i2c_probe(struct platform_device *pd) goto exit_clk; } if (drv_data->irq < 0) { - rc = -ENXIO; + rc = drv_data->irq; goto exit_reset; } -- cgit v1.2.3-59-g8ed1b From 9a2eba337cacefc95b97c2726e3efdd435b3460e Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 15 May 2017 12:04:31 +0300 Subject: drm/nouveau: Fix drm poll_helper handling Commit cae9ff036eea effectively disabled the drm poll_helper by checking the wrong flag to see if the driver should enable the poll or not: mode_config.poll_enabled is only set to true by poll_init and it is not indicating if the poll is enabled or not. nouveau_display_create() will initialize the poll and going to disable it right away. After poll_init() the mode_config.poll_enabled will be true, but the poll itself is disabled. To avoid the race caused by calling the poll_enable() from different paths, this patch will enable the poll from one place, in the nouveau_display_hpd_work(). In case the pm_runtime is disabled we will enable the poll in nouveau_drm_load() once. Fixes: cae9ff036eea ("drm/nouveau: Don't enabling polling twice on runtime resume") Signed-off-by: Peter Ujfalusi Reviewed-by: Lyude Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_display.c | 6 ++---- drivers/gpu/drm/nouveau/nouveau_drm.c | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 21b10f9840c9..549763f5e17d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -360,6 +360,8 @@ nouveau_display_hpd_work(struct work_struct *work) pm_runtime_get_sync(drm->dev->dev); drm_helper_hpd_irq_event(drm->dev); + /* enable polling for external displays */ + drm_kms_helper_poll_enable(drm->dev); pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_sync(drm->dev->dev); @@ -413,10 +415,6 @@ nouveau_display_init(struct drm_device *dev) if (ret) return ret; - /* enable polling for external displays */ - if (!dev->mode_config.poll_enabled) - drm_kms_helper_poll_enable(dev); - /* enable hotplug interrupts */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct nouveau_connector *conn = nouveau_connector(connector); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 2b6ac24ce690..36268e1802b5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -502,6 +502,9 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) pm_runtime_allow(dev->dev); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put(dev->dev); + } else { + /* enable polling for external displays */ + drm_kms_helper_poll_enable(dev); } return 0; @@ -774,9 +777,6 @@ nouveau_pmops_runtime_resume(struct device *dev) ret = nouveau_do_resume(drm_dev, true); - if (!drm_dev->mode_config.poll_enabled) - drm_kms_helper_poll_enable(drm_dev); - /* do magic */ nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25)); vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON); -- cgit v1.2.3-59-g8ed1b From 563ad2b640955a0da21aedce1aaa332c7d016dde Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 8 May 2017 14:30:43 +0200 Subject: drm/nouveau/secboot: plug memory leak in ls_ucode_img_load_gr() error path The last goto looks spurious because it releases less resources than the previous one. Also free 'img->sig' if 'ls_ucode_img_build()' fails. Fixes: 9d896f3e41a6 ("drm/nouveau/secboot: abstract LS firmware loading functions") Signed-off-by: Christophe JAILLET Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c index d1cf02d22db1..1b0c793c0192 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c @@ -116,6 +116,7 @@ ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img, ret = nvkm_firmware_get(subdev->device, f, &sig); if (ret) goto free_data; + img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL); if (!img->sig) { ret = -ENOMEM; @@ -126,8 +127,9 @@ ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img, img->ucode_data = ls_ucode_img_build(bl, code, data, &img->ucode_desc); if (IS_ERR(img->ucode_data)) { + kfree(img->sig); ret = PTR_ERR(img->ucode_data); - goto free_data; + goto free_sig; } img->ucode_size = img->ucode_desc.image_size; -- cgit v1.2.3-59-g8ed1b From 2579b8b0ece53248b815042f8662a4531acf120d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 27 Apr 2017 12:12:56 +0300 Subject: drm/nouveau/fifo/gk104-: Silence a locking warning Presumably we can never actually hit this return, but static checkers complain that we should unlock before we return. Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index 3a24788c3185..a7e55c422501 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -148,7 +148,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) case NVKM_MEM_TARGET_NCOH: target = 3; break; default: WARN_ON(1); - return; + goto unlock; } nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) | @@ -160,6 +160,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) & 0x00100000), msecs_to_jiffies(2000)) == 0) nvkm_error(subdev, "runlist %d update timeout\n", runl); +unlock: mutex_unlock(&subdev->mutex); } -- cgit v1.2.3-59-g8ed1b From 4fa8324461b824eaea9b6695464395710fe20c44 Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Thu, 11 May 2017 14:34:24 +0200 Subject: scsi: sd: Ignore sync cache failures when not supported Some external hard drives don't support the sync command even though the hard drive has write cache enabled. In this case, upon suspend request, sync cache failures are ignored if the error code in the sense header is ILLEGAL_REQUEST. There's not much we can do for these drives, so we shouldn't fail to suspend for this error case. The drive may stay powered if that's the setup for the port it's plugged into. Signed-off-by: Derek Basehore Signed-off-by: Thierry Escande Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index de9e2f2ef662..b6bb4e0ce0e3 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1582,17 +1582,21 @@ out: return retval; } -static int sd_sync_cache(struct scsi_disk *sdkp) +static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { int retries, res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; - struct scsi_sense_hdr sshdr; + struct scsi_sense_hdr my_sshdr; if (!scsi_device_online(sdp)) return -ENODEV; + /* caller might not be interested in sense, but we need it */ + if (!sshdr) + sshdr = &my_sshdr; + for (retries = 3; retries > 0; --retries) { unsigned char cmd[10] = { 0 }; @@ -1601,7 +1605,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp) * Leave the rest of the command zero to indicate * flush everything. */ - res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, sshdr, timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL); if (res == 0) break; @@ -1611,11 +1615,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp) sd_print_result(sdkp, "Synchronize Cache(10) failed", res); if (driver_byte(res) & DRIVER_SENSE) - sd_print_sense_hdr(sdkp, &sshdr); + sd_print_sense_hdr(sdkp, sshdr); + /* we need to evaluate the error return */ - if (scsi_sense_valid(&sshdr) && - (sshdr.asc == 0x3a || /* medium not present */ - sshdr.asc == 0x20)) /* invalid command */ + if (scsi_sense_valid(sshdr) && + (sshdr->asc == 0x3a || /* medium not present */ + sshdr->asc == 0x20)) /* invalid command */ /* this is no error here */ return 0; @@ -3459,7 +3464,7 @@ static void sd_shutdown(struct device *dev) if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - sd_sync_cache(sdkp); + sd_sync_cache(sdkp, NULL); } if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { @@ -3471,6 +3476,7 @@ static void sd_shutdown(struct device *dev) static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_sense_hdr sshdr; int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ @@ -3478,12 +3484,23 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - ret = sd_sync_cache(sdkp); + ret = sd_sync_cache(sdkp, &sshdr); + if (ret) { /* ignore OFFLINE device */ if (ret == -ENODEV) - ret = 0; - goto done; + return 0; + + if (!scsi_sense_valid(&sshdr) || + sshdr.sense_key != ILLEGAL_REQUEST) + return ret; + + /* + * sshdr.sense_key == ILLEGAL_REQUEST means this drive + * doesn't support sync. There's not much to do and + * suspend shouldn't fail. + */ + ret = 0; } } @@ -3495,7 +3512,6 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) ret = 0; } -done: return ret; } -- cgit v1.2.3-59-g8ed1b From dd6e1f71b785a6ac2511e2ddb86315f292873e59 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 11 May 2017 17:24:44 -0500 Subject: scsi: libfc: fix incorrect variable assignment Previous assignment was causing the use of the uninitialized variable _explan_ inside fc_seq_ls_rjt() function, which in this particular case is being called by fc_seq_els_rsp_send(). [mkp: fixed typo] Addresses-Coverity-ID: 1398125 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_rport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index b44c3136eb51..520325867e2b 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -1422,7 +1422,7 @@ static void fc_rport_recv_rtv_req(struct fc_rport_priv *rdata, fp = fc_frame_alloc(lport, sizeof(*rtv)); if (!fp) { rjt_data.reason = ELS_RJT_UNAB; - rjt_data.reason = ELS_EXPL_INSUF_RES; + rjt_data.explan = ELS_EXPL_INSUF_RES; fc_seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data); goto drop; } -- cgit v1.2.3-59-g8ed1b From 845d9e8df2fa879e6494e786f290e1fd5560ac8c Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:38 -0700 Subject: scsi: lpfc: Fix used-RPI accounting problem. With 255 vports created a link trasition can casue a crash. When going through discovery after a link bounce the driver is using rpis before the cmd FCOE_POST_HDR_TEMPLATES completes. By doing that the next rpi bumps the rpi range out of the boundary. The fix it to increment the next_rpi only when the FCOE_POST_HDR_TEMPLATE succeeds. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 3 ++- drivers/scsi/lpfc/lpfc_init.c | 24 +++++------------------- drivers/scsi/lpfc/lpfc_sli.c | 8 ++++++++ drivers/scsi/lpfc/lpfc_sli4.h | 1 + 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 67827e397431..3f9f6d5f8c69 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8667,7 +8667,8 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_do_scr_ns_plogi(phba, vport); goto out; fdisc_failed: - if (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS) + if (vport->fc_vport && + (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS)) lpfc_vport_set_state(vport, FC_VPORT_FAILED); /* Cancel discovery timer */ lpfc_can_disctmo(vport); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4b1eb98c228d..b1b181a756dc 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6525,7 +6525,6 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) uint16_t rpi_limit, curr_rpi_range; struct lpfc_dmabuf *dmabuf; struct lpfc_rpi_hdr *rpi_hdr; - uint32_t rpi_count; /* * If the SLI4 port supports extents, posting the rpi header isn't @@ -6538,8 +6537,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) return NULL; /* The limit on the logical index is just the max_rpi count. */ - rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base + - phba->sli4_hba.max_cfg_param.max_rpi - 1; + rpi_limit = phba->sli4_hba.max_cfg_param.max_rpi; spin_lock_irq(&phba->hbalock); /* @@ -6550,18 +6548,10 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) curr_rpi_range = phba->sli4_hba.next_rpi; spin_unlock_irq(&phba->hbalock); - /* - * The port has a limited number of rpis. The increment here - * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value - * and to allow the full max_rpi range per port. - */ - if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit) - rpi_count = rpi_limit - curr_rpi_range; - else - rpi_count = LPFC_RPI_HDR_COUNT; - - if (!rpi_count) + /* Reached full RPI range */ + if (curr_rpi_range == rpi_limit) return NULL; + /* * First allocate the protocol header region for the port. The * port expects a 4KB DMA-mapped memory region that is 4K aligned. @@ -6595,13 +6585,9 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) /* The rpi_hdr stores the logical index only. */ rpi_hdr->start_rpi = curr_rpi_range; + rpi_hdr->next_rpi = phba->sli4_hba.next_rpi + LPFC_RPI_HDR_COUNT; list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list); - /* - * The next_rpi stores the next logical module-64 rpi value used - * to post physical rpis in subsequent rpi postings. - */ - phba->sli4_hba.next_rpi += rpi_count; spin_unlock_irq(&phba->hbalock); return rpi_hdr; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 2a4fc00dfa9b..e2d25ae5ba45 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -17137,6 +17137,14 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page) "status x%x add_status x%x, mbx status x%x\n", shdr_status, shdr_add_status, rc); rc = -ENXIO; + } else { + /* + * The next_rpi stores the next logical module-64 rpi value used + * to post physical rpis in subsequent rpi postings. + */ + spin_lock_irq(&phba->hbalock); + phba->sli4_hba.next_rpi = rpi_page->next_rpi; + spin_unlock_irq(&phba->hbalock); } return rc; } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index da46471337c8..915e8d5581bd 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -698,6 +698,7 @@ struct lpfc_rpi_hdr { struct lpfc_dmabuf *dmabuf; uint32_t page_count; uint32_t start_rpi; + uint16_t next_rpi; }; struct lpfc_rsrc_blks { -- cgit v1.2.3-59-g8ed1b From 0c9c6a75141810acade82add4f4708959a5d3a1d Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:39 -0700 Subject: scsi: lpfc: Fix system crash when port is reset. The driver panic when using the els_wq during port reset. Check for NULL els_wq before dereferencing. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 8 ++++++-- drivers/scsi/lpfc/lpfc_hbadisc.c | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 6d7840b096e6..62571fa9c6ad 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1228,7 +1228,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) static inline struct lpfc_sli_ring * lpfc_phba_elsring(struct lpfc_hba *phba) { - if (phba->sli_rev == LPFC_SLI_REV4) - return phba->sli4_hba.els_wq->pring; + if (phba->sli_rev == LPFC_SLI_REV4) { + if (phba->sli4_hba.els_wq) + return phba->sli4_hba.els_wq->pring; + else + return NULL; + } return &phba->sli.sli3_ring[LPFC_ELS_RING]; } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 0482c5580331..dcc9b3858778 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -693,9 +693,9 @@ lpfc_work_done(struct lpfc_hba *phba) pring = lpfc_phba_elsring(phba); status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING))); status >>= (4*LPFC_ELS_RING); - if ((status & HA_RXMASK) || - (pring->flag & LPFC_DEFERRED_RING_EVENT) || - (phba->hba_flag & HBA_SP_QUEUE_EVT)) { + if (pring && (status & HA_RXMASK || + pring->flag & LPFC_DEFERRED_RING_EVENT || + phba->hba_flag & HBA_SP_QUEUE_EVT)) { if (pring->flag & LPFC_STOP_IOCB_EVENT) { pring->flag |= LPFC_DEFERRED_RING_EVENT; /* Set the lpfc data pending flag */ -- cgit v1.2.3-59-g8ed1b From 547077a44b3b49f56c0f05c0b46c8c617dea591d Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:40 -0700 Subject: scsi: lpfc: Adding additional stats counters for nvme. More debug messages added for nvme statistics. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 24 ++++++++++++--------- drivers/scsi/lpfc/lpfc_debugfs.c | 27 +++++++++++++---------- drivers/scsi/lpfc/lpfc_nvmet.c | 46 ++++++++++++++++++++++++++++++---------- drivers/scsi/lpfc/lpfc_nvmet.h | 12 ++++++----- drivers/scsi/lpfc/lpfc_sli.c | 38 ++++++++++++++++++++++++++++----- drivers/scsi/lpfc/lpfc_sli4.h | 2 +- 6 files changed, 106 insertions(+), 43 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4830370bfab1..41ec7451689b 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -205,8 +205,9 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&tgtp->xmt_ls_rsp_error)); len += snprintf(buf+len, PAGE_SIZE-len, - "FCP: Rcv %08x Drop %08x\n", + "FCP: Rcv %08x Release %08x Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->xmt_fcp_release), atomic_read(&tgtp->rcv_fcp_cmd_drop)); if (atomic_read(&tgtp->rcv_fcp_cmd_in) != @@ -218,15 +219,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, } len += snprintf(buf+len, PAGE_SIZE-len, - "FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n", + "FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x " + "drop %08x\n", atomic_read(&tgtp->xmt_fcp_read), atomic_read(&tgtp->xmt_fcp_read_rsp), atomic_read(&tgtp->xmt_fcp_write), - atomic_read(&tgtp->xmt_fcp_rsp)); - - len += snprintf(buf+len, PAGE_SIZE-len, - "FCP Rsp: abort %08x drop %08x\n", - atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_rsp), atomic_read(&tgtp->xmt_fcp_drop)); len += snprintf(buf+len, PAGE_SIZE-len, @@ -236,10 +234,16 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&tgtp->xmt_fcp_rsp_drop)); len += snprintf(buf+len, PAGE_SIZE-len, - "ABORT: Xmt %08x Err %08x Cmpl %08x", + "ABORT: Xmt %08x Cmpl %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_abort_cmpl)); + + len += snprintf(buf + len, PAGE_SIZE - len, + "ABORT: Sol %08x Usol %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_sol), + atomic_read(&tgtp->xmt_abort_unsol), atomic_read(&tgtp->xmt_abort_rsp), - atomic_read(&tgtp->xmt_abort_rsp_error), - atomic_read(&tgtp->xmt_abort_cmpl)); + atomic_read(&tgtp->xmt_abort_rsp_error)); len += snprintf(buf+len, PAGE_SIZE-len, "\n"); return len; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index fce549a91911..a41daedeb967 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -797,11 +797,6 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&tgtp->xmt_fcp_write), atomic_read(&tgtp->xmt_fcp_rsp)); - len += snprintf(buf + len, size - len, - "FCP Rsp: abort %08x drop %08x\n", - atomic_read(&tgtp->xmt_fcp_abort), - atomic_read(&tgtp->xmt_fcp_drop)); - len += snprintf(buf + len, size - len, "FCP Rsp Cmpl: %08x err %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_cmpl), @@ -809,10 +804,16 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&tgtp->xmt_fcp_rsp_drop)); len += snprintf(buf + len, size - len, - "ABORT: Xmt %08x Err %08x Cmpl %08x", + "ABORT: Xmt %08x Cmpl %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_abort_cmpl)); + + len += snprintf(buf + len, size - len, + "ABORT: Sol %08x Usol %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_sol), + atomic_read(&tgtp->xmt_abort_unsol), atomic_read(&tgtp->xmt_abort_rsp), - atomic_read(&tgtp->xmt_abort_rsp_error), - atomic_read(&tgtp->xmt_abort_cmpl)); + atomic_read(&tgtp->xmt_abort_rsp_error)); len += snprintf(buf + len, size - len, "\n"); @@ -1959,6 +1960,7 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf, atomic_set(&tgtp->rcv_ls_req_out, 0); atomic_set(&tgtp->rcv_ls_req_drop, 0); atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_abort_cmpl, 0); atomic_set(&tgtp->xmt_ls_rsp, 0); atomic_set(&tgtp->xmt_ls_drop, 0); atomic_set(&tgtp->xmt_ls_rsp_error, 0); @@ -1967,19 +1969,22 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf, atomic_set(&tgtp->rcv_fcp_cmd_in, 0); atomic_set(&tgtp->rcv_fcp_cmd_out, 0); atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); - atomic_set(&tgtp->xmt_fcp_abort, 0); atomic_set(&tgtp->xmt_fcp_drop, 0); atomic_set(&tgtp->xmt_fcp_read_rsp, 0); atomic_set(&tgtp->xmt_fcp_read, 0); atomic_set(&tgtp->xmt_fcp_write, 0); atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_release, 0); atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); atomic_set(&tgtp->xmt_fcp_rsp_error, 0); atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0); + atomic_set(&tgtp->xmt_abort_sol, 0); + atomic_set(&tgtp->xmt_abort_unsol, 0); atomic_set(&tgtp->xmt_abort_rsp, 0); atomic_set(&tgtp->xmt_abort_rsp_error, 0); - atomic_set(&tgtp->xmt_abort_cmpl, 0); } return nbytes; } @@ -3143,7 +3148,7 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, "\t\t%s RQ info: ", rqtype); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x " - "trunc:x%x rcv:x%llx]\n", + "posted:x%x rcv:x%llx]\n", qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 94434e621c33..bb12e2c9fbf4 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -502,6 +502,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, "6150 LS Drop IO x%x: Prep\n", ctxp->oxid); lpfc_in_buf_free(phba, &nvmebuf->dbuf); + atomic_inc(&nvmep->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); return -ENOMEM; @@ -545,6 +546,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, lpfc_nlp_put(nvmewqeq->context1); lpfc_in_buf_free(phba, &nvmebuf->dbuf); + atomic_inc(&nvmep->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); return -ENXIO; } @@ -692,6 +694,7 @@ static void lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; struct lpfc_nvmet_rcv_ctx *ctxp = container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; @@ -710,6 +713,8 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid, ctxp->state, 0); + atomic_inc(&lpfc_nvmep->xmt_fcp_release); + if (aborting) return; @@ -796,6 +801,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) atomic_set(&tgtp->rcv_ls_req_out, 0); atomic_set(&tgtp->rcv_ls_req_drop, 0); atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_abort_cmpl, 0); atomic_set(&tgtp->xmt_ls_rsp, 0); atomic_set(&tgtp->xmt_ls_drop, 0); atomic_set(&tgtp->xmt_ls_rsp_error, 0); @@ -803,18 +809,21 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) atomic_set(&tgtp->rcv_fcp_cmd_in, 0); atomic_set(&tgtp->rcv_fcp_cmd_out, 0); atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); - atomic_set(&tgtp->xmt_fcp_abort, 0); atomic_set(&tgtp->xmt_fcp_drop, 0); atomic_set(&tgtp->xmt_fcp_read_rsp, 0); atomic_set(&tgtp->xmt_fcp_read, 0); atomic_set(&tgtp->xmt_fcp_write, 0); atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_release, 0); atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); atomic_set(&tgtp->xmt_fcp_rsp_error, 0); atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0); + atomic_set(&tgtp->xmt_abort_unsol, 0); + atomic_set(&tgtp->xmt_abort_sol, 0); atomic_set(&tgtp->xmt_abort_rsp, 0); atomic_set(&tgtp->xmt_abort_rsp_error, 0); - atomic_set(&tgtp->xmt_abort_cmpl, 0); } return error; } @@ -1011,6 +1020,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, oxid = 0; size = 0; sid = 0; + ctxp = NULL; goto dropit; } @@ -1117,6 +1127,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, oxid = 0; size = 0; sid = 0; + ctxp = NULL; goto dropit; } @@ -1193,8 +1204,11 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6159 FCP Drop IO x%x: err x%x\n", - ctxp->oxid, rc); + "6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", + ctxp->oxid, rc, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); dropit: lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", oxid, size, sid); @@ -1206,7 +1220,7 @@ dropit: if (nvmebuf) { nvmebuf->iocbq->hba_wqidx = 0; /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ - lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + lpfc_nvmet_rq_post(phba, ctxp, &nvmebuf->hbuf); } #endif } @@ -1812,7 +1826,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_abort_cmpl); + if (ctxp->flag & LPFC_NVMET_ABORT_OP) + atomic_inc(&tgtp->xmt_fcp_abort_cmpl); ctxp->state = LPFC_NVMET_STE_DONE; @@ -1827,6 +1842,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } ctxp->flag &= ~LPFC_NVMET_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); + atomic_inc(&tgtp->xmt_abort_rsp); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6165 ABORT cmpl: xri x%x flg x%x (%d) " @@ -1877,7 +1893,8 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_abort_cmpl); + if (ctxp->flag & LPFC_NVMET_ABORT_OP) + atomic_inc(&tgtp->xmt_fcp_abort_cmpl); if (!ctxp) { /* if context is clear, related io alrady complete */ @@ -1907,6 +1924,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } ctxp->flag &= ~LPFC_NVMET_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); + atomic_inc(&tgtp->xmt_abort_rsp); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6316 ABTS cmpl xri x%x flg x%x (%x) " @@ -1953,7 +1971,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_abort_cmpl); + atomic_inc(&tgtp->xmt_ls_abort_cmpl); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", @@ -2104,6 +2122,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* Issue ABTS for this WQE based on iotag */ ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); if (!ctxp->abort_wqeq) { + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); @@ -2128,6 +2147,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* driver queued commands are in process of being flushed */ if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { spin_unlock_irqrestore(&phba->hbalock, flags); + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME, "6163 Driver in reset cleanup - flushing " "NVME Req now. hba_flag x%x oxid x%x\n", @@ -2140,6 +2160,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* Outstanding abort is in progress */ if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME, "6164 Outstanding NVME I/O Abort Request " "still pending on oxid x%x\n", @@ -2190,9 +2211,12 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, abts_wqeq->context2 = ctxp; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); - if (rc == WQE_SUCCESS) + if (rc == WQE_SUCCESS) { + atomic_inc(&tgtp->xmt_abort_sol); return 0; + } + atomic_inc(&tgtp->xmt_abort_rsp_error); ctxp->flag &= ~LPFC_NVMET_ABORT_OP; lpfc_sli_release_iocbq(phba, abts_wqeq); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, @@ -2231,11 +2255,11 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); if (rc == WQE_SUCCESS) { - atomic_inc(&tgtp->xmt_abort_rsp); return 0; } aerr: + atomic_inc(&tgtp->xmt_abort_rsp_error); ctxp->flag &= ~LPFC_NVMET_ABORT_OP; atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, @@ -2279,7 +2303,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); if (rc == WQE_SUCCESS) { - atomic_inc(&tgtp->xmt_abort_rsp); + atomic_inc(&tgtp->xmt_abort_unsol); return 0; } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 128759fe6650..837210a3e7c8 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -34,6 +34,7 @@ struct lpfc_nvmet_tgtport { atomic_t rcv_ls_req_out; atomic_t rcv_ls_req_drop; atomic_t xmt_ls_abort; + atomic_t xmt_ls_abort_cmpl; /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ atomic_t xmt_ls_rsp; @@ -47,9 +48,9 @@ struct lpfc_nvmet_tgtport { atomic_t rcv_fcp_cmd_in; atomic_t rcv_fcp_cmd_out; atomic_t rcv_fcp_cmd_drop; + atomic_t xmt_fcp_release; /* Stats counters - lpfc_nvmet_xmt_fcp_op */ - atomic_t xmt_fcp_abort; atomic_t xmt_fcp_drop; atomic_t xmt_fcp_read_rsp; atomic_t xmt_fcp_read; @@ -62,12 +63,13 @@ struct lpfc_nvmet_tgtport { atomic_t xmt_fcp_rsp_drop; - /* Stats counters - lpfc_nvmet_unsol_issue_abort */ + /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ + atomic_t xmt_fcp_abort; + atomic_t xmt_fcp_abort_cmpl; + atomic_t xmt_abort_sol; + atomic_t xmt_abort_unsol; atomic_t xmt_abort_rsp; atomic_t xmt_abort_rsp_error; - - /* Stats counters - lpfc_nvmet_xmt_abort_cmp */ - atomic_t xmt_abort_cmpl; }; struct lpfc_nvmet_rcv_ctx { diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index e2d25ae5ba45..333c5094b97d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -512,6 +512,7 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, } else { return -EINVAL; } + hq->RQ_buf_posted += hq->entry_repost; writel(doorbell.word0, hq->db_regaddr); } return put_index; @@ -12788,6 +12789,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) struct fc_frame_header *fc_hdr; struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq; struct lpfc_queue *drq = phba->sli4_hba.dat_rq; + struct lpfc_nvmet_tgtport *tgtp; struct hbq_dmabuf *dma_buf; uint32_t status, rq_id; unsigned long iflags; @@ -12808,7 +12810,6 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) case FC_STATUS_RQ_BUF_LEN_EXCEEDED: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "2537 Receive Frame Truncated!!\n"); - hrq->RQ_buf_trunc++; case FC_STATUS_RQ_SUCCESS: lpfc_sli4_rq_release(hrq, drq); spin_lock_irqsave(&phba->hbalock, iflags); @@ -12819,6 +12820,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) goto out; } hrq->RQ_rcv_buf++; + hrq->RQ_buf_posted--; memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe)); /* If a NVME LS event (type 0x28), treat it as Fast path */ @@ -12832,8 +12834,21 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) spin_unlock_irqrestore(&phba->hbalock, iflags); workposted = true; break; - case FC_STATUS_INSUFF_BUF_NEED_BUF: case FC_STATUS_INSUFF_BUF_FRM_DISC: + if (phba->nvmet_support) { + tgtp = phba->targetport->private; + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME, + "6402 RQE Error x%x, posted %d err_cnt " + "%d: %x %x %x\n", + status, hrq->RQ_buf_posted, + hrq->RQ_no_posted_buf, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + } + /* fallthrough */ + + case FC_STATUS_INSUFF_BUF_NEED_BUF: hrq->RQ_no_posted_buf++; /* Post more buffers if possible */ spin_lock_irqsave(&phba->hbalock, iflags); @@ -13135,6 +13150,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_queue *drq; struct rqb_dmabuf *dma_buf; struct fc_frame_header *fc_hdr; + struct lpfc_nvmet_tgtport *tgtp; uint32_t status, rq_id; unsigned long iflags; uint32_t fctl, idx; @@ -13165,8 +13181,6 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, case FC_STATUS_RQ_BUF_LEN_EXCEEDED: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6126 Receive Frame Truncated!!\n"); - hrq->RQ_buf_trunc++; - break; case FC_STATUS_RQ_SUCCESS: lpfc_sli4_rq_release(hrq, drq); spin_lock_irqsave(&phba->hbalock, iflags); @@ -13178,6 +13192,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, } spin_unlock_irqrestore(&phba->hbalock, iflags); hrq->RQ_rcv_buf++; + hrq->RQ_buf_posted--; fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; /* Just some basic sanity checks on FCP Command frame */ @@ -13200,8 +13215,21 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, drop: lpfc_in_buf_free(phba, &dma_buf->dbuf); break; - case FC_STATUS_INSUFF_BUF_NEED_BUF: case FC_STATUS_INSUFF_BUF_FRM_DISC: + if (phba->nvmet_support) { + tgtp = phba->targetport->private; + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME, + "6401 RQE Error x%x, posted %d err_cnt " + "%d: %x %x %x\n", + status, hrq->RQ_buf_posted, + hrq->RQ_no_posted_buf, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + } + /* fallthrough */ + + case FC_STATUS_INSUFF_BUF_NEED_BUF: hrq->RQ_no_posted_buf++; /* Post more buffers if possible */ spin_lock_irqsave(&phba->hbalock, iflags); diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 915e8d5581bd..7a8cbeb6a745 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -195,7 +195,7 @@ struct lpfc_queue { /* defines for RQ stats */ #define RQ_no_posted_buf q_cnt_1 #define RQ_no_buf_found q_cnt_2 -#define RQ_buf_trunc q_cnt_3 +#define RQ_buf_posted q_cnt_3 #define RQ_rcv_buf q_cnt_4 uint64_t isr_timestamp; -- cgit v1.2.3-59-g8ed1b From 61f3d4bf4f8f062cf6be143c9b7adbc3a017ea6e Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:41 -0700 Subject: scsi: lpfc: Fix nvmet RQ resource needs for large block writes. Large block writes to the nvme target were failing because the default number of RQs posted was insufficient. Expand the NVMET RQs to 2048 RQEs and ensure a minimum of 512 RQEs are posted, no matter how many MRQs are configured. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 6 +++--- drivers/scsi/lpfc/lpfc_init.c | 23 +++++++++++++------- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.h | 1 + drivers/scsi/lpfc/lpfc_sli.c | 49 +++++++++--------------------------------- drivers/scsi/lpfc/lpfc_sli4.h | 2 +- 6 files changed, 31 insertions(+), 52 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 41ec7451689b..129d6cd7635b 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -60,9 +60,9 @@ #define LPFC_MIN_DEVLOSS_TMO 1 #define LPFC_MAX_DEVLOSS_TMO 255 -#define LPFC_DEF_MRQ_POST 256 -#define LPFC_MIN_MRQ_POST 32 -#define LPFC_MAX_MRQ_POST 512 +#define LPFC_DEF_MRQ_POST 512 +#define LPFC_MIN_MRQ_POST 512 +#define LPFC_MAX_MRQ_POST 2048 /* * Write key size should be multiple of 4. If write key is changed diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index b1b181a756dc..5f62e3a1dff6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3390,6 +3390,11 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; + + /* Ensure we at least meet the minimun for the system */ + if (nvmet_xri_cnt < LPFC_NVMET_RQE_DEF_COUNT) + nvmet_xri_cnt = LPFC_NVMET_RQE_DEF_COUNT; + tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; if (nvmet_xri_cnt > tot_cnt) { phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq; @@ -8158,7 +8163,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create NVMET Receive Queue for header */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize, - phba->sli4_hba.rq_ecount); + LPFC_NVMET_RQE_DEF_COUNT); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3146 Failed allocate " @@ -8180,7 +8185,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create NVMET Receive Queue for data */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize, - phba->sli4_hba.rq_ecount); + LPFC_NVMET_RQE_DEF_COUNT); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3156 Failed allocate " @@ -8770,9 +8775,6 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) goto out_destroy; } - lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ); - lpfc_rq_adjust_repost(phba, phba->sli4_hba.dat_rq, LPFC_ELS_HBQ); - rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq, phba->sli4_hba.els_cq, LPFC_USOL); if (rc) { @@ -11096,7 +11098,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct lpfc_hba *phba; struct lpfc_vport *vport = NULL; struct Scsi_Host *shost = NULL; - int error, cnt; + int error, cnt, num; uint32_t cfg_mode, intr_mode; /* Allocate memory for HBA structure */ @@ -11131,8 +11133,13 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) } cnt = phba->cfg_iocb_cnt * 1024; - if (phba->nvmet_support) - cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq; + if (phba->nvmet_support) { + /* Ensure we at least meet the minimun for the system */ + num = (phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq); + if (num < LPFC_NVMET_RQE_DEF_COUNT) + num = LPFC_NVMET_RQE_DEF_COUNT; + cnt += num; + } /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index bb12e2c9fbf4..dfa7296499cf 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -614,9 +614,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); + ctxp->flag |= LPFC_NVMET_IO_INP; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); if (rc == WQE_SUCCESS) { - ctxp->flag |= LPFC_NVMET_IO_INP; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (!phba->ktime_on) return 0; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 837210a3e7c8..55f2a859dc70 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -22,6 +22,7 @@ ********************************************************************/ #define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ +#define LPFC_NVMET_RQE_DEF_COUNT 512 #define LPFC_NVMET_SUCCESS_LEN 12 /* Used for NVME Target */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 333c5094b97d..f344abce4949 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -479,22 +479,23 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, if (unlikely(!hq) || unlikely(!dq)) return -ENOMEM; put_index = hq->host_index; - temp_hrqe = hq->qe[hq->host_index].rqe; + temp_hrqe = hq->qe[put_index].rqe; temp_drqe = dq->qe[dq->host_index].rqe; if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ) return -EINVAL; - if (hq->host_index != dq->host_index) + if (put_index != dq->host_index) return -EINVAL; /* If the host has not yet processed the next entry then we are done */ - if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index) + if (((put_index + 1) % hq->entry_count) == hq->hba_index) return -EBUSY; lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size); lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size); /* Update the host index to point to the next slot */ - hq->host_index = ((hq->host_index + 1) % hq->entry_count); + hq->host_index = ((put_index + 1) % hq->entry_count); dq->host_index = ((dq->host_index + 1) % dq->entry_count); + hq->RQ_buf_posted++; /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { @@ -512,7 +513,6 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, } else { return -EINVAL; } - hq->RQ_buf_posted += hq->entry_repost; writel(doorbell.word0, hq->db_regaddr); } return put_index; @@ -6905,14 +6905,9 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&rqbp->rqb_buffer_list); rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc; rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free; - rqbp->entry_count = 256; + rqbp->entry_count = LPFC_NVMET_RQE_DEF_COUNT; rqbp->buffer_count = 0; - /* Divide by 4 and round down to multiple of 16 */ - rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8; - phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc; - phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc; - lpfc_post_rq_buffer( phba, phba->sli4_hba.nvmet_mrq_hdr[i], phba->sli4_hba.nvmet_mrq_data[i], @@ -14892,34 +14887,6 @@ out: return status; } -/** - * lpfc_rq_adjust_repost - Adjust entry_repost for an RQ - * @phba: HBA structure that indicates port to create a queue on. - * @rq: The queue structure to use for the receive queue. - * @qno: The associated HBQ number - * - * - * For SLI4 we need to adjust the RQ repost value based on - * the number of buffers that are initially posted to the RQ. - */ -void -lpfc_rq_adjust_repost(struct lpfc_hba *phba, struct lpfc_queue *rq, int qno) -{ - uint32_t cnt; - - /* sanity check on queue memory */ - if (!rq) - return; - cnt = lpfc_hbq_defs[qno]->entry_count; - - /* Recalc repost for RQs based on buffers initially posted */ - cnt = (cnt >> 3); - if (cnt < LPFC_QUEUE_MIN_REPOST) - cnt = LPFC_QUEUE_MIN_REPOST; - - rq->entry_repost = cnt; -} - /** * lpfc_rq_create - Create a Receive Queue on the HBA * @phba: HBA structure that indicates port to create a queue on. @@ -15105,6 +15072,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, hrq->subtype = subtype; hrq->host_index = 0; hrq->hba_index = 0; + hrq->entry_repost = LPFC_RQ_REPOST; /* now create the data queue */ lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, @@ -15186,6 +15154,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, drq->subtype = subtype; drq->host_index = 0; drq->hba_index = 0; + drq->entry_repost = LPFC_RQ_REPOST; /* link the header and data RQs onto the parent cq child list */ list_add_tail(&hrq->list, &cq->child_list); @@ -15343,6 +15312,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, hrq->subtype = subtype; hrq->host_index = 0; hrq->hba_index = 0; + hrq->entry_repost = LPFC_RQ_REPOST; drq->db_format = LPFC_DB_RING_FORMAT; drq->db_regaddr = phba->sli4_hba.RQDBregaddr; @@ -15351,6 +15321,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, drq->subtype = subtype; drq->host_index = 0; drq->hba_index = 0; + drq->entry_repost = LPFC_RQ_REPOST; list_add_tail(&hrq->list, &cq->child_list); list_add_tail(&drq->list, &cq->child_list); diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 7a8cbeb6a745..422bde85c9f1 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -156,6 +156,7 @@ struct lpfc_queue { uint32_t entry_size; /* Size of each queue entry. */ uint32_t entry_repost; /* Count of entries before doorbell is rung */ #define LPFC_QUEUE_MIN_REPOST 8 +#define LPFC_RQ_REPOST 64 uint32_t queue_id; /* Queue ID assigned by the hardware */ uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */ uint32_t page_count; /* Number of pages allocated for this queue */ @@ -763,7 +764,6 @@ int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *, int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, struct lpfc_queue **drqp, struct lpfc_queue **cqp, uint32_t subtype); -void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int); int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *); int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *); int lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *); -- cgit v1.2.3-59-g8ed1b From 3120046a970aee08a0787fb6792590f1e0047f62 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:42 -0700 Subject: scsi: lpfc: Fix NVMEI driver not decrementing counter causing bad rport state. During driver boot, a latency in the NVMET driver side causes the incoming NVMEI PRLI to get rejected by the NVMET driver. When this happens, the NVMEI driver runs out of PRLI retries. Bouncing the link does not fix the situation. If the NVMEI driver decides, on PRLI completion failures, to retry the PRLI, always decrement the fc4_prli_sent counter. This allows the PRLI completion to resolve to UNMAPPED when NVMET rejects the PRLI. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3f9f6d5f8c69..3085895464d9 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2077,16 +2077,19 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (irsp->ulpStatus) { /* Check for retry */ + ndlp->fc4_prli_sent--; if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { /* ELS command is being retried */ - ndlp->fc4_prli_sent--; goto out; } + /* PRLI failed */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, - "2754 PRLI failure DID:%06X Status:x%x/x%x\n", + "2754 PRLI failure DID:%06X Status:x%x/x%x, " + "data: x%x\n", ndlp->nlp_DID, irsp->ulpStatus, - irsp->un.ulpWord[4]); + irsp->un.ulpWord[4], ndlp->fc4_prli_sent); + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ if (lpfc_error_lost_link(irsp)) goto out; -- cgit v1.2.3-59-g8ed1b From 7869da183a7cfc8a2189f6eddd3bc558be40d5e3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:43 -0700 Subject: scsi: lpfc: Fix NMI watchdog assertions when running nvmet IOPS tests After running IOPS test for 30 second we get kernel:NMI watchdog: Watchdog detected hard LOCKUP on cpu 0 The driver is speend too much time in its ISR. In ISR EQ and CQ processing routines, if we hit the entry_repost numbers of EQE/CQEs just break out of the routine as opposed to hitting the doorbell with NOARM and continue processing. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 30 +++++++++++++++--------------- drivers/scsi/lpfc/lpfc_sli.c | 16 ++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index a41daedeb967..7284533f4df2 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -3075,11 +3075,11 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype, qp->assoc_qid, qp->q_cnt_1, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", + "\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, - qp->hba_index); + qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; @@ -3126,11 +3126,11 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype, qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", + "\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, - qp->hba_index); + qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); @@ -3152,16 +3152,16 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\t\tHQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", + "\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", qp->queue_id, qp->entry_count, qp->entry_size, - qp->host_index, qp->hba_index); + qp->host_index, qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", + "\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", datqp->queue_id, datqp->entry_count, datqp->entry_size, datqp->host_index, - datqp->hba_index); + datqp->hba_index, datqp->entry_repost); return len; } @@ -3247,10 +3247,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", + "EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, - qp->host_index, qp->hba_index); + qp->host_index, qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f344abce4949..cc45e9191062 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12961,7 +12961,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, while ((cqe = lpfc_sli4_cq_get(cq))) { workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; cq->CQ_mbox++; } break; @@ -12975,7 +12975,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, workposted |= lpfc_sli4_sp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; } /* Track the max number of CQEs processed in 1 EQ */ @@ -13227,10 +13227,6 @@ drop: case FC_STATUS_INSUFF_BUF_NEED_BUF: hrq->RQ_no_posted_buf++; /* Post more buffers if possible */ - spin_lock_irqsave(&phba->hbalock, iflags); - phba->hba_flag |= HBA_POST_RECEIVE_BUFFER; - spin_unlock_irqrestore(&phba->hbalock, iflags); - workposted = true; break; } out: @@ -13384,7 +13380,7 @@ process_cq: while ((cqe = lpfc_sli4_cq_get(cq))) { workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; } /* Track the max number of CQEs processed in 1 EQ */ @@ -13475,7 +13471,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe) while ((cqe = lpfc_sli4_cq_get(cq))) { workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; } /* Track the max number of CQEs processed in 1 EQ */ @@ -13557,7 +13553,7 @@ lpfc_sli4_fof_intr_handler(int irq, void *dev_id) while ((eqe = lpfc_sli4_eq_get(eq))) { lpfc_sli4_fof_handle_eqe(phba, eqe); if (!(++ecount % eq->entry_repost)) - lpfc_sli4_eq_release(eq, LPFC_QUEUE_NOARM); + break; eq->EQ_processed++; } @@ -13674,7 +13670,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx); if (!(++ecount % fpeq->entry_repost)) - lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM); + break; fpeq->EQ_processed++; } -- cgit v1.2.3-59-g8ed1b From 3c603be9798758dde794daa622e0f7017dbff3a7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:44 -0700 Subject: scsi: lpfc: Separate NVMET data buffer pool fir ELS/CT. Using 2048 byte buffer and onle 128 bytes is needed. Create nee LFPC_NVMET_DATA_BUF_SIZE define to use for NVMET RQ/MRQs. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_hw4.h | 1 + drivers/scsi/lpfc/lpfc_init.c | 7 ++++++- drivers/scsi/lpfc/lpfc_mem.c | 33 ++++++++++++++++++++++++++------- drivers/scsi/lpfc/lpfc_sli.c | 19 +++++++++++++++---- 6 files changed, 50 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 62571fa9c6ad..c4b38491da8e 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -943,6 +943,7 @@ struct lpfc_hba { struct pci_pool *lpfc_mbuf_pool; struct pci_pool *lpfc_hrb_pool; /* header receive buffer pool */ struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */ + struct pci_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */ struct pci_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ struct pci_pool *txrdy_payload_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 1c55408ac718..fb7fc48a1324 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -271,6 +271,7 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *, uint16_t); void lpfc_sli4_clear_fcf_rr_bmask(struct lpfc_hba *); int lpfc_mem_alloc(struct lpfc_hba *, int align); +int lpfc_nvmet_mem_alloc(struct lpfc_hba *phba); int lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *); void lpfc_mem_free(struct lpfc_hba *); void lpfc_mem_free_all(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 1d12f2be36bc..df97c6b7433b 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1356,6 +1356,7 @@ struct lpfc_mbx_wq_destroy { #define LPFC_HDR_BUF_SIZE 128 #define LPFC_DATA_BUF_SIZE 2048 +#define LPFC_NVMET_DATA_BUF_SIZE 128 struct rq_context { uint32_t word0; #define lpfc_rq_context_rqe_count_SHIFT 16 /* Version 0 Only */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 5f62e3a1dff6..26b6a843d32d 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5956,16 +5956,21 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { if (wwn == lpfc_enable_nvmet[i]) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + if (lpfc_nvmet_mem_alloc(phba)) + break; + + phba->nvmet_support = 1; /* a match */ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6017 NVME Target %016llx\n", wwn); - phba->nvmet_support = 1; /* a match */ #else lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6021 Can't enable NVME Target." " NVME_TARGET_FC infrastructure" " is not in kernel\n"); #endif + break; } } } diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 5986c7957199..91060afc9721 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -214,6 +214,21 @@ fail_free_drb_pool: return -ENOMEM; } +int +lpfc_nvmet_mem_alloc(struct lpfc_hba *phba) +{ + phba->lpfc_nvmet_drb_pool = + pci_pool_create("lpfc_nvmet_drb_pool", + phba->pcidev, LPFC_NVMET_DATA_BUF_SIZE, + SGL_ALIGN_SZ, 0); + if (!phba->lpfc_nvmet_drb_pool) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6024 Can't enable NVME Target - no memory\n"); + return -ENOMEM; + } + return 0; +} + /** * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc * @phba: HBA to free memory for @@ -232,6 +247,9 @@ lpfc_mem_free(struct lpfc_hba *phba) /* Free HBQ pools */ lpfc_sli_hbqbuf_free_all(phba); + if (phba->lpfc_nvmet_drb_pool) + pci_pool_destroy(phba->lpfc_nvmet_drb_pool); + phba->lpfc_nvmet_drb_pool = NULL; if (phba->lpfc_drb_pool) pci_pool_destroy(phba->lpfc_drb_pool); phba->lpfc_drb_pool = NULL; @@ -624,20 +642,20 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) kfree(dma_buf); return NULL; } - dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL, - &dma_buf->dbuf.phys); + dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_nvmet_drb_pool, + GFP_KERNEL, &dma_buf->dbuf.phys); if (!dma_buf->dbuf.virt) { pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, dma_buf->hbuf.phys); kfree(dma_buf); return NULL; } - dma_buf->total_size = LPFC_DATA_BUF_SIZE; + dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE; dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_KERNEL); if (!dma_buf->context) { - pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt, dma_buf->dbuf.phys); pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, dma_buf->hbuf.phys); @@ -648,7 +666,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) dma_buf->iocbq = lpfc_sli_get_iocbq(phba); if (!dma_buf->iocbq) { kfree(dma_buf->context); - pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt, dma_buf->dbuf.phys); pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, dma_buf->hbuf.phys); @@ -678,7 +696,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) if (!dma_buf->sglq) { lpfc_sli_release_iocbq(phba, dma_buf->iocbq); kfree(dma_buf->context); - pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt, dma_buf->dbuf.phys); pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, dma_buf->hbuf.phys); @@ -718,7 +736,8 @@ lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab) lpfc_sli_release_iocbq(phba, dmab->iocbq); kfree(dmab->context); pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); - pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); + pci_pool_free(phba->lpfc_nvmet_drb_pool, + dmab->dbuf.virt, dmab->dbuf.phys); kfree(dmab); } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index cc45e9191062..49d5c4700054 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -15079,7 +15079,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_rq_context_rqe_count_1, &rq_create->u.request.context, hrq->entry_count); - rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE; + if (subtype == LPFC_NVMET) + rq_create->u.request.context.buffer_size = + LPFC_NVMET_DATA_BUF_SIZE; + else + rq_create->u.request.context.buffer_size = + LPFC_DATA_BUF_SIZE; bf_set(lpfc_rq_context_rqe_size, &rq_create->u.request.context, LPFC_RQE_SIZE_8); bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context, @@ -15116,8 +15121,14 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, LPFC_RQ_RING_SIZE_4096); break; } - bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context, - LPFC_DATA_BUF_SIZE); + if (subtype == LPFC_NVMET) + bf_set(lpfc_rq_context_buf_size, + &rq_create->u.request.context, + LPFC_NVMET_DATA_BUF_SIZE); + else + bf_set(lpfc_rq_context_buf_size, + &rq_create->u.request.context, + LPFC_DATA_BUF_SIZE); } bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context, cq->queue_id); @@ -15263,7 +15274,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, cq->queue_id); bf_set(lpfc_rq_context_data_size, &rq_create->u.request.context, - LPFC_DATA_BUF_SIZE); + LPFC_NVMET_DATA_BUF_SIZE); bf_set(lpfc_rq_context_hdr_size, &rq_create->u.request.context, LPFC_HDR_BUF_SIZE); -- cgit v1.2.3-59-g8ed1b From 6c621a2229b084da0d926967f84b059a10c26ede Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:45 -0700 Subject: scsi: lpfc: Separate NVMET RQ buffer posting from IO resources SGL/iocbq/context Currently IO resources are mapped 1 to 1 with RQ buffers posted Added logic to separate RQE buffers from IO op resources (sgl/iocbq/context). During initialization, the driver will determine how many SGLs it will allocate for NVMET (based on what the firmware reports) and associate a NVMET IOCBq and NVMET context structure with each one. Now that hdr/data buffers are immediately reposted back to the RQ, 512 RQEs for each MRQ is sufficient. Also, since NVMET data buffers are now 128 bytes, lpfc_nvmet_mrq_post is not necessary anymore as we will always post the max (512) buffers per NVMET MRQ. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 11 +- drivers/scsi/lpfc/lpfc_attr.c | 11 -- drivers/scsi/lpfc/lpfc_crtn.h | 8 +- drivers/scsi/lpfc/lpfc_init.c | 92 ++------------- drivers/scsi/lpfc/lpfc_mem.c | 73 +----------- drivers/scsi/lpfc/lpfc_nvmet.c | 246 +++++++++++++++++++++++++++++++---------- drivers/scsi/lpfc/lpfc_nvmet.h | 1 + drivers/scsi/lpfc/lpfc_sli.c | 78 ++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 4 +- 9 files changed, 291 insertions(+), 233 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index c4b38491da8e..72641b1d3ab8 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -141,6 +141,13 @@ struct lpfc_dmabuf { uint32_t buffer_tag; /* used for tagged queue ring */ }; +struct lpfc_nvmet_ctxbuf { + struct list_head list; + struct lpfc_nvmet_rcv_ctx *context; + struct lpfc_iocbq *iocbq; + struct lpfc_sglq *sglq; +}; + struct lpfc_dma_pool { struct lpfc_dmabuf *elements; uint32_t max_count; @@ -163,9 +170,6 @@ struct rqb_dmabuf { struct lpfc_dmabuf dbuf; uint16_t total_size; uint16_t bytes_recv; - void *context; - struct lpfc_iocbq *iocbq; - struct lpfc_sglq *sglq; struct lpfc_queue *hrq; /* ptr to associated Header RQ */ struct lpfc_queue *drq; /* ptr to associated Data RQ */ }; @@ -777,7 +781,6 @@ struct lpfc_hba { uint32_t cfg_nvme_oas; uint32_t cfg_nvme_io_channel; uint32_t cfg_nvmet_mrq; - uint32_t cfg_nvmet_mrq_post; uint32_t cfg_enable_nvmet; uint32_t cfg_nvme_enable_fb; uint32_t cfg_nvmet_fb_size; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 129d6cd7635b..65264582915a 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3315,14 +3315,6 @@ LPFC_ATTR_R(nvmet_mrq, 1, 1, 16, "Specify number of RQ pairs for processing NVMET cmds"); -/* - * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ - * - */ -LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, - LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST, - "Specify number of buffers to post on every MRQ"); - /* * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP @@ -5158,7 +5150,6 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvme_io_channel, &dev_attr_lpfc_nvmet_mrq, - &dev_attr_lpfc_nvmet_mrq_post, &dev_attr_lpfc_nvme_enable_fb, &dev_attr_lpfc_nvmet_fb_size, &dev_attr_lpfc_enable_bg, @@ -6198,7 +6189,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq); - lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post); /* Initialize first burst. Target vs Initiator are different. */ lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); @@ -6295,7 +6285,6 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) /* Not NVME Target mode. Turn off Target parameters. */ phba->nvmet_support = 0; phba->cfg_nvmet_mrq = 0; - phba->cfg_nvmet_mrq_post = 0; phba->cfg_nvmet_fb_size = 0; } diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index fb7fc48a1324..cc95abd130b4 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -75,6 +75,8 @@ void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *); void lpfc_retry_pport_discovery(struct lpfc_hba *); void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t); +int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt); +void lpfc_free_iocb_list(struct lpfc_hba *phba); void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); @@ -246,16 +248,14 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); -void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, - struct lpfc_dmabuf *mp); +void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, + struct lpfc_nvmet_ctxbuf *ctxp); int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, struct fc_frame_header *fc_hdr); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe); -int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq, - struct lpfc_queue *dq, int count); int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq); void lpfc_unregister_fcf(struct lpfc_hba *); void lpfc_unregister_fcf_rescan(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 26b6a843d32d..86b0b26dfeea 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1099,7 +1099,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } } @@ -3381,7 +3381,7 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) { struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL; uint16_t i, lxri, xri_cnt, els_xri_cnt; - uint16_t nvmet_xri_cnt, tot_cnt; + uint16_t nvmet_xri_cnt; LIST_HEAD(nvmet_sgl_list); int rc; @@ -3389,20 +3389,9 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) * update on pci function's nvmet xri-sgl list */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); - nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; - /* Ensure we at least meet the minimun for the system */ - if (nvmet_xri_cnt < LPFC_NVMET_RQE_DEF_COUNT) - nvmet_xri_cnt = LPFC_NVMET_RQE_DEF_COUNT; - - tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; - if (nvmet_xri_cnt > tot_cnt) { - phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq; - nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "6301 NVMET post-sgl count changed to %d\n", - phba->cfg_nvmet_mrq_post); - } + /* For NVMET, ALL remaining XRIs are dedicated for IO processing */ + nvmet_xri_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) { /* els xri-sgl expanded */ @@ -5835,6 +5824,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list); + /* Fast-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } @@ -6279,7 +6270,7 @@ lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba) * * This routine is invoked to free the driver's IOCB list and memory. **/ -static void +void lpfc_free_iocb_list(struct lpfc_hba *phba) { struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL; @@ -6307,7 +6298,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba) * 0 - successful * other values - error **/ -static int +int lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count) { struct lpfc_iocbq *iocbq_entry = NULL; @@ -8321,46 +8312,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); } -int -lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, - struct lpfc_queue *drq, int count) -{ - int rc, i; - struct lpfc_rqe hrqe; - struct lpfc_rqe drqe; - struct lpfc_rqb *rqbp; - struct rqb_dmabuf *rqb_buffer; - LIST_HEAD(rqb_buf_list); - - rqbp = hrq->rqbp; - for (i = 0; i < count; i++) { - rqb_buffer = (rqbp->rqb_alloc_buffer)(phba); - if (!rqb_buffer) - break; - rqb_buffer->hrq = hrq; - rqb_buffer->drq = drq; - list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); - } - while (!list_empty(&rqb_buf_list)) { - list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf, - hbuf.list); - - hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys); - hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys); - drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys); - drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys); - rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); - if (rc < 0) { - (rqbp->rqb_free_buffer)(phba, rqb_buffer); - } else { - list_add_tail(&rqb_buffer->hbuf.list, - &rqbp->rqb_buffer_list); - rqbp->buffer_count++; - } - } - return 1; -} - int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq) { @@ -11103,7 +11054,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct lpfc_hba *phba; struct lpfc_vport *vport = NULL; struct Scsi_Host *shost = NULL; - int error, cnt, num; + int error; uint32_t cfg_mode, intr_mode; /* Allocate memory for HBA structure */ @@ -11137,27 +11088,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_unset_pci_mem_s4; } - cnt = phba->cfg_iocb_cnt * 1024; - if (phba->nvmet_support) { - /* Ensure we at least meet the minimun for the system */ - num = (phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq); - if (num < LPFC_NVMET_RQE_DEF_COUNT) - num = LPFC_NVMET_RQE_DEF_COUNT; - cnt += num; - } - - /* Initialize and populate the iocb list per host */ - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d total %d\n", - phba->cfg_iocb_cnt, cnt); - error = lpfc_init_iocb_list(phba, cnt); - - if (error) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1413 Failed to initialize iocb list.\n"); - goto out_unset_driver_resource_s4; - } - INIT_LIST_HEAD(&phba->active_rrq_list); INIT_LIST_HEAD(&phba->fcf.fcf_pri_list); @@ -11166,7 +11096,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "1414 Failed to set up driver resource.\n"); - goto out_free_iocb_list; + goto out_unset_driver_resource_s4; } /* Get the default values for Model Name and Description */ @@ -11266,8 +11196,6 @@ out_destroy_shost: lpfc_destroy_shost(phba); out_unset_driver_resource: lpfc_unset_driver_resource_phase2(phba); -out_free_iocb_list: - lpfc_free_iocb_list(phba); out_unset_driver_resource_s4: lpfc_sli4_driver_resource_unset(phba); out_unset_pci_mem_s4: diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 91060afc9721..fcc05a1517c2 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -629,8 +629,6 @@ struct rqb_dmabuf * lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) { struct rqb_dmabuf *dma_buf; - struct lpfc_iocbq *nvmewqe; - union lpfc_wqe128 *wqe; dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL); if (!dma_buf) @@ -651,60 +649,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) return NULL; } dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE; - - dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), - GFP_KERNEL); - if (!dma_buf->context) { - pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt, - dma_buf->dbuf.phys); - pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, - dma_buf->hbuf.phys); - kfree(dma_buf); - return NULL; - } - - dma_buf->iocbq = lpfc_sli_get_iocbq(phba); - if (!dma_buf->iocbq) { - kfree(dma_buf->context); - pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt, - dma_buf->dbuf.phys); - pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, - dma_buf->hbuf.phys); - kfree(dma_buf); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "2621 Ran out of nvmet iocb/WQEs\n"); - return NULL; - } - dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; - nvmewqe = dma_buf->iocbq; - wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; - /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); - /* Word 7 */ - bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI); - bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3); - bf_set(wqe_pu, &wqe->generic.wqe_com, 1); - /* Word 10 */ - bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); - bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); - bf_set(wqe_qosd, &wqe->generic.wqe_com, 0); - - dma_buf->iocbq->context1 = NULL; - spin_lock(&phba->sli4_hba.sgl_list_lock); - dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq); - spin_unlock(&phba->sli4_hba.sgl_list_lock); - if (!dma_buf->sglq) { - lpfc_sli_release_iocbq(phba, dma_buf->iocbq); - kfree(dma_buf->context); - pci_pool_free(phba->lpfc_nvmet_drb_pool, dma_buf->dbuf.virt, - dma_buf->dbuf.phys); - pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, - dma_buf->hbuf.phys); - kfree(dma_buf); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "6132 Ran out of nvmet XRIs\n"); - return NULL; - } return dma_buf; } @@ -723,18 +667,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab) { - unsigned long flags; - - __lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag); - dmab->sglq->state = SGL_FREED; - dmab->sglq->ndlp = NULL; - - spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags); - list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list); - spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags); - - lpfc_sli_release_iocbq(phba, dmab->iocbq); - kfree(dmab->context); pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); pci_pool_free(phba->lpfc_nvmet_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); @@ -822,6 +754,11 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); if (rc < 0) { (rqbp->rqb_free_buffer)(phba, rqb_entry); + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6409 Cannot post to RQ %d: %x %x\n", + rqb_entry->hrq->queue_id, + rqb_entry->hrq->host_index, + rqb_entry->hrq->hba_index); } else { list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); rqbp->buffer_count++; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index dfa7296499cf..fcc77ae0c71c 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -142,7 +142,7 @@ out: } /** - * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context + * lpfc_nvmet_ctxbuf_post - Repost a NVMET RQ DMA buffer and clean up context * @phba: HBA buffer is associated with * @ctxp: context to clean up * @mp: Buffer to free @@ -155,24 +155,24 @@ out: * Returns: None **/ void -lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, - struct lpfc_dmabuf *mp) +lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) { - if (ctxp) { - if (ctxp->flag) - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6314 rq_post ctx xri x%x flag x%x\n", - ctxp->oxid, ctxp->flag); - - if (ctxp->txrdy) { - pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, - ctxp->txrdy_phys); - ctxp->txrdy = NULL; - ctxp->txrdy_phys = 0; - } - ctxp->state = LPFC_NVMET_STE_FREE; + struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + unsigned long iflag; + + if (ctxp->txrdy) { + pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, + ctxp->txrdy_phys); + ctxp->txrdy = NULL; + ctxp->txrdy_phys = 0; } - lpfc_rq_buf_free(phba, mp); + ctxp->state = LPFC_NVMET_STE_FREE; + + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag); + list_add_tail(&ctx_buf->list, + &phba->sli4_hba.lpfc_nvmet_ctx_list); + phba->sli4_hba.nvmet_ctx_cnt++; + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag); } #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -718,7 +718,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, if (aborting) return; - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } static struct nvmet_fc_target_template lpfc_tgttemplate = { @@ -739,17 +739,128 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .target_priv_sz = sizeof(struct lpfc_nvmet_tgtport), }; +void +lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba) +{ + struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf; + unsigned long flags; + + list_for_each_entry_safe( + ctx_buf, next_ctx_buf, + &phba->sli4_hba.lpfc_nvmet_ctx_list, list) { + spin_lock_irqsave( + &phba->sli4_hba.abts_nvme_buf_list_lock, flags); + list_del_init(&ctx_buf->list); + spin_unlock_irqrestore( + &phba->sli4_hba.abts_nvme_buf_list_lock, flags); + __lpfc_clear_active_sglq(phba, + ctx_buf->sglq->sli4_lxritag); + ctx_buf->sglq->state = SGL_FREED; + ctx_buf->sglq->ndlp = NULL; + + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags); + list_add_tail(&ctx_buf->sglq->list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, + flags); + + lpfc_sli_release_iocbq(phba, ctx_buf->iocbq); + kfree(ctx_buf->context); + } +} + +int +lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) +{ + struct lpfc_nvmet_ctxbuf *ctx_buf; + struct lpfc_iocbq *nvmewqe; + union lpfc_wqe128 *wqe; + int i; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME, + "6403 Allocate NVMET resources for %d XRIs\n", + phba->sli4_hba.nvmet_xri_cnt); + + /* For all nvmet xris, allocate resources needed to process a + * received command on a per xri basis. + */ + for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) { + ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL); + if (!ctx_buf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6404 Ran out of memory for NVMET\n"); + return -ENOMEM; + } + + ctx_buf->context = kzalloc(sizeof(*ctx_buf->context), + GFP_KERNEL); + if (!ctx_buf->context) { + kfree(ctx_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6405 Ran out of NVMET " + "context memory\n"); + return -ENOMEM; + } + ctx_buf->context->ctxbuf = ctx_buf; + + ctx_buf->iocbq = lpfc_sli_get_iocbq(phba); + if (!ctx_buf->iocbq) { + kfree(ctx_buf->context); + kfree(ctx_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6406 Ran out of NVMET iocb/WQEs\n"); + return -ENOMEM; + } + ctx_buf->iocbq->iocb_flag = LPFC_IO_NVMET; + nvmewqe = ctx_buf->iocbq; + wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; + /* Initialize WQE */ + memset(wqe, 0, sizeof(union lpfc_wqe)); + /* Word 7 */ + bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe->generic.wqe_com, 1); + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); + bf_set(wqe_qosd, &wqe->generic.wqe_com, 0); + + ctx_buf->iocbq->context1 = NULL; + spin_lock(&phba->sli4_hba.sgl_list_lock); + ctx_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, ctx_buf->iocbq); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + if (!ctx_buf->sglq) { + lpfc_sli_release_iocbq(phba, ctx_buf->iocbq); + kfree(ctx_buf->context); + kfree(ctx_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6407 Ran out of NVMET XRIs\n"); + return -ENOMEM; + } + spin_lock(&phba->sli4_hba.nvmet_io_lock); + list_add_tail(&ctx_buf->list, + &phba->sli4_hba.lpfc_nvmet_ctx_list); + spin_unlock(&phba->sli4_hba.nvmet_io_lock); + } + phba->sli4_hba.nvmet_ctx_cnt = phba->sli4_hba.nvmet_xri_cnt; + return 0; +} + int lpfc_nvmet_create_targetport(struct lpfc_hba *phba) { struct lpfc_vport *vport = phba->pport; struct lpfc_nvmet_tgtport *tgtp; struct nvmet_fc_port_info pinfo; - int error = 0; + int error; if (phba->targetport) return 0; + error = lpfc_nvmet_setup_io_context(phba); + if (error) + return error; + memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info)); pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); @@ -778,13 +889,16 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) &phba->pcidev->dev, &phba->targetport); #else - error = -ENOMEM; + error = -ENOENT; #endif if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6025 Cannot register NVME targetport " "x%x\n", error); phba->targetport = NULL; + + lpfc_nvmet_cleanup_io_context(phba); + } else { tgtp = (struct lpfc_nvmet_tgtport *) phba->targetport->private; @@ -874,7 +988,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, list_for_each_entry_safe(ctxp, next_ctxp, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, list) { - if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + if (ctxp->ctxbuf->sglq->sli4_xritag != xri) continue; /* Check if we already received a free context call @@ -895,7 +1009,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE || ndlp->nlp_state == NLP_STE_MAPPED_NODE)) { lpfc_set_rrq_active(phba, ndlp, - ctxp->rqb_buffer->sglq->sli4_lxritag, + ctxp->ctxbuf->sglq->sli4_lxritag, rxid, 1); lpfc_sli4_abts_err_handler(phba, ndlp, axri); } @@ -904,8 +1018,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, "6318 XB aborted %x flg x%x (%x)\n", ctxp->oxid, ctxp->flag, released); if (released) - lpfc_nvmet_rq_post(phba, ctxp, - &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); + if (rrq_empty) lpfc_worker_wake_up(phba); return; @@ -933,7 +1047,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, list_for_each_entry_safe(ctxp, next_ctxp, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, list) { - if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + if (ctxp->ctxbuf->sglq->sli4_xritag != xri) continue; spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); @@ -985,6 +1099,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) init_completion(&tgtp->tport_unreg_done); nvmet_fc_unregister_targetport(phba->targetport); wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); + lpfc_nvmet_cleanup_io_context(phba); } phba->targetport = NULL; #endif @@ -1115,15 +1230,18 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; + struct lpfc_nvmet_ctxbuf *ctx_buf; uint32_t *payload; uint32_t size, oxid, sid, rc; + unsigned long iflag; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t id; #endif + ctx_buf = NULL; if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6157 FCP Drop IO\n"); + "6157 NVMET FCP Drop IO\n"); oxid = 0; size = 0; sid = 0; @@ -1131,6 +1249,23 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, goto dropit; } + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag); + if (phba->sli4_hba.nvmet_ctx_cnt) { + list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_list, + ctx_buf, struct lpfc_nvmet_ctxbuf, list); + phba->sli4_hba.nvmet_ctx_cnt--; + } + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag); + + if (!ctx_buf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6408 No NVMET ctx Drop IO\n"); + oxid = 0; + size = 0; + sid = 0; + ctxp = NULL; + goto dropit; + } tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; payload = (uint32_t *)(nvmebuf->dbuf.virt); @@ -1139,16 +1274,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, oxid = be16_to_cpu(fc_hdr->fh_ox_id); sid = sli4_sid_from_fc_hdr(fc_hdr); - ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context; - if (ctxp == NULL) { - atomic_inc(&tgtp->rcv_fcp_cmd_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6158 FCP Drop IO x%x: Alloc\n", - oxid); - lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); - /* Cannot send ABTS without context */ - return; - } + ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; memset(ctxp, 0, sizeof(ctxp->ctx)); ctxp->wqeq = NULL; ctxp->txrdy = NULL; @@ -1158,9 +1284,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->oxid = oxid; ctxp->sid = sid; ctxp->state = LPFC_NVMET_STE_RCV; - ctxp->rqb_buffer = nvmebuf; ctxp->entry_cnt = 1; ctxp->flag = 0; + ctxp->ctxbuf = ctx_buf; spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -1192,6 +1318,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, * The calling sequence should be: * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp. + * When we return from nvmet_fc_rcv_fcp_req, all relevant info in + * the NVME command / FC header is stored, so we are free to repost + * the buffer. */ rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, payload, size); @@ -1199,6 +1328,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, /* Process FCP command */ if (rc == 0) { atomic_inc(&tgtp->rcv_fcp_cmd_out); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ return; } @@ -1213,15 +1343,17 @@ dropit: lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", oxid, size, sid); if (oxid) { + lpfc_nvmet_defer_release(phba, ctxp); lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ return; } - if (nvmebuf) { - nvmebuf->iocbq->hba_wqidx = 0; - /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ - lpfc_nvmet_rq_post(phba, ctxp, &nvmebuf->hbuf); - } + if (ctx_buf) + lpfc_nvmet_ctxbuf_post(phba, ctx_buf); + + if (nvmebuf) + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ #endif } @@ -1273,7 +1405,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint64_t isr_timestamp) { if (phba->nvmet_support == 0) { - lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); return; } lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf, @@ -1474,7 +1606,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, nvmewqe = ctxp->wqeq; if (nvmewqe == NULL) { /* Allocate buffer for command wqe */ - nvmewqe = ctxp->rqb_buffer->iocbq; + nvmewqe = ctxp->ctxbuf->iocbq; if (nvmewqe == NULL) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6110 lpfc_nvmet_prep_fcp_wqe: No " @@ -1501,7 +1633,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, return NULL; } - sgl = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl; + sgl = (struct sli4_sge *)ctxp->ctxbuf->sglq->sgl; switch (rsp->op) { case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: @@ -1851,15 +1983,16 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; /* * if transport has released ctx, then can reuse it. Otherwise, * will be recycled by transport release call. */ if (released) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; + /* This is the iocbq for the abort, not the command */ lpfc_sli_release_iocbq(phba, cmdwqe); /* Since iaab/iaar are NOT set, there is no work left. @@ -1932,15 +2065,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp->oxid, ctxp->flag, released, wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); + + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; /* * if transport has released ctx, then can reuse it. Otherwise, * will be recycled by transport release call. */ if (released) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); - - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); /* Since iaab/iaar are NOT set, there is no work left. * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted @@ -2002,10 +2135,6 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - if (!ctxp->wqeq) { - ctxp->wqeq = ctxp->rqb_buffer->iocbq; - ctxp->wqeq->hba_wqidx = 0; - } ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || @@ -2101,7 +2230,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (!ctxp->wqeq) { - ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq = ctxp->ctxbuf->iocbq; ctxp->wqeq->hba_wqidx = 0; } @@ -2239,7 +2368,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (!ctxp->wqeq) { - ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq = ctxp->ctxbuf->iocbq; ctxp->wqeq->hba_wqidx = 0; } @@ -2294,6 +2423,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, } abts_wqeq = ctxp->wqeq; wqe_abts = &abts_wqeq->wqe; + lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); spin_lock_irqsave(&phba->hbalock, flags); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 55f2a859dc70..6eb2f5d8d4ed 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -106,6 +106,7 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ #define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ struct rqb_dmabuf *rqb_buffer; + struct lpfc_nvmet_ctxbuf *ctxbuf; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint64_t ts_isr_cmd; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 49d5c4700054..d68ee3ee299a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6513,6 +6513,49 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC"); } +static int +lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, + struct lpfc_queue *drq, int count) +{ + int rc, i; + struct lpfc_rqe hrqe; + struct lpfc_rqe drqe; + struct lpfc_rqb *rqbp; + struct rqb_dmabuf *rqb_buffer; + LIST_HEAD(rqb_buf_list); + + rqbp = hrq->rqbp; + for (i = 0; i < count; i++) { + /* IF RQ is already full, don't bother */ + if (rqbp->buffer_count + i >= rqbp->entry_count - 1) + break; + rqb_buffer = rqbp->rqb_alloc_buffer(phba); + if (!rqb_buffer) + break; + rqb_buffer->hrq = hrq; + rqb_buffer->drq = drq; + list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); + } + while (!list_empty(&rqb_buf_list)) { + list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf, + hbuf.list); + + hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys); + hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys); + drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys); + drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys); + rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); + if (rc < 0) { + rqbp->rqb_free_buffer(phba, rqb_buffer); + } else { + list_add_tail(&rqb_buffer->hbuf.list, + &rqbp->rqb_buffer_list); + rqbp->buffer_count++; + } + } + return 1; +} + /** * lpfc_sli4_hba_setup - SLI4 device initialization PCI function * @phba: Pointer to HBA context object. @@ -6525,7 +6568,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) int lpfc_sli4_hba_setup(struct lpfc_hba *phba) { - int rc, i; + int rc, i, cnt; LPFC_MBOXQ_t *mboxq; struct lpfc_mqe *mqe; uint8_t *vpd; @@ -6876,6 +6919,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) goto out_destroy_queue; } phba->sli4_hba.nvmet_xri_cnt = rc; + + cnt = phba->cfg_iocb_cnt * 1024; + /* We need 1 iocbq for every SGL, for IO processing */ + cnt += phba->sli4_hba.nvmet_xri_cnt; + /* Initialize and populate the iocb list per host */ + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2821 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + rc = lpfc_init_iocb_list(phba, cnt); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1413 Failed to init iocb list.\n"); + goto out_destroy_queue; + } + lpfc_nvmet_create_targetport(phba); } else { /* update host scsi xri-sgl sizes and mappings */ @@ -6895,10 +6953,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) "and mapping: %d\n", rc); goto out_destroy_queue; } + + cnt = phba->cfg_iocb_cnt * 1024; + /* Initialize and populate the iocb list per host */ + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2820 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + rc = lpfc_init_iocb_list(phba, cnt); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6301 Failed to init iocb list.\n"); + goto out_destroy_queue; + } } if (phba->nvmet_support && phba->cfg_nvmet_mrq) { - /* Post initial buffers to all RQs created */ for (i = 0; i < phba->cfg_nvmet_mrq; i++) { rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp; @@ -6911,7 +6980,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) lpfc_post_rq_buffer( phba, phba->sli4_hba.nvmet_mrq_hdr[i], phba->sli4_hba.nvmet_mrq_data[i], - phba->cfg_nvmet_mrq_post); + LPFC_NVMET_RQE_DEF_COUNT); } } @@ -7078,6 +7147,7 @@ out_unset_queue: /* Unset all the queues set up in this routine when error out */ lpfc_sli4_queue_unset(phba); out_destroy_queue: + lpfc_free_iocb_list(phba); lpfc_sli4_queue_destroy(phba); out_stop_timers: lpfc_stop_hba_timers(phba); @@ -18731,7 +18801,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, spin_lock_irqsave(&pring->ring_lock, iflags); ctxp = pwqe->context2; - sglq = ctxp->rqb_buffer->sglq; + sglq = ctxp->ctxbuf->sglq; if (pwqe->sli4_xritag == NO_XRI) { pwqe->sli4_lxritag = sglq->sli4_lxritag; pwqe->sli4_xritag = sglq->sli4_xritag; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 422bde85c9f1..19e2f190ea2e 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -618,10 +618,12 @@ struct lpfc_sli4_hba { uint16_t scsi_xri_start; uint16_t els_xri_cnt; uint16_t nvmet_xri_cnt; + uint16_t nvmet_ctx_cnt; struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_nvmet_sgl_list; struct list_head lpfc_abts_nvmet_ctx_list; + struct list_head lpfc_nvmet_ctx_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; @@ -662,8 +664,6 @@ struct lpfc_sli4_hba { uint16_t num_online_cpu; uint16_t num_present_cpu; uint16_t curr_disp_cpu; - - uint16_t nvmet_mrq_post_idx; }; enum lpfc_sge_type { -- cgit v1.2.3-59-g8ed1b From a8cf5dfeb4d84248c0ad12386ae0cb36ee21589a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:46 -0700 Subject: scsi: lpfc: Added recovery logic for running out of NVMET IO context resources Previous logic would just drop the IO. Added logic to queue the IO to wait for an IO context resource from an IO thats already in progress. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 6 ++ drivers/scsi/lpfc/lpfc_crtn.h | 2 + drivers/scsi/lpfc/lpfc_debugfs.c | 6 ++ drivers/scsi/lpfc/lpfc_init.c | 2 + drivers/scsi/lpfc/lpfc_nvmet.c | 138 +++++++++++++++++++++++++++++++++------ drivers/scsi/lpfc/lpfc_sli.c | 7 +- drivers/scsi/lpfc/lpfc_sli4.h | 6 +- 8 files changed, 144 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 72641b1d3ab8..c47bde6205c9 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -170,6 +170,7 @@ struct rqb_dmabuf { struct lpfc_dmabuf dbuf; uint16_t total_size; uint16_t bytes_recv; + uint16_t idx; struct lpfc_queue *hrq; /* ptr to associated Header RQ */ struct lpfc_queue *drq; /* ptr to associated Data RQ */ }; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 65264582915a..bb2d9e238225 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -245,6 +245,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&tgtp->xmt_abort_rsp), atomic_read(&tgtp->xmt_abort_rsp_error)); + len += snprintf(buf + len, PAGE_SIZE - len, + "IO_CTX: %08x outstanding %08x total %x", + phba->sli4_hba.nvmet_ctx_cnt, + phba->sli4_hba.nvmet_io_wait_cnt, + phba->sli4_hba.nvmet_io_wait_total); + len += snprintf(buf+len, PAGE_SIZE-len, "\n"); return len; } diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index cc95abd130b4..8912767e7bc8 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -77,6 +77,8 @@ void lpfc_retry_pport_discovery(struct lpfc_hba *); void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t); int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt); void lpfc_free_iocb_list(struct lpfc_hba *phba); +int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, + struct lpfc_queue *drq, int count, int idx); void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 7284533f4df2..c7d1c9d37a64 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -842,6 +842,12 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); } + + len += snprintf(buf + len, size - len, + "IO_CTX: %08x outstanding %08x total %08x\n", + phba->sli4_hba.nvmet_ctx_cnt, + phba->sli4_hba.nvmet_io_wait_cnt, + phba->sli4_hba.nvmet_io_wait_total); } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 86b0b26dfeea..9f6c7e71814b 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5825,6 +5825,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list); /* Fast-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); @@ -5833,6 +5834,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* This abort list used by worker thread */ spin_lock_init(&phba->sli4_hba.sgl_list_lock); spin_lock_init(&phba->sli4_hba.nvmet_io_lock); + spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock); /* * Initialize driver internal slow-path work queues diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index fcc77ae0c71c..312f54278bd4 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -158,6 +158,12 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) { struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + struct lpfc_nvmet_tgtport *tgtp; + struct fc_frame_header *fc_hdr; + struct rqb_dmabuf *nvmebuf; + struct lpfc_dmabuf *hbufp; + uint32_t *payload; + uint32_t size, oxid, sid, rc; unsigned long iflag; if (ctxp->txrdy) { @@ -168,6 +174,87 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) } ctxp->state = LPFC_NVMET_STE_FREE; + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + if (phba->sli4_hba.nvmet_io_wait_cnt) { + hbufp = &nvmebuf->hbuf; + list_remove_head(&phba->sli4_hba.lpfc_nvmet_io_wait_list, + nvmebuf, struct rqb_dmabuf, + hbuf.list); + phba->sli4_hba.nvmet_io_wait_cnt--; + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, + iflag); + + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + size = nvmebuf->bytes_recv; + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + memset(ctxp, 0, sizeof(ctxp->ctx)); + ctxp->wqeq = NULL; + ctxp->txrdy = NULL; + ctxp->offset = 0; + ctxp->phba = phba; + ctxp->size = size; + ctxp->oxid = oxid; + ctxp->sid = sid; + ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->entry_cnt = 1; + ctxp->flag = 0; + ctxp->ctxbuf = ctx_buf; + spin_lock_init(&ctxp->ctxlock); + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + ctxp->ts_cmd_nvme = ktime_get_ns(); + ctxp->ts_isr_cmd = ctxp->ts_cmd_nvme; + ctxp->ts_nvme_data = 0; + ctxp->ts_data_wqput = 0; + ctxp->ts_isr_data = 0; + ctxp->ts_data_nvme = 0; + ctxp->ts_nvme_status = 0; + ctxp->ts_status_wqput = 0; + ctxp->ts_isr_status = 0; + ctxp->ts_status_nvme = 0; + } +#endif + atomic_inc(&tgtp->rcv_fcp_cmd_in); + /* + * The calling sequence should be: + * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done + * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp. + * When we return from nvmet_fc_rcv_fcp_req, all relevant info + * the NVME command / FC header is stored. + * A buffer has already been reposted for this IO, so just free + * the nvmebuf. + */ + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, + payload, size); + + /* Process FCP command */ + if (rc == 0) { + atomic_inc(&tgtp->rcv_fcp_cmd_out); + nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); + return; + } + + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", + ctxp->oxid, rc, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + + lpfc_nvmet_defer_release(phba, ctxp); + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); + return; + } + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag); list_add_tail(&ctx_buf->list, &phba->sli4_hba.lpfc_nvmet_ctx_list); @@ -1232,7 +1319,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr; struct lpfc_nvmet_ctxbuf *ctx_buf; uint32_t *payload; - uint32_t size, oxid, sid, rc; + uint32_t size, oxid, sid, rc, qno; unsigned long iflag; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t id; @@ -1257,21 +1344,41 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, } spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag); + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + size = nvmebuf->bytes_recv; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { + id = smp_processor_id(); + if (id < LPFC_CHECK_CPU_CNT) + phba->cpucheck_rcv_io[id]++; + } +#endif + + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", + oxid, size, smp_processor_id()); + if (!ctx_buf) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6408 No NVMET ctx Drop IO\n"); - oxid = 0; - size = 0; - sid = 0; - ctxp = NULL; - goto dropit; + /* Queue this NVME IO to process later */ + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + list_add_tail(&nvmebuf->hbuf.list, + &phba->sli4_hba.lpfc_nvmet_io_wait_list); + phba->sli4_hba.nvmet_io_wait_cnt++; + phba->sli4_hba.nvmet_io_wait_total++; + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, + iflag); + + /* Post a brand new DMA buffer to RQ */ + qno = nvmebuf->idx; + lpfc_post_rq_buffer( + phba, phba->sli4_hba.nvmet_mrq_hdr[qno], + phba->sli4_hba.nvmet_mrq_data[qno], 1, qno); + return; } tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; payload = (uint32_t *)(nvmebuf->dbuf.virt); - fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); - size = nvmebuf->bytes_recv; - oxid = be16_to_cpu(fc_hdr->fh_ox_id); sid = sli4_sid_from_fc_hdr(fc_hdr); ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; @@ -1302,17 +1409,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->ts_isr_status = 0; ctxp->ts_status_nvme = 0; } - - if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { - id = smp_processor_id(); - if (id < LPFC_CHECK_CPU_CNT) - phba->cpucheck_rcv_io[id]++; - } #endif - lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", - oxid, size, smp_processor_id()); - atomic_inc(&tgtp->rcv_fcp_cmd_in); /* * The calling sequence should be: diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d68ee3ee299a..3fb4e715bfa2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6513,9 +6513,9 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC"); } -static int +int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, - struct lpfc_queue *drq, int count) + struct lpfc_queue *drq, int count, int idx) { int rc, i; struct lpfc_rqe hrqe; @@ -6534,6 +6534,7 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, break; rqb_buffer->hrq = hrq; rqb_buffer->drq = drq; + rqb_buffer->idx = idx; list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); } while (!list_empty(&rqb_buf_list)) { @@ -6980,7 +6981,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) lpfc_post_rq_buffer( phba, phba->sli4_hba.nvmet_mrq_hdr[i], phba->sli4_hba.nvmet_mrq_data[i], - LPFC_NVMET_RQE_DEF_COUNT); + LPFC_NVMET_RQE_DEF_COUNT, i); } } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 19e2f190ea2e..c1c9a9125266 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -619,13 +619,16 @@ struct lpfc_sli4_hba { uint16_t els_xri_cnt; uint16_t nvmet_xri_cnt; uint16_t nvmet_ctx_cnt; + uint16_t nvmet_io_wait_cnt; + uint16_t nvmet_io_wait_total; struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_nvmet_sgl_list; struct list_head lpfc_abts_nvmet_ctx_list; - struct list_head lpfc_nvmet_ctx_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; + struct list_head lpfc_nvmet_ctx_list; + struct list_head lpfc_nvmet_io_wait_list; struct lpfc_sglq **lpfc_sglq_active_list; struct list_head lpfc_rpi_hdr_list; unsigned long *rpi_bmask; @@ -657,6 +660,7 @@ struct lpfc_sli4_hba { spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t sgl_list_lock; /* list of aborted els IOs */ spinlock_t nvmet_io_lock; + spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */ uint32_t physical_port; /* CPU to vector mapping information */ -- cgit v1.2.3-59-g8ed1b From 82820f0cf19aa62e2608c2909bd44e7a68268ff5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:47 -0700 Subject: scsi: lpfc: Fix NVME I+T not registering NVME as a supported FC4 type When the driver send the RPA command, it does not send supported FC4 Type NVME to the management server. Encode NVME (type x28) in the AttribEntry in the RPA command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index c7962dae4dab..f2cd19c6c2df 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -2092,6 +2092,7 @@ lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport, ae->un.AttrTypes[3] = 0x02; /* Type 1 - ELS */ ae->un.AttrTypes[2] = 0x01; /* Type 8 - FCP */ + ae->un.AttrTypes[6] = 0x01; /* Type 40 - NVME */ ae->un.AttrTypes[7] = 0x01; /* Type 32 - CT */ size = FOURBYTES + 32; ad->AttrLen = cpu_to_be16(size); -- cgit v1.2.3-59-g8ed1b From 667a7662529bf0afb1d84a32ceb0da0a875a3b6c Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:48 -0700 Subject: scsi: lpfc: Fix debugfs root inode "lpfc" not getting deleted on driver unload. When unloading and reloading the driver, the driver fails to recreate the lpfc root inode in the debugfs tree. The driver is incorrectly removing the lpfc root inode in lpfc_debugfs_terminate in the first driver instance that unloads and then sets the lpfc_debugfs_root global parameter to NULL. When the final driver instance unloads, the debugfs calls quietly ignore the remove on a NULL pointer. The bug is that the debugfs_remove call returns void so the driver doesn't know to correctly set the global parameter to NULL. Base the debugfs_remove of the lpfc_debugfs_root parameter on lpfc_debugfs_hba_count because this parameter tracks the fnX instance tracked per driver instance. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index c7d1c9d37a64..4bcb92c844ca 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -5866,8 +5866,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) atomic_dec(&lpfc_debugfs_hba_count); } - debugfs_remove(lpfc_debugfs_root); /* lpfc */ - lpfc_debugfs_root = NULL; + if (atomic_read(&lpfc_debugfs_hba_count) == 0) { + debugfs_remove(lpfc_debugfs_root); /* lpfc */ + lpfc_debugfs_root = NULL; + } } #endif return; -- cgit v1.2.3-59-g8ed1b From 64eb4dcb140a7c5547f6e965fb471b1b75c01108 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:49 -0700 Subject: scsi: lpfc: Cleanup entry_repost settings on SLI4 queues Too many work items being processed in IRQ context take a lot of CPU time and cause problems. With a recent change, we get out of the ISR after hitting entry_repost work items on a queue. However, the actual values for entry repost are still high. EQ is 128 and CQ is 128, this could translate into processing 128 * 128 (16384) work items under IRQ context. Set entry_repost in the actual queue creation routine now. Limit EQ repost to 8 and CQ repost to 64 to further limit the amount of time spent in the IRQ. Fix fof IRQ routines as well. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 19 ++++++++----------- drivers/scsi/lpfc/lpfc_sli4.h | 6 ++++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 3fb4e715bfa2..903c06ff828a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13922,17 +13922,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size, } queue->entry_size = entry_size; queue->entry_count = entry_count; - - /* - * entry_repost is calculated based on the number of entries in the - * queue. This works out except for RQs. If buffers are NOT initially - * posted for every RQE, entry_repost should be adjusted accordingly. - */ - queue->entry_repost = (entry_count >> 3); - if (queue->entry_repost < LPFC_QUEUE_MIN_REPOST) - queue->entry_repost = LPFC_QUEUE_MIN_REPOST; queue->phba = phba; + /* entry_repost will be set during q creation */ + return queue; out_fail: lpfc_sli4_queue_free(queue); @@ -14163,6 +14156,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) status = -ENXIO; eq->host_index = 0; eq->hba_index = 0; + eq->entry_repost = LPFC_EQ_REPOST; mempool_free(mbox, phba->mbox_mem_pool); return status; @@ -14236,9 +14230,9 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0361 Unsupported CQ count: " - "entry cnt %d sz %d pg cnt %d repost %d\n", + "entry cnt %d sz %d pg cnt %d\n", cq->entry_count, cq->entry_size, - cq->page_count, cq->entry_repost); + cq->page_count); if (cq->entry_count < 256) { status = -EINVAL; goto out; @@ -14291,6 +14285,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, cq->assoc_qid = eq->queue_id; cq->host_index = 0; cq->hba_index = 0; + cq->entry_repost = LPFC_CQ_REPOST; out: mempool_free(mbox, phba->mbox_mem_pool); @@ -14482,6 +14477,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, cq->assoc_qid = eq->queue_id; cq->host_index = 0; cq->hba_index = 0; + cq->entry_repost = LPFC_CQ_REPOST; rc = 0; list_for_each_entry(dmabuf, &cq->page_list, list) { @@ -14730,6 +14726,7 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq, mq->subtype = subtype; mq->host_index = 0; mq->hba_index = 0; + mq->entry_repost = LPFC_MQ_REPOST; /* link the mq onto the parent cq child list */ list_add_tail(&mq->list, &cq->child_list); diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index c1c9a9125266..cf863db27700 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -24,7 +24,6 @@ #define LPFC_XRI_EXCH_BUSY_WAIT_TMO 10000 #define LPFC_XRI_EXCH_BUSY_WAIT_T1 10 #define LPFC_XRI_EXCH_BUSY_WAIT_T2 30000 -#define LPFC_RELEASE_NOTIFICATION_INTERVAL 32 #define LPFC_RPI_LOW_WATER_MARK 10 #define LPFC_UNREG_FCF 1 @@ -155,8 +154,11 @@ struct lpfc_queue { uint32_t entry_count; /* Number of entries to support on the queue */ uint32_t entry_size; /* Size of each queue entry. */ uint32_t entry_repost; /* Count of entries before doorbell is rung */ -#define LPFC_QUEUE_MIN_REPOST 8 +#define LPFC_EQ_REPOST 8 +#define LPFC_MQ_REPOST 8 +#define LPFC_CQ_REPOST 64 #define LPFC_RQ_REPOST 64 +#define LPFC_RELEASE_NOTIFICATION_INTERVAL 32 /* For WQs */ uint32_t queue_id; /* Queue ID assigned by the hardware */ uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */ uint32_t page_count; /* Number of pages allocated for this queue */ -- cgit v1.2.3-59-g8ed1b From dc53a61852279f25909d99dad4638b4aee0b2d82 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:50 -0700 Subject: scsi: lpfc: Fix NVMEI's handling of NVMET's PRLI response attributes Code review of NVMEI's FC_PORT_ROLE_NVME_DISCOVERY looked wrong. Discussions with storage architecture team clarified NVMEI's audit of the PRLI response port roles. Following up discussion with code review showed a few minor corrections were required - especially in anticipation of NVME auto discovery. During PRLI, NVMEI should sent prli_init - which it it does. NVMET should send prli_tgt and prli_disc - which it does. When NVMEI receives a PRLI Response now, it audits the incoming target bits and stores the attributes in the corresponding NDLP. Later, when NVMEI registers the NVME rport, it uses the stored ndlp attributes to set the rport port_roles correctly. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_disc.h | 1 + drivers/scsi/lpfc/lpfc_nportdisc.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 9d5a379f4b15..094c97b9e5f7 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -90,6 +90,7 @@ struct lpfc_nodelist { #define NLP_FCP_INITIATOR 0x10 /* entry is an FCP Initiator */ #define NLP_NVME_TARGET 0x20 /* entry is a NVME Target */ #define NLP_NVME_INITIATOR 0x40 /* entry is a NVME Initiator */ +#define NLP_NVME_DISCOVERY 0x80 /* entry has NVME disc srvc */ uint16_t nlp_fc4_type; /* FC types node supports. */ /* Assigned from GID_FF, only diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 8777c2d5f50d..bff3de053df4 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1944,7 +1944,13 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Target driver cannot solicit NVME FB. */ if (bf_get_be32(prli_tgt, nvpr)) { + /* Complete the nvme target roles. The transport + * needs to know if the rport is capable of + * discovery in addition to its role. + */ ndlp->nlp_type |= NLP_NVME_TARGET; + if (bf_get_be32(prli_disc, nvpr)) + ndlp->nlp_type |= NLP_NVME_DISCOVERY; if ((bf_get_be32(prli_fba, nvpr) == 1) && (bf_get_be32(prli_fb_sz, nvpr) > 0) && (phba->cfg_nvme_enable_fb) && -- cgit v1.2.3-59-g8ed1b From ae9e28f36a6cca4e5760f4927b70b6c9e588db1a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:51 -0700 Subject: scsi: lpfc: Add MDS Diagnostic support. Added code to support Cisco MDS loopback diagnostic. The diagnostics run various loopbacks including one which loops-back frame through the driver. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 2 + drivers/scsi/lpfc/lpfc_els.c | 7 +++ drivers/scsi/lpfc/lpfc_hbadisc.c | 3 +- drivers/scsi/lpfc/lpfc_hw4.h | 15 ++++- drivers/scsi/lpfc/lpfc_init.c | 13 ++++ drivers/scsi/lpfc/lpfc_sli.c | 131 ++++++++++++++++++++++++++++++++++++--- 6 files changed, 161 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index c47bde6205c9..f2c0ba6ced78 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -675,6 +675,8 @@ struct lpfc_hba { /* INIT_LINK mailbox command */ #define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */ #define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */ +#define LS_MDS_LINK_DOWN 0x8 /* MDS Diagnostics Link Down */ +#define LS_MDS_LOOPBACK 0x16 /* MDS Diagnostics Link Up (Loopback) */ uint32_t hba_flag; /* hba generic flags */ #define HBA_ERATT_HANDLED 0x1 /* This flag is set when eratt handled */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3085895464d9..1d36f82fa369 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1047,6 +1047,13 @@ stop_rr_fcf_flogi: irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout); + + /* If this is not a loop open failure, bail out */ + if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT && + ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) == + IOERR_LOOP_OPEN_FAILURE))) + goto flogifail; + /* FLOGI failed, so there is no fabric */ spin_lock_irq(shost->host_lock); vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index dcc9b3858778..3ffcd9215ca8 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -701,7 +701,8 @@ lpfc_work_done(struct lpfc_hba *phba) /* Set the lpfc data pending flag */ set_bit(LPFC_DATA_READY, &phba->data_flags); } else { - if (phba->link_state >= LPFC_LINK_UP) { + if (phba->link_state >= LPFC_LINK_UP || + phba->link_flag & LS_MDS_LOOPBACK) { pring->flag &= ~LPFC_DEFERRED_RING_EVENT; lpfc_sli_handle_slow_ring_event(phba, pring, (status & diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index df97c6b7433b..e0a5fce416ae 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -4421,6 +4421,19 @@ struct fcp_treceive64_wqe { }; #define TXRDY_PAYLOAD_LEN 12 +#define CMD_SEND_FRAME 0xE1 + +struct send_frame_wqe { + struct ulp_bde64 bde; /* words 0-2 */ + uint32_t frame_len; /* word 3 */ + uint32_t fc_hdr_wd0; /* word 4 */ + uint32_t fc_hdr_wd1; /* word 5 */ + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t fc_hdr_wd2; /* word 12 */ + uint32_t fc_hdr_wd3; /* word 13 */ + uint32_t fc_hdr_wd4; /* word 14 */ + uint32_t fc_hdr_wd5; /* word 15 */ +}; union lpfc_wqe { uint32_t words[16]; @@ -4439,7 +4452,7 @@ union lpfc_wqe { struct fcp_trsp64_wqe fcp_trsp; struct fcp_tsend64_wqe fcp_tsend; struct fcp_treceive64_wqe fcp_treceive; - + struct send_frame_wqe send_frame; }; union lpfc_wqe128 { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 9f6c7e71814b..9add9473cae5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4540,6 +4540,19 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) pmb->vport = phba->pport; if (phba->sli4_hba.link_state.status != LPFC_FC_LA_TYPE_LINK_UP) { + phba->link_flag &= ~(LS_MDS_LINK_DOWN | LS_MDS_LOOPBACK); + + switch (phba->sli4_hba.link_state.status) { + case LPFC_FC_LA_TYPE_MDS_LINK_DOWN: + phba->link_flag |= LS_MDS_LINK_DOWN; + break; + case LPFC_FC_LA_TYPE_MDS_LOOPBACK: + phba->link_flag |= LS_MDS_LOOPBACK; + break; + default: + break; + } + /* Parse and translate status field */ mb = &pmb->u.mb; mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 903c06ff828a..d6b184839bc2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -74,6 +74,8 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *, struct lpfc_iocbq *); static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *, struct hbq_dmabuf *); +static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, + struct hbq_dmabuf *dmabuf); static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_cqe *); static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *, @@ -5907,7 +5909,7 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox, bf_set(lpfc_mbx_set_feature_mds, &mbox->u.mqe.un.set_feature, 1); bf_set(lpfc_mbx_set_feature_mds_deep_loopbk, - &mbox->u.mqe.un.set_feature, 0); + &mbox->u.mqe.un.set_feature, 1); mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS; mbox->u.mqe.un.set_feature.param_len = 8; break; @@ -8688,8 +8690,11 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, memset(wqe, 0, sizeof(union lpfc_wqe128)); /* Some of the fields are in the right position already */ memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe)); - wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */ - wqe->generic.wqe_com.word10 = 0; + if (iocbq->iocb.ulpCommand != CMD_SEND_FRAME) { + /* The ct field has moved so reset */ + wqe->generic.wqe_com.word7 = 0; + wqe->generic.wqe_com.word10 = 0; + } abort_tag = (uint32_t) iocbq->iotag; xritag = iocbq->sli4_xritag; @@ -9183,6 +9188,10 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, } break; + case CMD_SEND_FRAME: + bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag); + bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag); + return 0; case CMD_XRI_ABORTED_CX: case CMD_CREATE_XRI_CR: /* Do we expect to use this? */ case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */ @@ -16137,6 +16146,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) struct fc_vft_header *fc_vft_hdr; uint32_t *header = (uint32_t *) fc_hdr; +#define FC_RCTL_MDS_DIAGS 0xF4 + switch (fc_hdr->fh_r_ctl) { case FC_RCTL_DD_UNCAT: /* uncategorized information */ case FC_RCTL_DD_SOL_DATA: /* solicited data */ @@ -16164,6 +16175,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_RCTL_F_BSY: /* fabric busy to data frame */ case FC_RCTL_F_BSYL: /* fabric busy to link control frame */ case FC_RCTL_LCR: /* link credit reset */ + case FC_RCTL_MDS_DIAGS: /* MDS Diagnostics */ case FC_RCTL_END: /* end */ break; case FC_RCTL_VFTH: /* Virtual Fabric tagging Header */ @@ -16173,12 +16185,16 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) default: goto drop; } + +#define FC_TYPE_VENDOR_UNIQUE 0xFF + switch (fc_hdr->fh_type) { case FC_TYPE_BLS: case FC_TYPE_ELS: case FC_TYPE_FCP: case FC_TYPE_CT: case FC_TYPE_NVME: + case FC_TYPE_VENDOR_UNIQUE: break; case FC_TYPE_IP: case FC_TYPE_ILS: @@ -16189,12 +16205,14 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "2538 Received frame rctl:%s (x%x), type:%s (x%x), " "frame Data:%08x %08x %08x %08x %08x %08x %08x\n", + (fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS) ? "MDS Diags" : lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl, - lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type, - be32_to_cpu(header[0]), be32_to_cpu(header[1]), - be32_to_cpu(header[2]), be32_to_cpu(header[3]), - be32_to_cpu(header[4]), be32_to_cpu(header[5]), - be32_to_cpu(header[6])); + (fc_hdr->fh_type == FC_TYPE_VENDOR_UNIQUE) ? + "Vendor Unique" : lpfc_type_names[fc_hdr->fh_type], + fc_hdr->fh_type, be32_to_cpu(header[0]), + be32_to_cpu(header[1]), be32_to_cpu(header[2]), + be32_to_cpu(header[3]), be32_to_cpu(header[4]), + be32_to_cpu(header[5]), be32_to_cpu(header[6])); return 0; drop: lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, @@ -17000,6 +17018,96 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, lpfc_sli_release_iocbq(phba, iocbq); } +static void +lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_dmabuf *pcmd = cmdiocb->context2; + + if (pcmd && pcmd->virt) + pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys); + kfree(pcmd); + lpfc_sli_release_iocbq(phba, cmdiocb); +} + +static void +lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, + struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header *fc_hdr; + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *iocbq = NULL; + union lpfc_wqe *wqe; + struct lpfc_dmabuf *pcmd = NULL; + uint32_t frame_len; + int rc; + + fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt; + frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl); + + /* Send the received frame back */ + iocbq = lpfc_sli_get_iocbq(phba); + if (!iocbq) + goto exit; + + /* Allocate buffer for command payload */ + pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + if (pcmd) + pcmd->virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL, + &pcmd->phys); + if (!pcmd || !pcmd->virt) + goto exit; + + INIT_LIST_HEAD(&pcmd->list); + + /* copyin the payload */ + memcpy(pcmd->virt, dmabuf->dbuf.virt, frame_len); + + /* fill in BDE's for command */ + iocbq->iocb.un.xseq64.bdl.addrHigh = putPaddrHigh(pcmd->phys); + iocbq->iocb.un.xseq64.bdl.addrLow = putPaddrLow(pcmd->phys); + iocbq->iocb.un.xseq64.bdl.bdeFlags = BUFF_TYPE_BDE_64; + iocbq->iocb.un.xseq64.bdl.bdeSize = frame_len; + + iocbq->context2 = pcmd; + iocbq->vport = vport; + iocbq->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK; + iocbq->iocb_flag |= LPFC_USE_FCPWQIDX; + + /* + * Setup rest of the iocb as though it were a WQE + * Build the SEND_FRAME WQE + */ + wqe = (union lpfc_wqe *)&iocbq->iocb; + + wqe->send_frame.frame_len = frame_len; + wqe->send_frame.fc_hdr_wd0 = be32_to_cpu(*((uint32_t *)fc_hdr)); + wqe->send_frame.fc_hdr_wd1 = be32_to_cpu(*((uint32_t *)fc_hdr + 1)); + wqe->send_frame.fc_hdr_wd2 = be32_to_cpu(*((uint32_t *)fc_hdr + 2)); + wqe->send_frame.fc_hdr_wd3 = be32_to_cpu(*((uint32_t *)fc_hdr + 3)); + wqe->send_frame.fc_hdr_wd4 = be32_to_cpu(*((uint32_t *)fc_hdr + 4)); + wqe->send_frame.fc_hdr_wd5 = be32_to_cpu(*((uint32_t *)fc_hdr + 5)); + + iocbq->iocb.ulpCommand = CMD_SEND_FRAME; + iocbq->iocb.ulpLe = 1; + iocbq->iocb_cmpl = lpfc_sli4_mds_loopback_cmpl; + rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocbq, 0); + if (rc == IOCB_ERROR) + goto exit; + + lpfc_in_buf_free(phba, &dmabuf->dbuf); + return; + +exit: + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2023 Unable to process MDS loopback frame\n"); + if (pcmd && pcmd->virt) + pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys); + kfree(pcmd); + lpfc_sli_release_iocbq(phba, iocbq); + lpfc_in_buf_free(phba, &dmabuf->dbuf); +} + /** * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware * @phba: Pointer to HBA context object. @@ -17038,6 +17146,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->cq_event.cqe.rcqe_cmpl); + if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) { + vport = phba->pport; + /* Handle MDS Loopback frames */ + lpfc_sli4_handle_mds_loopback(vport, dmabuf); + return; + } + /* d_id this frame is directed to */ did = sli4_did_from_fc_hdr(fc_hdr); -- cgit v1.2.3-59-g8ed1b From 2848e1d503d60955ff51ae9ec8d5eada6bd9ba6d Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 15:20:52 -0700 Subject: scsi: lpfc: update version to 11.2.0.14 Change driver version to 11.2.0.14. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 1c26dc67151b..c2653244221c 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.12" +#define LPFC_DRIVER_VERSION "11.2.0.14" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ -- cgit v1.2.3-59-g8ed1b From bfb9956ab4d8242f4594b5f4bee534b935384fd9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 May 2017 20:42:53 +1000 Subject: powerpc/mm: Fix crash in page table dump with huge pages The page table dump code doesn't know about huge pages, so currently it crashes (or walks random memory, usually leading to a crash), if it finds a huge page. On Book3S we only see huge pages in the Linux page tables when we're using the P9 Radix MMU. Teaching the code to properly handle huge pages is a bit more involved, so for now just prevent the crash. Cc: stable@vger.kernel.org # v4.10+ Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables") Signed-off-by: Michael Ellerman --- arch/powerpc/mm/dump_linuxpagetables.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index d659345a98d6..44fe4833910f 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -16,6 +16,7 @@ */ #include #include +#include #include #include #include @@ -391,7 +392,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { addr = start + i * PMD_SIZE; - if (!pmd_none(*pmd)) + if (!pmd_none(*pmd) && !pmd_huge(*pmd)) /* pmd exists */ walk_pte(st, pmd, addr); else @@ -407,7 +408,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) for (i = 0; i < PTRS_PER_PUD; i++, pud++) { addr = start + i * PUD_SIZE; - if (!pud_none(*pud)) + if (!pud_none(*pud) && !pud_huge(*pud)) /* pud exists */ walk_pmd(st, pud, addr); else @@ -427,7 +428,7 @@ static void walk_pagetables(struct pg_state *st) */ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { addr = KERN_VIRT_START + i * PGDIR_SIZE; - if (!pgd_none(*pgd)) + if (!pgd_none(*pgd) && !pgd_huge(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); else -- cgit v1.2.3-59-g8ed1b From 83345d51a49a4b3f3b4a08a5db644dae438b0189 Mon Sep 17 00:00:00 2001 From: Tin Huynh Date: Wed, 17 May 2017 11:25:34 +0700 Subject: i2c: xgene: Set ACPI_COMPANION_I2C With ACPI, i2c-core requires ACPI companion to be set in order for it to create slave device. This patch sets the ACPI companion accordingly. Signed-off-by: Tin Huynh Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xgene-slimpro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index dbe7e44c9321..6ba6c83ca8f1 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -416,6 +416,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev) adapter->class = I2C_CLASS_HWMON; adapter->dev.parent = &pdev->dev; adapter->dev.of_node = pdev->dev.of_node; + ACPI_COMPANION_SET(&adapter->dev, ACPI_COMPANION(&pdev->dev)); i2c_set_adapdata(adapter, ctx); rc = i2c_add_adapter(adapter); if (rc) { -- cgit v1.2.3-59-g8ed1b From 5f63424ab7daac840df2b12dd5bcc5b38d50f779 Mon Sep 17 00:00:00 2001 From: Andrey Korolyov Date: Tue, 16 May 2017 23:54:41 +0300 Subject: USB: serial: ftdi_sio: add Olimex ARM-USB-TINY(H) PIDs This patch adds support for recognition of ARM-USB-TINY(H) devices which are almost identical to ARM-USB-OCD(H) but lacking separate barrel jack and serial console. By suggestion from Johan Hovold it is possible to replace ftdi_jtag_quirk with a bit more generic construction. Since all Olimex-ARM debuggers has exactly two ports, we could safely always use only second port within the debugger family. Signed-off-by: Andrey Korolyov Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 8 ++++---- drivers/usb/serial/ftdi_sio_ids.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 0e634c11abbf..aba74f817dc6 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -809,10 +809,10 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, - { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID), - .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, - { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID), - .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) }, { USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 71fb9e59db71..4fcf1cecb6d7 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -882,6 +882,8 @@ /* Olimex */ #define OLIMEX_VID 0x15BA #define OLIMEX_ARM_USB_OCD_PID 0x0003 +#define OLIMEX_ARM_USB_TINY_PID 0x0004 +#define OLIMEX_ARM_USB_TINY_H_PID 0x002a #define OLIMEX_ARM_USB_OCD_H_PID 0x002b /* -- cgit v1.2.3-59-g8ed1b From 628c2893d44876ddd11602400c70606ade62e129 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 16 May 2017 11:47:29 -0400 Subject: USB: ene_usb6250: fix DMA to the stack The ene_usb6250 sub-driver in usb-storage does USB I/O to buffers on the stack, which doesn't work with vmapped stacks. This patch fixes the problem by allocating a separate 512-byte buffer at probe time and using it for all of the offending I/O operations. Signed-off-by: Alan Stern Reported-and-tested-by: Andreas Hartmann CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/ene_ub6250.c | 90 ++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 369f3c24815a..44af719194b2 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -446,6 +446,10 @@ struct ms_lib_ctrl { #define SD_BLOCK_LEN 9 struct ene_ub6250_info { + + /* I/O bounce buffer */ + u8 *bbuf; + /* for 6250 code */ struct SD_STATUS SD_Status; struct MS_STATUS MS_Status; @@ -493,8 +497,11 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag); static void ene_ub6250_info_destructor(void *extra) { + struct ene_ub6250_info *info = (struct ene_ub6250_info *) extra; + if (!extra) return; + kfree(info->bbuf); } static int ene_send_scsi_cmd(struct us_data *us, u8 fDir, void *buf, int use_sg) @@ -860,8 +867,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, u8 PageNum, u32 *PageBuf, struct ms_lib_type_extdat *ExtraDat) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; + struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; int result; - u8 ExtBuf[4]; u32 bn = PhyBlockAddr * 0x20 + PageNum; result = ene_load_bincode(us, MS_RW_PATTERN); @@ -901,7 +909,7 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, bcb->CDB[2] = (unsigned char)(PhyBlockAddr>>16); bcb->CDB[6] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -910,9 +918,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, ExtraDat->status0 = 0x10; /* Not yet,fireware support */ ExtraDat->status1 = 0x00; /* Not yet,fireware support */ - ExtraDat->ovrflg = ExtBuf[0]; - ExtraDat->mngflg = ExtBuf[1]; - ExtraDat->logadr = memstick_logaddr(ExtBuf[2], ExtBuf[3]); + ExtraDat->ovrflg = bbuf[0]; + ExtraDat->mngflg = bbuf[1]; + ExtraDat->logadr = memstick_logaddr(bbuf[2], bbuf[3]); return USB_STOR_TRANSPORT_GOOD; } @@ -1332,8 +1340,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, u8 PageNum, struct ms_lib_type_extdat *ExtraDat) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; + struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; int result; - u8 ExtBuf[4]; memset(bcb, 0, sizeof(struct bulk_cb_wrap)); bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); @@ -1347,7 +1356,7 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, bcb->CDB[2] = (unsigned char)(PhyBlock>>16); bcb->CDB[6] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -1355,9 +1364,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, ExtraDat->intr = 0x80; /* Not yet, waiting for fireware support */ ExtraDat->status0 = 0x10; /* Not yet, waiting for fireware support */ ExtraDat->status1 = 0x00; /* Not yet, waiting for fireware support */ - ExtraDat->ovrflg = ExtBuf[0]; - ExtraDat->mngflg = ExtBuf[1]; - ExtraDat->logadr = memstick_logaddr(ExtBuf[2], ExtBuf[3]); + ExtraDat->ovrflg = bbuf[0]; + ExtraDat->mngflg = bbuf[1]; + ExtraDat->logadr = memstick_logaddr(bbuf[2], bbuf[3]); return USB_STOR_TRANSPORT_GOOD; } @@ -1556,9 +1565,9 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st) u16 PhyBlock, newblk, i; u16 LogStart, LogEnde; struct ms_lib_type_extdat extdat; - u8 buf[0x200]; u32 count = 0, index = 0; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; for (PhyBlock = 0; PhyBlock < info->MS_Lib.NumberOfPhyBlock;) { ms_lib_phy_to_log_range(PhyBlock, &LogStart, &LogEnde); @@ -1572,14 +1581,16 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st) } if (count == PhyBlock) { - ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, &buf); + ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, + bbuf); count += 0x80; } index = (PhyBlock % 0x80) * 4; - extdat.ovrflg = buf[index]; - extdat.mngflg = buf[index+1]; - extdat.logadr = memstick_logaddr(buf[index+2], buf[index+3]); + extdat.ovrflg = bbuf[index]; + extdat.mngflg = bbuf[index+1]; + extdat.logadr = memstick_logaddr(bbuf[index+2], + bbuf[index+3]); if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) { ms_lib_setacquired_errorblock(us, PhyBlock); @@ -2062,9 +2073,9 @@ static int ene_ms_init(struct us_data *us) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; int result; - u8 buf[0x200]; u16 MSP_BlockSize, MSP_UserAreaBlocks; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; printk(KERN_INFO "transport --- ENE_MSInit\n"); @@ -2083,13 +2094,13 @@ static int ene_ms_init(struct us_data *us) bcb->CDB[0] = 0xF1; bcb->CDB[1] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) { printk(KERN_ERR "Execution MS Init Code Fail !!\n"); return USB_STOR_TRANSPORT_ERROR; } /* the same part to test ENE */ - info->MS_Status = *(struct MS_STATUS *)&buf[0]; + info->MS_Status = *(struct MS_STATUS *) bbuf; if (info->MS_Status.Insert && info->MS_Status.Ready) { printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert); @@ -2098,15 +2109,15 @@ static int ene_ms_init(struct us_data *us) printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG); printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP); if (info->MS_Status.IsMSPro) { - MSP_BlockSize = (buf[6] << 8) | buf[7]; - MSP_UserAreaBlocks = (buf[10] << 8) | buf[11]; + MSP_BlockSize = (bbuf[6] << 8) | bbuf[7]; + MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11]; info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks; } else { ms_card_init(us); /* Card is MS (to ms.c)*/ } usb_stor_dbg(us, "MS Init Code OK !!\n"); } else { - usb_stor_dbg(us, "MS Card Not Ready --- %x\n", buf[0]); + usb_stor_dbg(us, "MS Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; } @@ -2116,9 +2127,9 @@ static int ene_ms_init(struct us_data *us) static int ene_sd_init(struct us_data *us) { int result; - u8 buf[0x200]; struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; usb_stor_dbg(us, "transport --- ENE_SDInit\n"); /* SD Init Part-1 */ @@ -2152,17 +2163,17 @@ static int ene_sd_init(struct us_data *us) bcb->Flags = US_BULK_FLAG_IN; bcb->CDB[0] = 0xF1; - result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Execution SD Init Code Fail !!\n"); return USB_STOR_TRANSPORT_ERROR; } - info->SD_Status = *(struct SD_STATUS *)&buf[0]; + info->SD_Status = *(struct SD_STATUS *) bbuf; if (info->SD_Status.Insert && info->SD_Status.Ready) { struct SD_STATUS *s = &info->SD_Status; - ene_get_card_status(us, (unsigned char *)&buf); + ene_get_card_status(us, bbuf); usb_stor_dbg(us, "Insert = %x\n", s->Insert); usb_stor_dbg(us, "Ready = %x\n", s->Ready); usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC); @@ -2170,7 +2181,7 @@ static int ene_sd_init(struct us_data *us) usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed); usb_stor_dbg(us, "WtP = %x\n", s->WtP); } else { - usb_stor_dbg(us, "SD Card Not Ready --- %x\n", buf[0]); + usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; } return USB_STOR_TRANSPORT_GOOD; @@ -2180,13 +2191,15 @@ static int ene_sd_init(struct us_data *us) static int ene_init(struct us_data *us) { int result; - u8 misc_reg03 = 0; + u8 misc_reg03; struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); + u8 *bbuf = info->bbuf; - result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03); + result = ene_get_card_type(us, REG_CARD_STATUS, bbuf); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; + misc_reg03 = bbuf[0]; if (misc_reg03 & 0x01) { if (!info->SD_Status.Ready) { result = ene_sd_init(us); @@ -2303,8 +2316,9 @@ static int ene_ub6250_probe(struct usb_interface *intf, const struct usb_device_id *id) { int result; - u8 misc_reg03 = 0; + u8 misc_reg03; struct us_data *us; + struct ene_ub6250_info *info; result = usb_stor_probe1(&us, intf, id, (id - ene_ub6250_usb_ids) + ene_ub6250_unusual_dev_list, @@ -2313,11 +2327,16 @@ static int ene_ub6250_probe(struct usb_interface *intf, return result; /* FIXME: where should the code alloc extra buf ? */ - if (!us->extra) { - us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); - if (!us->extra) - return -ENOMEM; - us->extra_destructor = ene_ub6250_info_destructor; + us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); + if (!us->extra) + return -ENOMEM; + us->extra_destructor = ene_ub6250_info_destructor; + + info = (struct ene_ub6250_info *)(us->extra); + info->bbuf = kmalloc(512, GFP_KERNEL); + if (!info->bbuf) { + kfree(us->extra); + return -ENOMEM; } us->transport_name = "ene_ub6250"; @@ -2329,12 +2348,13 @@ static int ene_ub6250_probe(struct usb_interface *intf, return result; /* probe card type */ - result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03); + result = ene_get_card_type(us, REG_CARD_STATUS, info->bbuf); if (result != USB_STOR_XFER_GOOD) { usb_stor_disconnect(intf); return USB_STOR_TRANSPORT_ERROR; } + misc_reg03 = info->bbuf[0]; if (!(misc_reg03 & 0x01)) { pr_info("ums_eneub6250: This driver only supports SD/MS cards. " "It does not support SM cards.\n"); -- cgit v1.2.3-59-g8ed1b From 2f964780c03b73de269b08d12aff96a9618d13f3 Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Samavedam Date: Tue, 16 May 2017 14:38:08 +0200 Subject: USB: core: replace %p with %pK Format specifier %p can leak kernel addresses while not valuing the kptr_restrict system settings. When kptr_restrict is set to (1), kernel pointers printed using the %pK format specifier will be replaced with Zeros. Debugging Note : &pK prints only Zeros as address. If you need actual address information, write 0 to kptr_restrict. echo 0 > /proc/sys/kernel/kptr_restrict [Found by poking around in a random vendor kernel tree, it would be nice if someone would actually send these types of patches upstream - gkh] Signed-off-by: Vamsi Krishna Samavedam Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++------- drivers/usb/core/hcd.c | 4 ++-- drivers/usb/core/urb.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index cfc3cff6e8d5..8e6ef671be9b 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -475,11 +475,11 @@ static void snoop_urb(struct usb_device *udev, if (userurb) { /* Async */ if (when == SUBMIT) - dev_info(&udev->dev, "userurb %p, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else - dev_info(&udev->dev, "userurb %p, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); @@ -1895,7 +1895,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; @@ -1912,7 +1912,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { @@ -2043,7 +2043,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2060,7 +2060,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { @@ -2489,7 +2489,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, #endif case USBDEVFS_DISCARDURB: - snoop(&dev->dev, "%s: DISCARDURB %p\n", __func__, p); + snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p); ret = proc_unlinkurb(ps, p); break; diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 49550790a3cb..5235d6be1bdf 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1723,7 +1723,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status) if (retval == 0) retval = -EINPROGRESS; else if (retval != -EIDRM && retval != -EBUSY) - dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n", + dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n", urb, retval); usb_put_dev(udev); } @@ -1890,7 +1890,7 @@ rescan: /* kick hcd */ unlink1(hcd, urb, -ESHUTDOWN); dev_dbg (hcd->self.controller, - "shutdown urb %p ep%d%s%s\n", + "shutdown urb %pK ep%d%s%s\n", urb, usb_endpoint_num(&ep->desc), is_in ? "in" : "out", ({ char *s; diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index d75cb8c0f7df..47903d510955 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -338,7 +338,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) if (!urb || !urb->complete) return -EINVAL; if (urb->hcpriv) { - WARN_ONCE(1, "URB %p submitted while active\n", urb); + WARN_ONCE(1, "URB %pK submitted while active\n", urb); return -EBUSY; } -- cgit v1.2.3-59-g8ed1b From 0bd193d62b4270a2a7a09da43ad1034c7ca5b3d3 Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Sat, 13 May 2017 23:49:26 +0300 Subject: usb: misc: legousbtower: Fix memory leak get_version_reply is not freed if function returns with success. Fixes: 942a48730faf ("usb: misc: legousbtower: Fix buffers on stack") Reported-by: Heikki Krogerus Signed-off-by: Maksim Salau Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index aa3c280fdf8d..0782ac6f5edf 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -926,6 +926,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device USB_MAJOR, dev->minor); exit: + kfree(get_version_reply); return retval; error: -- cgit v1.2.3-59-g8ed1b From 41318a2b82f5d5fe1fb408f6d6e0b22aa557111d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 May 2017 12:06:32 +0200 Subject: uwb: fix device quirk on big-endian hosts Add missing endianness conversion when using the USB device-descriptor idProduct field to apply a hardware quirk. Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver") Cc: stable # 2.6.28 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/i1480/dfu/usb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 6345e85822a4..a50cf45e530f 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -341,6 +341,7 @@ error_submit_ep1: static int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) { + struct usb_device *udev = interface_to_usbdev(iface); struct i1480_usb *i1480_usb; struct i1480 *i1480; struct device *dev = &iface->dev; @@ -352,8 +353,8 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) iface->cur_altsetting->desc.bInterfaceNumber); goto error; } - if (iface->num_altsetting > 1 - && interface_to_usbdev(iface)->descriptor.idProduct == 0xbabe) { + if (iface->num_altsetting > 1 && + le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) { /* Need altsetting #1 [HW QUIRK] or EP1 won't work */ result = usb_set_interface(interface_to_usbdev(iface), 0, 1); if (result < 0) -- cgit v1.2.3-59-g8ed1b From 7cdfe4ddea47dbc66306edf802fd2ef86a6c8867 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 12 May 2017 10:06:29 +0200 Subject: sisusb_con: fix coccinelle warning After commit d705ff3818 (tty: vt, cleanup and document con_scroll), in the coccinelle output, we can see: drivers/usb/misc/sisusbvga/sisusb_con.c:852:8-9: WARNING: return of 0/1 in function 'sisusbcon_scroll_area' with return type bool Return true instead of 1 in the function returning bool which was intended to do in d705ff3818 but omitted. Signed-off-by: Jiri Slaby Fixes: d705ff3818 (tty: vt, cleanup and document con_scroll) Cc: Thomas Winischhofer Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/sisusb_con.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c index 3c6948af726a..f019d80ca9e4 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ b/drivers/usb/misc/sisusbvga/sisusb_con.c @@ -973,7 +973,7 @@ sisusbcon_set_origin(struct vc_data *c) mutex_unlock(&sisusb->lock); - return 1; + return true; } /* Interface routine */ -- cgit v1.2.3-59-g8ed1b From 63afd5cc78775018ea2dec4004428dafa5283e93 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:36:01 +0200 Subject: USB: chaoskey: fix Alea quirk on big-endian hosts Add missing endianness conversion when applying the Alea timeout quirk. Found using sparse: warning: restricted __le16 degrades to integer Fixes: e4a886e811cd ("hwrng: chaoskey - Fix URB warning due to timeout on Alea") Cc: stable # 4.8 Cc: Bob Ham Cc: Herbert Xu Cc: Keith Packard Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/chaoskey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index e9cae4d82af2..15d4e64d3b65 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -192,7 +192,7 @@ static int chaoskey_probe(struct usb_interface *interface, dev->in_ep = in_ep; - if (udev->descriptor.idVendor != ALEA_VENDOR_ID) + if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) dev->reads_started = 1; dev->size = size; -- cgit v1.2.3-59-g8ed1b From dd5ca753fa92fb736b1395db892bd29f78e6d408 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:36:02 +0200 Subject: USB: iowarrior: fix info ioctl on big-endian hosts Drop erroneous le16_to_cpu when returning the USB device speed which is already in host byte order. Found using sparse: warning: cast to restricted __le16 Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 77569531b78a..83b05a287b0c 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -554,7 +554,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice); /* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */ - info.speed = le16_to_cpu(dev->udev->speed); + info.speed = dev->udev->speed; info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber; info.report_size = dev->report_size; -- cgit v1.2.3-59-g8ed1b From 1a744d2eb76aaafb997fda004ae3ae62a1538f85 Mon Sep 17 00:00:00 2001 From: Anton Bondarenko Date: Sun, 7 May 2017 01:53:46 +0200 Subject: usb: core: fix potential memory leak in error path during hcd creation Free memory allocated for address0_mutex if allocation of bandwidth_mutex failed. Fixes: feb26ac31a2a ("usb: core: hub: hub_port_init lock controller instead of bus") Signed-off-by: Anton Bondarenko Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 5235d6be1bdf..5dea98358c05 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2520,6 +2520,7 @@ struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex), GFP_KERNEL); if (!hcd->bandwidth_mutex) { + kfree(hcd->address0_mutex); kfree(hcd); dev_dbg(dev, "hcd bandwidth mutex alloc failed\n"); return NULL; -- cgit v1.2.3-59-g8ed1b From dd14a3e9b92ac6f0918054f9e3477438760a4fa6 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 27 Apr 2017 12:12:49 -0700 Subject: usb: r8a66597-hcd: decrease timeout The timeout for BULK packets was 300ms which is a long time if other endpoints or devices are waiting for their turn. Changing it to 50ms greatly increased the overall performance for multi-endpoint devices. Fixes: 5d3043586db4 ("usb: r8a66597-hcd: host controller driver for R8A6659") Signed-off-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/r8a66597-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index bfa7fa3d2eea..698ec8721403 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1269,7 +1269,7 @@ static void set_td_timer(struct r8a66597 *r8a66597, struct r8a66597_td *td) time = 30; break; default: - time = 300; + time = 50; break; } -- cgit v1.2.3-59-g8ed1b From 1f873d857b6c2fefb4dada952674aa01bcfb92bd Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 27 Apr 2017 12:12:02 -0700 Subject: usb: r8a66597-hcd: select a different endpoint on timeout If multiple endpoints on a single device have pending IN URBs and one endpoint times out due to NAKs (perfectly legal), select a different endpoint URB to try. The existing code only checked to see another device address has pending URBs and ignores other IN endpoints on the current device address. This leads to endpoints never getting serviced if one endpoint is using NAK as a flow control method. Fixes: 5d3043586db4 ("usb: r8a66597-hcd: host controller driver for R8A6659") Signed-off-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/r8a66597-hcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 698ec8721403..7bf78be1fd32 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1785,6 +1785,7 @@ static void r8a66597_td_timer(unsigned long _r8a66597) pipe = td->pipe; pipe_stop(r8a66597, pipe); + /* Select a different address or endpoint */ new_td = td; do { list_move_tail(&new_td->queue, @@ -1794,7 +1795,8 @@ static void r8a66597_td_timer(unsigned long _r8a66597) new_td = td; break; } - } while (td != new_td && td->address == new_td->address); + } while (td != new_td && td->address == new_td->address && + td->pipe->info.epnum == new_td->pipe->info.epnum); start_transfer(r8a66597, new_td); -- cgit v1.2.3-59-g8ed1b From a7415477a20448bbb7d13765784c0b29249a176f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 16 May 2017 16:26:13 +0200 Subject: USB: ehci-platform: fix companion-device leak Make sure do drop the reference taken to the companion device during resume. Fixes: d4d75128b8fd ("usb: host: ehci-platform: fix usb 1.1 device is not connected in system resume") Cc: stable # 4.11 Signed-off-by: Johan Hovold Acked-by: Yoshihiro Shimoda Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-platform.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c index bc7b9be12f54..f1908ea9fbd8 100644 --- a/drivers/usb/host/ehci-platform.c +++ b/drivers/usb/host/ehci-platform.c @@ -384,8 +384,10 @@ static int ehci_platform_resume(struct device *dev) } companion_dev = usb_of_get_companion_dev(hcd->self.controller); - if (companion_dev) + if (companion_dev) { device_pm_wait_for_dev(hcd->self.controller, companion_dev); + put_device(companion_dev); + } ehci_resume(hcd, priv->reset_on_resume); return 0; -- cgit v1.2.3-59-g8ed1b From ef53b92ece675ed9778b50f4432e004683696d01 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 16 May 2017 16:26:14 +0200 Subject: USB: core: of: document reference taken by companion helper Document that the new companion-device lookup helper takes a reference to the companion device which needs to be dropped after use. Signed-off-by: Johan Hovold Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/of.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c index d787f195a9a6..d563cbcf76cf 100644 --- a/drivers/usb/core/of.c +++ b/drivers/usb/core/of.c @@ -53,6 +53,9 @@ EXPORT_SYMBOL_GPL(usb_of_get_child_node); * * Find the companion device from platform bus. * + * Takes a reference to the returned struct device which needs to be dropped + * after use. + * * Return: On success, a pointer to the companion device, %NULL on failure. */ struct device *usb_of_get_companion_dev(struct device *dev) -- cgit v1.2.3-59-g8ed1b From 5383fae76b8224a8f0465be6ab9c7a645042951a Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 13 May 2017 15:49:17 +0200 Subject: doc-rst: fixed kernel-doc directives in usb/typec.rst Even if this file is not yet included in any toctree, it is parsed by Sphinx since it is named '.rst'. This patch fixes the following two ERRORs from Sphinx build: Documentation/usb/typec.rst:116: ERROR: Error in "kernel-doc" directive: invalid option block. .. kernel-doc:: drivers/usb/typec/typec.c :functions: typec_register_cable typec_unregister_cable typec_register_plug typec_unregister_plug Documentation/usb/typec.rst:139: ERROR: Error in "kernel-doc" directive: invalid option block. .. kernel-doc:: drivers/usb/typec/typec.c :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role typec_set_pwr_opmode Signed-off-by: Markus Heiser Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/typec.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/usb/typec.rst b/Documentation/usb/typec.rst index b67a46779de9..8a7249f2ff04 100644 --- a/Documentation/usb/typec.rst +++ b/Documentation/usb/typec.rst @@ -114,8 +114,7 @@ the details during registration. The class offers the following API for registering/unregistering cables and their plugs: .. kernel-doc:: drivers/usb/typec/typec.c - :functions: typec_register_cable typec_unregister_cable typec_register_plug - typec_unregister_plug + :functions: typec_register_cable typec_unregister_cable typec_register_plug typec_unregister_plug The class will provide a handle to struct typec_cable and struct typec_plug if the registration is successful, or NULL if it isn't. @@ -137,8 +136,7 @@ during connection of a partner or cable, the port driver must use the following APIs to report it to the class: .. kernel-doc:: drivers/usb/typec/typec.c - :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role - typec_set_pwr_opmode + :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role typec_set_pwr_opmode Alternate Modes ~~~~~~~~~~~~~~~ -- cgit v1.2.3-59-g8ed1b From d81182ce30dbd497a1e7047d7fda2af040347790 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:25 +0200 Subject: USB: gadget: dummy_hcd: fix hub-descriptor removable fields Flag the first and only port as removable while also leaving the remaining bits (including the reserved bit zero) unset in accordance with the specifications: "Within a byte, if no port exists for a given location, the bit field representing the port characteristics shall be 0." Also add a comment marking the legacy PortPwrCtrlMask field. Fixes: 1cd8fd2887e1 ("usb: gadget: dummy_hcd: add SuperSpeed support") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Tatyana Brokhman Signed-off-by: Johan Hovold Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index c79081952ea0..ccabb51cb98d 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2008,7 +2008,7 @@ ss_hub_descriptor(struct usb_hub_descriptor *desc) HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/ - desc->u.ss.DeviceRemovable = 0xffff; + desc->u.ss.DeviceRemovable = 0; } static inline void hub_descriptor(struct usb_hub_descriptor *desc) @@ -2020,8 +2020,8 @@ static inline void hub_descriptor(struct usb_hub_descriptor *desc) HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; - desc->u.hs.DeviceRemovable[0] = 0xff; - desc->u.hs.DeviceRemovable[1] = 0xff; + desc->u.hs.DeviceRemovable[0] = 0; + desc->u.hs.DeviceRemovable[1] = 0xff; /* PortPwrCtrlMask */ } static int dummy_hub_control( -- cgit v1.2.3-59-g8ed1b From ec963b412a54aac8e527708ecad06a6988a86fb4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:26 +0200 Subject: USB: usbip: fix nonconforming hub descriptor Fix up the root-hub descriptor to accommodate the variable-length DeviceRemovable and PortPwrCtrlMask fields, while marking all ports as removable (and leaving the reserved bit zero unset). Also add a build-time constraint on VHCI_HC_PORTS which must never be greater than USB_MAXCHILDREN (but this was only enforced through a KConfig constant). This specifically fixes the descriptor layout whenever VHCI_HC_PORTS is greater than seven (default is 8). Fixes: 04679b3489e0 ("Staging: USB/IP: add client driver") Cc: Takahiro Hirofuchi Cc: Valentina Manea Signed-off-by: Johan Hovold Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 5d8b2c261940..0585078638db 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -235,14 +235,19 @@ done: static inline void hub_descriptor(struct usb_hub_descriptor *desc) { + int width; + memset(desc, 0, sizeof(*desc)); desc->bDescriptorType = USB_DT_HUB; - desc->bDescLength = 9; desc->wHubCharacteristics = cpu_to_le16( HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); + desc->bNbrPorts = VHCI_HC_PORTS; - desc->u.hs.DeviceRemovable[0] = 0xff; - desc->u.hs.DeviceRemovable[1] = 0xff; + BUILD_BUG_ON(VHCI_HC_PORTS > USB_MAXCHILDREN); + width = desc->bNbrPorts / 8 + 1; + desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * width; + memset(&desc->u.hs.DeviceRemovable[0], 0, width); + memset(&desc->u.hs.DeviceRemovable[width], 0xff, width); } static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, -- cgit v1.2.3-59-g8ed1b From 2c25a2c818023df64463aac3288a9f969491e507 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:27 +0200 Subject: USB: hub: fix SS hub-descriptor handling A SuperSpeed hub descriptor does not have any variable-length fields so bail out when reading a short descriptor. This avoids parsing and leaking two bytes of uninitialised slab data through sysfs removable-attributes. Fixes: dbe79bbe9dcb ("USB 3.0 Hub Changes") Cc: stable # 2.6.39 Cc: John Youn Acked-by: Alan Stern Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 9dca59ef18b3..3ff1e9f89f2d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -380,8 +380,12 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data) USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, dtype << 8, 0, data, size, USB_CTRL_GET_TIMEOUT); - if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) + if (hub_is_superspeed(hdev)) { + if (ret == size) + return ret; + } else if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) { return ret; + } } return -EINVAL; } @@ -1321,7 +1325,7 @@ static int hub_configure(struct usb_hub *hub, /* Request the entire hub descriptor. * hub->descriptor can handle USB_MAXCHILDREN ports, - * but the hub can/will return fewer bytes here. + * but a (non-SS) hub can/will return fewer bytes here. */ ret = get_hub_descriptor(hdev, hub->descriptor); if (ret < 0) { -- cgit v1.2.3-59-g8ed1b From bec444cd1c94c48df409a35ad4e5b143c245c3f7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:28 +0200 Subject: USB: hub: fix non-SS hub-descriptor handling Add missing sanity check on the non-SuperSpeed hub-descriptor length in order to avoid parsing and leaking two bytes of uninitialised slab data through sysfs removable-attributes (or a compound-device debug statement). Note that we only make sure that the DeviceRemovable field is always present (and specifically ignore the unused PortPwrCtrlMask field) in order to continue support any hubs with non-compliant descriptors. As a further safeguard, the descriptor buffer is also cleared. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable # 2.6.12 Signed-off-by: Johan Hovold Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3ff1e9f89f2d..f77a4ebde7d5 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -362,7 +362,8 @@ static void usb_set_lpm_parameters(struct usb_device *udev) } /* USB 2.0 spec Section 11.24.4.5 */ -static int get_hub_descriptor(struct usb_device *hdev, void *data) +static int get_hub_descriptor(struct usb_device *hdev, + struct usb_hub_descriptor *desc) { int i, ret, size; unsigned dtype; @@ -378,12 +379,16 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data) for (i = 0; i < 3; i++) { ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, - dtype << 8, 0, data, size, + dtype << 8, 0, desc, size, USB_CTRL_GET_TIMEOUT); if (hub_is_superspeed(hdev)) { if (ret == size) return ret; - } else if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) { + } else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) { + /* Make sure we have the DeviceRemovable field. */ + size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1; + if (ret < size) + return -EMSGSIZE; return ret; } } @@ -1317,7 +1322,7 @@ static int hub_configure(struct usb_hub *hub, } mutex_init(&hub->status_mutex); - hub->descriptor = kmalloc(sizeof(*hub->descriptor), GFP_KERNEL); + hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL); if (!hub->descriptor) { ret = -ENOMEM; goto fail; -- cgit v1.2.3-59-g8ed1b From 93491ced3c87c94b12220dbac0527e1356702179 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:29 +0200 Subject: USB: hub: fix SS max number of ports Add define for the maximum number of ports on a SuperSpeed hub as per USB 3.1 spec Table 10-5, and use it when verifying the retrieved hub descriptor. This specifically avoids benign attempts to update the DeviceRemovable mask for non-existing ports (should we get that far). Fixes: dbe79bbe9dcb ("USB 3.0 Hub Changes") Acked-by: Alan Stern Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 +++++++- include/uapi/linux/usb/ch11.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f77a4ebde7d5..b8bb20d7acdb 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1336,7 +1336,13 @@ static int hub_configure(struct usb_hub *hub, if (ret < 0) { message = "can't read hub descriptor"; goto fail; - } else if (hub->descriptor->bNbrPorts > USB_MAXCHILDREN) { + } + + maxchild = USB_MAXCHILDREN; + if (hub_is_superspeed(hdev)) + maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS); + + if (hub->descriptor->bNbrPorts > maxchild) { message = "hub has too many ports!"; ret = -ENODEV; goto fail; diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h index 361297e96f58..576c704e3fb8 100644 --- a/include/uapi/linux/usb/ch11.h +++ b/include/uapi/linux/usb/ch11.h @@ -22,6 +22,9 @@ */ #define USB_MAXCHILDREN 31 +/* See USB 3.1 spec Table 10-5 */ +#define USB_SS_MAXPORTS 15 + /* * Hub request types */ -- cgit v1.2.3-59-g8ed1b From 5120a266928a07231d198bb518f6fe73148786a3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:30 +0200 Subject: USB: host: xhci: use max-port define Use the new define for the maximum number of SuperSpeed ports instead of a constant when allocating xHCI root hubs. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index bbe22bcc550a..69428e970925 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2307,10 +2307,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) /* Place limits on the number of roothub ports so that the hub * descriptors aren't longer than the USB core will allocate. */ - if (xhci->num_usb3_ports > 15) { + if (xhci->num_usb3_ports > USB_SS_MAXPORTS) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "Limiting USB 3.0 roothub ports to 15."); - xhci->num_usb3_ports = 15; + "Limiting USB 3.0 roothub ports to %u.", + USB_SS_MAXPORTS); + xhci->num_usb3_ports = USB_SS_MAXPORTS; } if (xhci->num_usb2_ports > USB_MAXCHILDREN) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, -- cgit v1.2.3-59-g8ed1b From 5667c86acf021e6dcf02584408b4484a273ac68f Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Sun, 14 May 2017 21:41:55 -0700 Subject: mac80211: strictly check mesh address extension mode Mesh forwarding path checks for address extension mode to fetch appropriate proxied address and MPP address. Existing condition that looks for 6 address format is not strict enough so that frames with improper values are processed and invalid entries are added into MPP table. Fix that by adding a stricter check before processing the packet. Per IEEE Std 802.11s-2011 spec. Table 7-6g1 lists address extension mode 0x3 as reserved one. And also Table Table 9-13 does not specify 0x3 as valid address field. Fixes: 9b395bc3be1c ("mac80211: verify that skb data is present") Signed-off-by: Rajkumar Manoharan Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- net/wireless/util.c | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 35f4c7d7a500..1f75280ba26c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2492,7 +2492,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) { mpp_addr = hdr->addr3; proxied_addr = mesh_hdr->eaddr1; - } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) { + } else if ((mesh_hdr->flags & MESH_FLAGS_AE) == + MESH_FLAGS_AE_A5_A6) { /* has_a4 already checked in ieee80211_rx_mesh_check */ mpp_addr = hdr->addr4; proxied_addr = mesh_hdr->eaddr2; diff --git a/net/wireless/util.c b/net/wireless/util.c index 7198373e2920..4992f1025c9d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, if (iftype == NL80211_IFTYPE_MESH_POINT) skb_copy_bits(skb, hdrlen, &mesh_flags, 1); + mesh_flags &= MESH_FLAGS_AE; + switch (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { case cpu_to_le16(IEEE80211_FCTL_TODS): @@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, iftype != NL80211_IFTYPE_STATION)) return -1; if (iftype == NL80211_IFTYPE_MESH_POINT) { - if (mesh_flags & MESH_FLAGS_AE_A4) + if (mesh_flags == MESH_FLAGS_AE_A4) return -1; - if (mesh_flags & MESH_FLAGS_AE_A5_A6) { + if (mesh_flags == MESH_FLAGS_AE_A5_A6) { skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), tmp.h_dest, 2 * ETH_ALEN); @@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, ether_addr_equal(tmp.h_source, addr))) return -1; if (iftype == NL80211_IFTYPE_MESH_POINT) { - if (mesh_flags & MESH_FLAGS_AE_A5_A6) + if (mesh_flags == MESH_FLAGS_AE_A5_A6) return -1; - if (mesh_flags & MESH_FLAGS_AE_A4) + if (mesh_flags == MESH_FLAGS_AE_A4) skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), tmp.h_source, ETH_ALEN); -- cgit v1.2.3-59-g8ed1b From 1cc896ed61fa0441dffef726ff678fd82a9e6265 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 May 2017 16:01:30 +0100 Subject: iommu/dma: Don't touch invalid iova_domain members When __iommu_dma_map() and iommu_dma_free_iova() are called from iommu_dma_get_msi_page(), various iova_*() helpers are still invoked in the process, whcih is unwise since they access a different member of the union (the iova_domain) from that which was last written, and there's no guarantee that sensible values will result anyway. CLean up the code paths that are valid for an MSI cookie to ensure we only do iova_domain-specific things when we're actually dealing with one. Fixes: a44e6657585b ("iommu/dma: Clean up MSI IOVA allocation") Reported-by: Nate Watterson Tested-by: Shanker Donthineni Tested-by: Bharat Bhushan Signed-off-by: Robin Murphy Tested-by: Eric Auger Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8348f366ddd1..62618e77bedc 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -396,13 +396,13 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, dma_addr_t iova, size_t size) { struct iova_domain *iovad = &cookie->iovad; - unsigned long shift = iova_shift(iovad); /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; else - free_iova_fast(iovad, iova >> shift, size >> shift); + free_iova_fast(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad)); } static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, @@ -617,11 +617,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t iova_off = iova_offset(iovad, phys); + size_t iova_off = 0; dma_addr_t iova; - size = iova_align(iovad, size + iova_off); + if (cookie->type == IOMMU_DMA_IOVA_COOKIE) { + iova_off = iova_offset(&cookie->iovad, phys); + size = iova_align(&cookie->iovad, size + iova_off); + } + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) return DMA_ERROR_CODE; -- cgit v1.2.3-59-g8ed1b From f73a7eee900e95404b61408a23a1df5c5811704c Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Fri, 5 May 2017 11:39:59 -0700 Subject: iommu/vt-d: Flush the IOTLB to get rid of the initial kdump mappings Ever since commit 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel") the kdump kernel copies the IOMMU context tables from the previous kernel. Each device mappings will be destroyed once the driver for the respective device takes over. This unfortunately breaks the workflow of mapping and unmapping a new context to the IOMMU. The mapping function assumes that either: 1) Unmapping did the proper IOMMU flushing and it only ever flush if the IOMMU unit supports caching invalid entries. 2) The system just booted and the initialization code took care of flushing all IOMMU caches. This assumption is not true for the kdump kernel since the context tables have been copied from the previous kernel and translations could have been cached ever since. So make sure to flush the IOTLB as well when we destroy these old copied mappings. Cc: Joerg Roedel Cc: David Woodhouse Cc: David Woodhouse Cc: Anthony Liguori Signed-off-by: KarimAllah Ahmed Acked-by: David Woodhouse Cc: stable@vger.kernel.org v4.2+ Fixes: 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel") Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 90ab0115d78e..fc2765ccdb57 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2055,11 +2055,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } } pgd = domain->pgd; -- cgit v1.2.3-59-g8ed1b From 745b6e74704782488dd875292bc49e24d23e81fd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 13:35:51 +0200 Subject: iommu/mediatek: Include linux/dma-mapping.h The mediatek iommu driver relied on an implicit include of dma-mapping.h, but for some reason that is no longer there in 4.12-rc1: drivers/iommu/mtk_iommu_v1.c: In function 'mtk_iommu_domain_finalise': drivers/iommu/mtk_iommu_v1.c:233:16: error: implicit declaration of function 'dma_zalloc_coherent'; did you mean 'debug_dma_alloc_coherent'? [-Werror=implicit-function-declaration] drivers/iommu/mtk_iommu_v1.c: In function 'mtk_iommu_domain_free': drivers/iommu/mtk_iommu_v1.c:265:2: error: implicit declaration of function 'dma_free_coherent'; did you mean 'debug_dma_free_coherent'? [-Werror=implicit-function-declaration] This adds an explicit #include to make it build again. Signed-off-by: Arnd Bergmann Fixes: 208480bb27 ('iommu: Remove trace-events include from iommu.h') Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a27ef570c328..bc1efbfb9ddf 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From 7e1b9521f5a8356553f5e58b07952bf346632ea4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 11 Mar 2017 19:09:45 +0000 Subject: dm cache: handle kmalloc failure allocating background_tracker struct Currently there is no kmalloc failure check on the allocation of the background_tracker struct in btracker_create(), and so a NULL return will lead to a NULL pointer dereference. Add a NULL check. Detected by CoverityScan, CID#1416587 ("Dereference null return value") Fixes: b29d4986d ("dm cache: significant rework to leverage dm-bio-prison-v2") Signed-off-by: Colin Ian King Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-background-tracker.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 9b1afdfb13f0..707233891291 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work) { struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL); + if (!b) { + DMERR("couldn't create background_tracker"); + return NULL; + } + b->max_work = max_work; atomic_set(&b->pending_promotes, 0); atomic_set(&b->pending_writebacks, 0); -- cgit v1.2.3-59-g8ed1b From bca5238816939436d72ae6bab124c4b0641a3a99 Mon Sep 17 00:00:00 2001 From: Ravikumar Kattekola Date: Wed, 17 May 2017 06:51:38 -0700 Subject: ARM: dts: dra7: Reduce cpu thermal shutdown temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On dra7, as per TRM, the HW shutdown (TSHUT) temperature is hardcoded to 123C and cannot be modified by SW. This means when the temperature reaches 123C HW asserts TSHUT output which signals a warm reset. This reset is held until the temperature goes below the TSHUT low (105C). While in SW, the thermal driver continuously monitors current temperature and takes decisions based on whether it reached an alert or a critical point. The intention of setting a SW critical point is to prevent force reset by HW and instead do an orderly_poweroff(). But if the SW critical temperature is greater than or equal to that of HW then it defeats the purpose. To address this and let SW take action before HW does keep the SW critical temperature less than HW TSHUT value. The value for SW critical temperature was chosen as 120C just to ensure we give SW sometime before HW catches up. Document reference SPRUI30C – DRA75x, DRA74x Technical Reference Manual - November 2016 SPRUHZ6H - AM572x Technical Reference Manual - November 2016 Tested on: DRA75x PG 2.0 Rev H EVM Signed-off-by: Ravikumar Kattekola Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 57892f264cea..e7144662af45 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -2017,4 +2017,8 @@ coefficients = <0 2000>; }; +&cpu_crit { + temperature = <120000>; /* milli Celsius */ +}; + /include/ "dra7xx-clocks.dtsi" -- cgit v1.2.3-59-g8ed1b From 69c8ebf83213e6165b13d94ec599b861467ee2dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 16 May 2017 12:22:22 +0200 Subject: fuseblk: Fix warning in super_setup_bdi_name() Commit 5f7f7543f52e "fuse: Convert to separately allocated bdi" didn't properly handle fuseblk filesystem. When fuse_bdi_init() is called for that filesystem type, sb->s_bdi is already initialized (by set_bdev_super()) to point to block device's bdi and consequently super_setup_bdi_name() complains about this fact when reseting bdi to the private one. Fix the problem by properly dropping bdi reference in fuse_bdi_init() before creating a private bdi in super_setup_bdi_name(). Fixes: 5f7f7543f52e ("fuse: Convert to separately allocated bdi") Reported-by: Rakesh Pandit Tested-by: Rakesh Pandit Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fuse/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73cf05135252..9da1a61276d1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -972,8 +972,15 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) int err; char *suffix = ""; - if (sb->s_bdev) + if (sb->s_bdev) { suffix = "-fuseblk"; + /* + * sb->s_bdi points to blkdev's bdi however we want to redirect + * it to our private bdi... + */ + bdi_put(sb->s_bdi); + sb->s_bdi = &noop_backing_dev_info; + } err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), MINOR(fc->dev), suffix); if (err) -- cgit v1.2.3-59-g8ed1b From 8d7a10dd323993cc40bd37bce8bc570133b0c396 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 17 May 2017 16:30:50 +0200 Subject: USB: serial: qcserial: add more Lenovo EM74xx device IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In their infinite wisdom, and never ending quest for end user frustration, Lenovo has decided to use new USB device IDs for the wwan modules in their 2017 laptops. The actual hardware is still the Sierra Wireless EM7455 or EM7430, depending on region. Cc: Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 38b3f0d8cd58..fd509ed6cf70 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ + {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ -- cgit v1.2.3-59-g8ed1b From 23d268eb240954e6e78f7cfab04f2b1e79f84489 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 16 May 2017 07:53:43 -0700 Subject: arp: honour gratuitous ARP _replies_ When arp_accept is 1, gratuitous ARPs are supposed to override matching entries irrespective of whether they arrive during locktime. This was implemented in commit 56022a8fdd87 ("ipv4: arp: update neighbour address when a gratuitous arp is received and arp_accept is set") There is a glitch in the patch though. RFC 2002, section 4.6, "ARP, Proxy ARP, and Gratuitous ARP", defines gratuitous ARPs so that they can be either of Request or Reply type. Those Reply gratuitous ARPs can be triggered with standard tooling, for example, arping -A option does just that. This patch fixes the glitch, making both Request and Reply flavours of gratuitous ARPs to behave identically. As per RFC, if gratuitous ARPs are of Reply type, their Target Hardware Address field should also be set to the link-layer address to which this cache entry should be updated. The field is present in ARP over Ethernet but not in IEEE 1394. In this patch, I don't consider any broadcasted ARP replies as gratuitous if the field is not present, to conform the standard. It's not clear whether there is such a thing for IEEE 1394 as a gratuitous ARP reply; until it's cleared up, we will ignore such broadcasts. Note that they will still update existing ARP cache entries, assuming they arrive out of locktime time interval. Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 0937b34c27ca..d54345a06f72 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -653,6 +653,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha; + unsigned char *tha = NULL; __be32 sip, tip; u16 dev_type = dev->type; int addr_type; @@ -724,6 +725,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) break; #endif default: + tha = arp_ptr; arp_ptr += dev->addr_len; } memcpy(&tip, arp_ptr, 4); @@ -842,8 +844,18 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && - addr_type == RTN_UNICAST; + is_garp = tip == sip && addr_type == RTN_UNICAST; + + /* Unsolicited ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && -- cgit v1.2.3-59-g8ed1b From 77d7123342dcf6442341b67816321d71da8b2b16 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 16 May 2017 08:44:24 -0700 Subject: neighbour: update neigh timestamps iff update is effective It's a common practice to send gratuitous ARPs after moving an IP address to another device to speed up healing of a service. To fulfill service availability constraints, the timing of network peers updating their caches to point to a new location of an IP address can be particularly important. Sometimes neigh_update calls won't touch neither lladdr nor state, for example if an update arrives in locktime interval. The neigh->updated value is tested by the protocol specific neigh code, which in turn will influence whether NEIGH_UPDATE_F_OVERRIDE gets set in the call to neigh_update() or not. As a result, we may effectively ignore the update request, bailing out of touching the neigh entry, except that we still bump its timestamps inside neigh_update. This may be a problem for updates arriving in quick succession. For example, consider the following scenario: A service is moved to another device with its IP address. The new device sends three gratuitous ARP requests into the network with ~1 seconds interval between them. Just before the first request arrives to one of network peer nodes, its neigh entry for the IP address transitions from STALE to DELAY. This transition, among other things, updates neigh->updated. Once the kernel receives the first gratuitous ARP, it ignores it because its arrival time is inside the locktime interval. The kernel still bumps neigh->updated. Then the second gratuitous ARP request arrives, and it's also ignored because it's still in the (new) locktime interval. Same happens for the third request. The node eventually heals itself (after delay_first_probe_time seconds since the initial transition to DELAY state), but it just wasted some time and require a new ARP request/reply round trip. This unfortunate behaviour both puts more load on the network, as well as reduces service availability. This patch changes neigh_update so that it bumps neigh->updated (as well as neigh->confirmed) only once we are sure that either lladdr or entry state will change). In the scenario described above, it means that the second gratuitous ARP request will actually update the entry lladdr. Ideally, we would update the neigh entry on the very first gratuitous ARP request. The locktime mechanism is designed to ignore ARP updates in a short timeframe after a previous ARP update was honoured by the kernel layer. This would require tracking timestamps for state transitions separately from timestamps when actual updates are received. This would probably involve changes in neighbour struct. Therefore, the patch doesn't tackle the issue of the first gratuitous APR ignored, leaving it for a follow-up. Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/core/neighbour.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 58b0bcc125b5..d274f81fcc2c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } - if (new & NUD_CONNECTED) - neigh->confirmed = jiffies; - neigh->updated = jiffies; - /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ @@ -1157,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } } + /* Update timestamps only once we know we will make a change to the + * neighbour entry. Otherwise we risk to move the locktime window with + * noop updates and ignore relevant ARP updates. + */ + if (new != old || lladdr != neigh->ha) { + if (new & NUD_CONNECTED) + neigh->confirmed = jiffies; + neigh->updated = jiffies; + } + if (new != old) { neigh_del_timer(neigh); if (new & NUD_PROBE) -- cgit v1.2.3-59-g8ed1b From 63a1e1c95e60e798fa09ab3c536fb555aa5bbf2b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 16 May 2017 15:18:05 +0100 Subject: arm64/cpufeature: don't use mutex in bringup path Currently, cpus_set_cap() calls static_branch_enable_cpuslocked(), which must take the jump_label mutex. We call cpus_set_cap() in the secondary bringup path, from the idle thread where interrupts are disabled. Taking a mutex in this path "is a NONO" regardless of whether it's contended, and something we must avoid. We didn't spot this until recently, as ___might_sleep() won't warn for this case until all CPUs have been brought up. This patch avoids taking the mutex in the secondary bringup path. The poking of static keys is deferred until enable_cpu_capabilities(), which runs in a suitable context on the boot CPU. To account for the static keys being set later, cpus_have_const_cap() is updated to use another static key to check whether the const cap keys have been initialised, falling back to the caps bitmap until this is the case. This means that users of cpus_have_const_cap() gain should only gain a single additional NOP in the fast path once the const caps are initialised, but should always see the current cap value. The hyp code should never dereference the caps array, since the caps are initialized before we run the module initcall to initialise hyp. A check is added to the hyp init code to document this requirement. This change will sidestep a number of issues when the upcoming hotplug locking rework is merged. Signed-off-by: Mark Rutland Reviewed-by: Marc Zyniger Reviewed-by: Suzuki Poulose Acked-by: Will Deacon Cc: Christoffer Dall Cc: Peter Zijlstra Cc: Sebastian Sewior Cc: Thomas Gleixner Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 12 ++++++++++-- arch/arm64/include/asm/kvm_host.h | 8 ++++++-- arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++-- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e7f84a7b4465..428ee1f2468c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -115,6 +115,7 @@ struct arm64_cpu_capabilities { extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; +extern struct static_key_false arm64_const_caps_ready; bool this_cpu_has_cap(unsigned int cap); @@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsigned int num) } /* System capability check for constant caps */ -static inline bool cpus_have_const_cap(int num) +static inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; @@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigned int num) return test_bit(num, cpu_hwcaps); } +static inline bool cpus_have_const_cap(int num) +{ + if (static_branch_likely(&arm64_const_caps_ready)) + return __cpus_have_const_cap(num); + else + return cpus_have_cap(num); +} + static inline void cpus_set_cap(unsigned int num) { if (num >= ARM64_NCAPS) { @@ -145,7 +154,6 @@ static inline void cpus_set_cap(unsigned int num) num, ARM64_NCAPS); } else { __set_bit(num, cpu_hwcaps); - static_branch_enable(&cpu_hwcap_keys[num]); } } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5e19165c5fa8..1f252a95bc02 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -355,9 +356,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long vector_ptr) { /* - * Call initialization code, and switch to the full blown - * HYP code. + * Call initialization code, and switch to the full blown HYP code. + * If the cpucaps haven't been finalized yet, something has gone very + * wrong, and hyp will crash and burn when it uses any + * cpus_have_const_cap() wrapper. */ + BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 94b8f7fc3310..817ce3365e20 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -985,8 +985,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, */ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { - for (; caps->matches; caps++) - if (caps->enable && cpus_have_cap(caps->capability)) + for (; caps->matches; caps++) { + unsigned int num = caps->capability; + + if (!cpus_have_cap(num)) + continue; + + /* Ensure cpus_have_const_cap(num) works */ + static_branch_enable(&cpu_hwcap_keys[num]); + + if (caps->enable) { /* * Use stop_machine() as it schedules the work allowing * us to modify PSTATE, instead of on_each_cpu() which @@ -994,6 +1002,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) * we return. */ stop_machine(caps->enable, NULL, cpu_online_mask); + } + } } /* @@ -1096,6 +1106,14 @@ static void __init setup_feature_capabilities(void) enable_cpu_capabilities(arm64_features); } +DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); +EXPORT_SYMBOL(arm64_const_caps_ready); + +static void __init mark_const_caps_ready(void) +{ + static_branch_enable(&arm64_const_caps_ready); +} + /* * Check if the current CPU has a given feature capability. * Should be called from non-preemptible context. @@ -1131,6 +1149,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); enable_errata_workarounds(); + mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) -- cgit v1.2.3-59-g8ed1b From 828d4cdd012c8ffbf76625c3ff164312e8666784 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 16 May 2017 10:08:08 -0600 Subject: dtc: check.c fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compile error found on odroid-xu4: checks.c: In function ‘check_simple_bus_reg’: checks.c:876:41: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘uint64_t{aka long long unsigned int}’ [-Werror=format=] snprintf(unit_addr, sizeof(unit_addr), "%lx", reg); ^ checks.c:876:41: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘uint64_t {aka long long unsigned int}’ [-Werror=format=] cc1: all warnings being treated as errors Makefile:304: recipe for target 'checks.o' failed make: *** [checks.o] Error 1 Signed-off-by: Shuah Khan [dwg: Correct new format to be correct in general] Signed-off-by: David Gibson [robh: cherry-picked from upstream dtc commit 2a42b14d0d03] Signed-off-by: Rob Herring --- scripts/dtc/checks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c index 5adfc8f52b4f..4b72b530c84f 100644 --- a/scripts/dtc/checks.c +++ b/scripts/dtc/checks.c @@ -873,7 +873,7 @@ static void check_simple_bus_reg(struct check *c, struct dt_info *dti, struct no while (size--) reg = (reg << 32) | fdt32_to_cpu(*(cells++)); - snprintf(unit_addr, sizeof(unit_addr), "%lx", reg); + snprintf(unit_addr, sizeof(unit_addr), "%zx", reg); if (!streq(unitname, unit_addr)) FAIL(c, dti, "Node %s simple-bus unit address format error, expected \"%s\"", node->fullpath, unit_addr); -- cgit v1.2.3-59-g8ed1b From 49e67dd17649b60b4d54966e18ec9c80198227f0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 17 May 2017 17:29:09 +0200 Subject: of: fdt: add missing allocation-failure check The memory allocator passed to __unflatten_device_tree() (e.g. a wrapped kzalloc) can fail so add the missing sanity check to avoid dereferencing a NULL pointer. Fixes: fe14042358fa ("of/flattree: Refactor unflatten_device_tree and add fdt_unflatten_tree") Cc: stable # 2.6.38 Signed-off-by: Johan Hovold Signed-off-by: Rob Herring --- drivers/of/fdt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index a0972219ccfc..0373ae49d24d 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -507,6 +507,9 @@ void *__unflatten_device_tree(const void *blob, /* Allocate memory for the expanded device tree */ mem = dt_alloc(size + 4, __alignof__(struct device_node)); + if (!mem) + return NULL; + memset(mem, 0, size); *(__be32 *)(mem + size) = cpu_to_be32(0xdeadbeef); -- cgit v1.2.3-59-g8ed1b From 05d8cba4a1e8c7e2d1f91a24a2f3d26852938a04 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 May 2017 14:15:03 +0900 Subject: kbuild: skip install/check of headers right under uapi directories Since commit 61562f981e92 ("uapi: export all arch specifics directories"), "make INSTALL_HDR_PATH=$root/usr headers_install" deletes standard glibc headers and others in $(root)/usr/include. The cause of the issue is that headers_install now starts descending from arch/$(hdr-arch)/include/uapi with $(root)/usr/include for its destination when installing asm headers. So, headers already there are assumed to be unwanted. When headers_install starts descending from include/uapi with $(root)/usr/include for its destination, it works around the problem by creating an dummy destination $(root)/usr/include/uapi, but this is tricky. To fix the problem in a clean way is to skip headers install/check in include/uapi and arch/$(hdr-arch)/include/uapi because we know there are only sub-directories in uapi directories. A good side effect is the empty destination $(root)/usr/include/uapi will go away. I am also removing the trailing slash in the headers_check target to skip checking in arch/$(hdr-arch)/include/uapi. Fixes: 61562f981e92 ("uapi: export all arch specifics directories") Reported-by: Dan Williams Signed-off-by: Masahiro Yamada Tested-by: Dan Williams Acked-by: Nicolas Dichtel --- Makefile | 2 +- scripts/Makefile.headersinst | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index b400c0604fac..b1ee4a49efa2 100644 --- a/Makefile +++ b/Makefile @@ -1172,7 +1172,7 @@ headers_check_all: headers_install_all PHONY += headers_check headers_check: headers_install $(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1 - $(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/ $(hdr-dst) HDRCHECK=1 + $(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst) HDRCHECK=1 # --------------------------------------------------------------------------- # Kernel selftest diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index 6ba97a1f9c5a..ce753a408c56 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -8,6 +8,29 @@ # # ========================================================================== +PHONY := __headers +__headers: + +include scripts/Kbuild.include + +srcdir := $(srctree)/$(obj) +subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.)) +# caller may set destination dir (when installing to asm/) +_dst := $(if $(dst),$(dst),$(obj)) + +# Recursion +__headers: $(subdirs) + +.PHONY: $(subdirs) +$(subdirs): + $(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@ + +# Skip header install/check for include/uapi and arch/$(hdr-arch)/include/uapi. +# We have only sub-directories there. +skip-inst := $(if $(filter %/uapi,$(obj)),1) + +ifeq ($(skip-inst),) + # generated header directory gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj))) @@ -15,21 +38,14 @@ gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj))) kbuild-file := $(srctree)/$(obj)/Kbuild -include $(kbuild-file) -# called may set destination dir (when installing to asm/) -_dst := $(if $(dst),$(dst),$(obj)) - old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild ifneq ($(wildcard $(old-kbuild-file)),) include $(old-kbuild-file) endif -include scripts/Kbuild.include - installdir := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst)) -srcdir := $(srctree)/$(obj) gendir := $(objtree)/$(gen) -subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.)) header-files := $(notdir $(wildcard $(srcdir)/*.h)) header-files += $(notdir $(wildcard $(srcdir)/*.agh)) header-files := $(filter-out $(no-export-headers), $(header-files)) @@ -88,11 +104,9 @@ quiet_cmd_check = CHECK $(printdir) ($(words $(all-files)) files) $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \ touch $@ -PHONY += __headersinst __headerscheck - ifndef HDRCHECK # Rules for installing headers -__headersinst: $(subdirs) $(install-file) +__headers: $(install-file) @: targets += $(install-file) @@ -104,7 +118,7 @@ $(install-file): scripts/headers_install.sh \ $(call if_changed,install) else -__headerscheck: $(subdirs) $(check-file) +__headers: $(check-file) @: targets += $(check-file) @@ -113,11 +127,6 @@ $(check-file): scripts/headers_check.pl $(output-files) FORCE endif -# Recursion -.PHONY: $(subdirs) -$(subdirs): - $(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@ - targets := $(wildcard $(sort $(targets))) cmd_files := $(wildcard \ $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) @@ -126,6 +135,8 @@ ifneq ($(cmd_files),) include $(cmd_files) endif +endif # skip-inst + .PHONY: $(PHONY) PHONY += FORCE FORCE: ; -- cgit v1.2.3-59-g8ed1b From 2423496af35d94a87156b063ea5cedffc10a70a1 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 16 May 2017 14:36:23 -0400 Subject: ipv6: Prevent overrun when parsing v6 header options The KASAN warning repoted below was discovered with a syzkaller program. The reproducer is basically: int s = socket(AF_INET6, SOCK_RAW, NEXTHDR_HOP); send(s, &one_byte_of_data, 1, MSG_MORE); send(s, &more_than_mtu_bytes_data, 2000, 0); The socket() call sets the nexthdr field of the v6 header to NEXTHDR_HOP, the first send call primes the payload with a non zero byte of data, and the second send call triggers the fragmentation path. The fragmentation code tries to parse the header options in order to figure out where to insert the fragment option. Since nexthdr points to an invalid option, the calculation of the size of the network header can made to be much larger than the linear section of the skb and data is read outside of it. This fix makes ip6_find_1stfrag return an error if it detects running out-of-bounds. [ 42.361487] ================================================================== [ 42.364412] BUG: KASAN: slab-out-of-bounds in ip6_fragment+0x11c8/0x3730 [ 42.365471] Read of size 840 at addr ffff88000969e798 by task ip6_fragment-oo/3789 [ 42.366469] [ 42.366696] CPU: 1 PID: 3789 Comm: ip6_fragment-oo Not tainted 4.11.0+ #41 [ 42.367628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 [ 42.368824] Call Trace: [ 42.369183] dump_stack+0xb3/0x10b [ 42.369664] print_address_description+0x73/0x290 [ 42.370325] kasan_report+0x252/0x370 [ 42.370839] ? ip6_fragment+0x11c8/0x3730 [ 42.371396] check_memory_region+0x13c/0x1a0 [ 42.371978] memcpy+0x23/0x50 [ 42.372395] ip6_fragment+0x11c8/0x3730 [ 42.372920] ? nf_ct_expect_unregister_notifier+0x110/0x110 [ 42.373681] ? ip6_copy_metadata+0x7f0/0x7f0 [ 42.374263] ? ip6_forward+0x2e30/0x2e30 [ 42.374803] ip6_finish_output+0x584/0x990 [ 42.375350] ip6_output+0x1b7/0x690 [ 42.375836] ? ip6_finish_output+0x990/0x990 [ 42.376411] ? ip6_fragment+0x3730/0x3730 [ 42.376968] ip6_local_out+0x95/0x160 [ 42.377471] ip6_send_skb+0xa1/0x330 [ 42.377969] ip6_push_pending_frames+0xb3/0xe0 [ 42.378589] rawv6_sendmsg+0x2051/0x2db0 [ 42.379129] ? rawv6_bind+0x8b0/0x8b0 [ 42.379633] ? _copy_from_user+0x84/0xe0 [ 42.380193] ? debug_check_no_locks_freed+0x290/0x290 [ 42.380878] ? ___sys_sendmsg+0x162/0x930 [ 42.381427] ? rcu_read_lock_sched_held+0xa3/0x120 [ 42.382074] ? sock_has_perm+0x1f6/0x290 [ 42.382614] ? ___sys_sendmsg+0x167/0x930 [ 42.383173] ? lock_downgrade+0x660/0x660 [ 42.383727] inet_sendmsg+0x123/0x500 [ 42.384226] ? inet_sendmsg+0x123/0x500 [ 42.384748] ? inet_recvmsg+0x540/0x540 [ 42.385263] sock_sendmsg+0xca/0x110 [ 42.385758] SYSC_sendto+0x217/0x380 [ 42.386249] ? SYSC_connect+0x310/0x310 [ 42.386783] ? __might_fault+0x110/0x1d0 [ 42.387324] ? lock_downgrade+0x660/0x660 [ 42.387880] ? __fget_light+0xa1/0x1f0 [ 42.388403] ? __fdget+0x18/0x20 [ 42.388851] ? sock_common_setsockopt+0x95/0xd0 [ 42.389472] ? SyS_setsockopt+0x17f/0x260 [ 42.390021] ? entry_SYSCALL_64_fastpath+0x5/0xbe [ 42.390650] SyS_sendto+0x40/0x50 [ 42.391103] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.391731] RIP: 0033:0x7fbbb711e383 [ 42.392217] RSP: 002b:00007ffff4d34f28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 42.393235] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbbb711e383 [ 42.394195] RDX: 0000000000001000 RSI: 00007ffff4d34f60 RDI: 0000000000000003 [ 42.395145] RBP: 0000000000000046 R08: 00007ffff4d34f40 R09: 0000000000000018 [ 42.396056] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000400aad [ 42.396598] R13: 0000000000000066 R14: 00007ffff4d34ee0 R15: 00007fbbb717af00 [ 42.397257] [ 42.397411] Allocated by task 3789: [ 42.397702] save_stack_trace+0x16/0x20 [ 42.398005] save_stack+0x46/0xd0 [ 42.398267] kasan_kmalloc+0xad/0xe0 [ 42.398548] kasan_slab_alloc+0x12/0x20 [ 42.398848] __kmalloc_node_track_caller+0xcb/0x380 [ 42.399224] __kmalloc_reserve.isra.32+0x41/0xe0 [ 42.399654] __alloc_skb+0xf8/0x580 [ 42.400003] sock_wmalloc+0xab/0xf0 [ 42.400346] __ip6_append_data.isra.41+0x2472/0x33d0 [ 42.400813] ip6_append_data+0x1a8/0x2f0 [ 42.401122] rawv6_sendmsg+0x11ee/0x2db0 [ 42.401505] inet_sendmsg+0x123/0x500 [ 42.401860] sock_sendmsg+0xca/0x110 [ 42.402209] ___sys_sendmsg+0x7cb/0x930 [ 42.402582] __sys_sendmsg+0xd9/0x190 [ 42.402941] SyS_sendmsg+0x2d/0x50 [ 42.403273] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.403718] [ 42.403871] Freed by task 1794: [ 42.404146] save_stack_trace+0x16/0x20 [ 42.404515] save_stack+0x46/0xd0 [ 42.404827] kasan_slab_free+0x72/0xc0 [ 42.405167] kfree+0xe8/0x2b0 [ 42.405462] skb_free_head+0x74/0xb0 [ 42.405806] skb_release_data+0x30e/0x3a0 [ 42.406198] skb_release_all+0x4a/0x60 [ 42.406563] consume_skb+0x113/0x2e0 [ 42.406910] skb_free_datagram+0x1a/0xe0 [ 42.407288] netlink_recvmsg+0x60d/0xe40 [ 42.407667] sock_recvmsg+0xd7/0x110 [ 42.408022] ___sys_recvmsg+0x25c/0x580 [ 42.408395] __sys_recvmsg+0xd6/0x190 [ 42.408753] SyS_recvmsg+0x2d/0x50 [ 42.409086] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.409513] [ 42.409665] The buggy address belongs to the object at ffff88000969e780 [ 42.409665] which belongs to the cache kmalloc-512 of size 512 [ 42.410846] The buggy address is located 24 bytes inside of [ 42.410846] 512-byte region [ffff88000969e780, ffff88000969e980) [ 42.411941] The buggy address belongs to the page: [ 42.412405] page:ffffea000025a780 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 [ 42.413298] flags: 0x100000000008100(slab|head) [ 42.413729] raw: 0100000000008100 0000000000000000 0000000000000000 00000001800c000c [ 42.414387] raw: ffffea00002a9500 0000000900000007 ffff88000c401280 0000000000000000 [ 42.415074] page dumped because: kasan: bad access detected [ 42.415604] [ 42.415757] Memory state around the buggy address: [ 42.416222] ffff88000969e880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 42.416904] ffff88000969e900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 42.417591] >ffff88000969e980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 42.418273] ^ [ 42.418588] ffff88000969ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 42.419273] ffff88000969ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 42.419882] ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 2 ++ net/ipv6/ip6_output.c | 4 ++++ net/ipv6/output_core.c | 14 ++++++++------ net/ipv6/udp_offload.c | 2 ++ 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 93e58a5e1837..eab36abc9f22 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -117,6 +117,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (unfrag_ip6hlen < 0) + return ERR_PTR(unfrag_ip6hlen); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); if (skb->next) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 58f6288e9ba5..01deecda2f84 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -598,6 +598,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, u8 *prevhdr, nexthdr = 0; hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (hlen < 0) { + err = hlen; + goto fail; + } nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index cd4252346a32..e9065b8d3af8 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; - while (offset + 1 <= packet_len) { + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { @@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; } - return offset; + return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ac858c480f2f..b348cff47395 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -91,6 +91,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, * bytes to insert fragment header. */ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (unfrag_ip6hlen < 0) + return ERR_PTR(unfrag_ip6hlen); nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + -- cgit v1.2.3-59-g8ed1b From b6c41cb050d5debc7e4eaa0a81cbdbad72588891 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Mon, 15 May 2017 16:28:17 -0700 Subject: sparc64: Fix mapping of 64k pages with MAP_FIXED An incorrect huge page alignment check caused mmap failure for 64K pages when MAP_FIXED is used with address not aligned to HPAGE_SIZE. Orabug: 25885991 Fixes: dcd1912d21a0 ("sparc64: Add 64K page size support") Signed-off-by: Nitin Gupta Signed-off-by: David S. Miller --- arch/sparc/include/asm/hugetlb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index dcbf985ab243..d1f837dc77a4 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -24,9 +24,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } -- cgit v1.2.3-59-g8ed1b From deba804c90642c8ed0f15ac1083663976d578f54 Mon Sep 17 00:00:00 2001 From: Orlando Arias Date: Tue, 16 May 2017 15:34:00 -0400 Subject: sparc: Fix -Wstringop-overflow warning Greetings, GCC 7 introduced the -Wstringop-overflow flag to detect buffer overflows in calls to string handling functions [1][2]. Due to the way ``empty_zero_page'' is declared in arch/sparc/include/setup.h, this causes a warning to trigger at compile time in the function mem_init(), which is subsequently converted to an error. The ensuing patch fixes this issue and aligns the declaration of empty_zero_page to that of other architectures. Thank you. Cheers, Orlando. [1] https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02308.html [2] https://gcc.gnu.org/gcc-7/changes.html Signed-off-by: Orlando Arias -------------------------------------------------------------------------------- Signed-off-by: David S. Miller --- arch/sparc/include/asm/pgtable_32.h | 4 ++-- arch/sparc/include/asm/setup.h | 2 +- arch/sparc/mm/init_32.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index ce6f56980aef..cf190728360b 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -91,9 +91,9 @@ extern unsigned long pfn_base; * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page)) +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * In general all page table modifications should use the V8 atomic diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 478bf6bb4598..3fae200dd251 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -16,7 +16,7 @@ extern char reboot_command[]; */ extern unsigned char boot_cpu_id; -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern int serial_console; static inline int con_is_present(void) diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index c6afe98de4d9..3bd0d513bddb 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -290,7 +290,7 @@ void __init mem_init(void) /* Saves us work later. */ - memset((void *)&empty_zero_page, 0, PAGE_SIZE); + memset((void *)empty_zero_page, 0, PAGE_SIZE); i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5); i += 1; -- cgit v1.2.3-59-g8ed1b From 48078d2dac0a26f84f5f3ec704f24f7c832cce14 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 17 May 2017 11:47:00 -0400 Subject: sparc/ftrace: Fix ftrace graph time measurement The ftrace function_graph time measurements of a given function is not accurate according to those recorded by ftrace using the function filters. This change pulls the x86_64 fix from 'commit 722b3c746953 ("ftrace/graph: Trace function entry before updating index")' into the sparc specific prepare_ftrace_return which stops ftrace from counting interrupted tasks in the time measurement. Example measurements for select_task_rq_fair running "hackbench 100 process 1000": | tracing/trace_stat/function0 | function_graph Before patch | 2.802 us | 4.255 us After patch | 2.749 us | 3.094 us Signed-off-by: Liam R. Howlett Signed-off-by: David S. Miller --- arch/sparc/kernel/ftrace.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 6bcff698069b..cec54dc4ab81 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -130,17 +130,16 @@ unsigned long prepare_ftrace_return(unsigned long parent, if (unlikely(atomic_read(¤t->tracing_graph_pause))) return parent + 8UL; - if (ftrace_push_return_trace(parent, self_addr, &trace.depth, - frame_pointer, NULL) == -EBUSY) - return parent + 8UL; - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + if (!ftrace_graph_entry(&trace)) + return parent + 8UL; + + if (ftrace_push_return_trace(parent, self_addr, &trace.depth, + frame_pointer, NULL) == -EBUSY) return parent + 8UL; - } return return_hooker; } -- cgit v1.2.3-59-g8ed1b From 9142e9007f2d7ab58a587a1e1d921b0064a339aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 May 2017 13:27:53 -0700 Subject: net: fix compile error in skb_orphan_partial() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_INET is not set, net/core/sock.c can not compile : net/core/sock.c: In function ‘skb_orphan_partial’: net/core/sock.c:1810:2: error: implicit declaration of function ‘skb_is_tcp_pure_ack’ [-Werror=implicit-function-declaration] if (skb_is_tcp_pure_ack(skb)) ^ Fix this by always including Fixes: f6ba8d33cfbb ("netem: fix skb_orphan_partial()") Signed-off-by: Eric Dumazet Reported-by: Paul Gortmaker Reported-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/core/sock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index e43e71d7856b..727f924b7f91 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,10 +139,7 @@ #include -#ifdef CONFIG_INET #include -#endif - #include static DEFINE_MUTEX(proto_list_mutex); -- cgit v1.2.3-59-g8ed1b From 87fe603274aa9889c05cca3c3e45675e1997cb13 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 May 2017 16:39:43 -0400 Subject: bnxt_en: Call bnxt_dcb_init() after getting firmware DCBX configuration. In the current code, bnxt_dcb_init() is called too early before we determine if the firmware DCBX agent is running or not. As a result, we are not setting the DCB_CAP_DCBX_HOST and DCB_CAP_DCBX_LLD_MANAGED flags properly to report to DCBNL. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b56c54d68d5e..03f55daecb20 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7630,8 +7630,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->min_mtu = ETH_ZLEN; dev->max_mtu = BNXT_MAX_MTU; - bnxt_dcb_init(bp); - #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); #endif @@ -7669,6 +7667,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_hwrm_func_qcfg(bp); bnxt_hwrm_port_led_qcaps(bp); bnxt_ethtool_init(bp); + bnxt_dcb_init(bp); bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); -- cgit v1.2.3-59-g8ed1b From f667724b99ad1afc91f16064d8fb293d2805bd57 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 May 2017 16:39:44 -0400 Subject: bnxt_en: Check status of firmware DCBX agent before setting DCB_CAP_DCBX_HOST. Otherwise, all the host based DCBX settings from lldpad will fail if the firmware DCBX agent is running. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 46de2f8ff024..5c6dd0ce209f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -553,8 +553,10 @@ static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode) if ((mode & DCB_CAP_DCBX_VER_CEE) || !(mode & DCB_CAP_DCBX_VER_IEEE)) return 1; - if ((mode & DCB_CAP_DCBX_HOST) && BNXT_VF(bp)) - return 1; + if (mode & DCB_CAP_DCBX_HOST) { + if (BNXT_VF(bp) || (bp->flags & BNXT_FLAG_FW_LLDP_AGENT)) + return 1; + } if (mode == bp->dcbx_cap) return 0; -- cgit v1.2.3-59-g8ed1b From 579f1d926c66cc0bd3bd87b1fe2e091084b07430 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 17 May 2017 15:18:05 -0700 Subject: selftests/bpf: fix broken build due to types.h Commit 0a5539f66133 ("bpf: Provide a linux/types.h override for bpf selftests.") caused a build failure for tools/testing/selftest/bpf because of some missing types: $ make -C tools/testing/selftests/bpf/ ... In file included from /home/yhs/work/net-next/tools/testing/selftests/bpf/test_pkt_access.c:8: ../../../include/uapi/linux/bpf.h:170:3: error: unknown type name '__aligned_u64' __aligned_u64 key; ... /usr/include/linux/swab.h:160:8: error: unknown type name '__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) ... The type __aligned_u64 is defined in linux:include/uapi/linux/types.h. The fix is to copy missing type definition into tools/testing/selftests/bpf/include/uapi/linux/types.h. Adding additional include "string.h" resolves __always_inline issue. Fixes: 0a5539f66133 ("bpf: Provide a linux/types.h override for bpf selftests.") Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/include/uapi/linux/types.h | 16 ++++++++++++++++ tools/testing/selftests/bpf/test_pkt_access.c | 1 + 2 files changed, 17 insertions(+) diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h index fbd16a7554af..51841848fbfe 100644 --- a/tools/testing/selftests/bpf/include/uapi/linux/types.h +++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h @@ -3,4 +3,20 @@ #include +/* copied from linux:include/uapi/linux/types.h */ +#define __bitwise +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + #endif /* _UAPI_LINUX_TYPES_H */ diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c index 39387bb7e08c..6e11ba11709e 100644 --- a/tools/testing/selftests/bpf/test_pkt_access.c +++ b/tools/testing/selftests/bpf/test_pkt_access.c @@ -5,6 +5,7 @@ * License as published by the Free Software Foundation. */ #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From 53cf29d3b1bc5b86fcff5fdc52f873d79d908ef4 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 17 May 2017 19:02:17 -0300 Subject: scsi: lpfc: Fix NULL pointer dereference during PCI error recovery Recent commit on patchset "lpfc updates for 11.2.0.14" fixed an issue about dereferencing a NULL pointer on port reset. The specific commit, named "lpfc: Fix system crash when port is reset.", is missing a check against NULL pointer on lpfc_els_flush_cmd() though. Since we destroy the queues on adapter resets, like in PCI error recovery path, we need the validation present on this patch in order to avoid a NULL pointer dereference when trying to flush commands of ELS wq, after it has been destroyed (which would lead to a kernel oops). Tested-by: Raphael Silva Signed-off-by: Guilherme G. Piccoli Acked-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 1d36f82fa369..8e532b39ae93 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7451,6 +7451,13 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) */ spin_lock_irq(&phba->hbalock); pring = lpfc_phba_elsring(phba); + + /* Bail out if we've no ELS wq, like in PCI error recovery case. */ + if (unlikely(!pring)) { + spin_unlock_irq(&phba->hbalock); + return; + } + if (phba->sli_rev == LPFC_SLI_REV4) spin_lock(&pring->ring_lock); -- cgit v1.2.3-59-g8ed1b From eeeb51d834d76c66784e7fe1a9ace3ce3f8d2af1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 16 May 2017 20:52:29 -0700 Subject: scsi: lpfc: fix build issue if NVME_FC_TARGET is not defined fix build issue if NVME_FC_TARGET is not defined. noop the code. The code will never be invoked if target mode is not enabled. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 312f54278bd4..f94294b77b7b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -157,6 +157,7 @@ out: void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) { +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; @@ -260,6 +261,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) &phba->sli4_hba.lpfc_nvmet_ctx_list); phba->sli4_hba.nvmet_ctx_cnt++; spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag); +#endif } #ifdef CONFIG_SCSI_LPFC_DEBUG_FS -- cgit v1.2.3-59-g8ed1b From b9ef0326c05a008c3c576bd4d676208b50c344d5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 17 May 2017 11:14:35 -0400 Subject: tracing: Move postpone selftests to core from early_initcall I hit the following lockdep splat when booting with ftrace selftests enabled, as well as CONFIG_PREEMPT and LOCKDEP. Testing dynamic ftrace ops #1: (1 0 1 0 0) (1 1 2 0 0) (2 1 3 0 169) (2 2 4 0 50066) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 13 at kernel/rcu/srcutree.c:202 check_init_srcu_struct+0x60/0x70 Modules linked in: CPU: 0 PID: 13 Comm: rcu_tasks_kthre Not tainted 4.12.0-rc1-test+ #587 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 task: ffff880119628040 task.stack: ffffc900006a4000 RIP: 0010:check_init_srcu_struct+0x60/0x70 RSP: 0000:ffffc900006a7d98 EFLAGS: 00010246 RAX: 0000000000000246 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff880119628040 RSI: 00000000ffffffff RDI: ffffffff81e5fb40 RBP: ffffc900006a7e20 R08: 00000023b403c000 R09: 0000000000000001 R10: ffffc900006a7e40 R11: 0000000000000000 R12: ffffffff81e5fb40 R13: 0000000000000286 R14: ffff880119628040 R15: ffffc900006a7e98 FS: 0000000000000000(0000) GS:ffff88011ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88011edff000 CR3: 0000000001e0f000 CR4: 00000000001406f0 Call Trace: ? __synchronize_srcu+0x6e/0x140 ? lock_acquire+0xdc/0x1d0 ? ktime_get_mono_fast_ns+0x5d/0xb0 synchronize_srcu+0x6f/0x110 ? synchronize_srcu+0x6f/0x110 rcu_tasks_kthread+0x20a/0x540 kthread+0x114/0x150 ? __rcu_read_unlock+0x70/0x70 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x2e/0x40 Code: f6 83 70 06 00 00 03 49 89 c5 74 0d be 01 00 00 00 48 89 df e8 42 fa ff ff 4c 89 ee 4c 89 e7 e8 b7 42 75 00 5b 41 5c 41 5d 5d c3 <0f> ff eb aa 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 ---[ end trace 5c3f4206ce50f6ac ]--- What happens is that the selftests include a creating of a dynamically allocated ftrace_ops, which requires the use of synchronize_rcu_tasks() which uses srcu, and triggers the above warning. It appears that synchronize_rcu_tasks() is not set up at early_initcall(), but it is at core_initcall(). By moving the tests down to that location works out properly. Link: http://lkml.kernel.org/r/20170517111435.7388c033@gandalf.local.home Acked-by: "Paul E. McKenney" Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c4536c449021..cdf97ce8cff2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1558,7 +1558,7 @@ static __init int init_trace_selftests(void) return 0; } -early_initcall(init_trace_selftests); +core_initcall(init_trace_selftests); #else static inline int run_tracer_selftest(struct tracer *type) { -- cgit v1.2.3-59-g8ed1b From 30e7d894c1478c88d50ce94ddcdbd7f9763d9cdd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 May 2017 10:19:49 +0200 Subject: tracing/kprobes: Enforce kprobes teardown after testing Enabling the tracer selftest triggers occasionally the warning in text_poke(), which warns when the to be modified page is not marked reserved. The reason is that the tracer selftest installs kprobes on functions marked __init for testing. These probes are removed after the tests, but that removal schedules the delayed kprobes_optimizer work, which will do the actual text poke. If the work is executed after the init text is freed, then the warning triggers. The bug can be reproduced reliably when the work delay is increased. Flush the optimizer work and wait for the optimizing/unoptimizing lists to become empty before returning from the kprobes tracer selftest. That ensures that all operations which were queued due to the probes removal have completed. Link: http://lkml.kernel.org/r/20170516094802.76a468bb@gandalf.local.home Signed-off-by: Thomas Gleixner Acked-by: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 6274de498 ("kprobes: Support delayed unoptimizing") Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 3 +++ kernel/kprobes.c | 2 +- kernel/trace/trace_kprobe.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 30f90c1a0aaf..541df0b5b815 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -349,6 +349,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif +extern void wait_for_kprobe_optimizer(void); +#else +static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7367e0ec6f81..199243bba554 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -595,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work) } /* Wait for completing optimization and unoptimization */ -static void wait_for_kprobe_optimizer(void) +void wait_for_kprobe_optimizer(void) { mutex_lock(&kprobe_mutex); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8485f6738a87..c129fca6ec99 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1535,6 +1535,11 @@ static __init int kprobe_trace_self_tests_init(void) end: release_all_trace_kprobes(); + /* + * Wait for the optimizer work to finish. Otherwise it might fiddle + * with probes in already freed __init text. + */ + wait_for_kprobe_optimizer(); if (warn) pr_cont("NG: Some tests are failed. Please check them.\n"); else -- cgit v1.2.3-59-g8ed1b From cbab567c3dc7d6f443b4c84eab76e8967d5c1dee Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 16 May 2017 23:21:25 +0530 Subject: ftrace: Simplify glob handling in unregister_ftrace_function_probe_func() Handle a NULL glob properly and simplify the check. Link: http://lkml.kernel.org/r/5df74d4ffb4721db6d5a22fa08ca031d62ead493.1494956770.git.naveen.n.rao@linux.vnet.ibm.com Reviewed-by: Masami Hiramatsu Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 39dca4e86a94..c35c3e67d09a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4144,9 +4144,9 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, int i, ret = -ENODEV; int size; - if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) + if (!glob || !strlen(glob) || !strcmp(glob, "*")) func_g.search = NULL; - else if (glob) { + else { int not; func_g.type = filter_parse_regex(glob, strlen(glob), -- cgit v1.2.3-59-g8ed1b From a0e6369e4bac8844825ae1a66ccd122b290dcc86 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 16 May 2017 23:21:26 +0530 Subject: ftrace/instances: Clear function triggers when removing instances If instance directories are deleted while there are registered function triggers: # cd /sys/kernel/debug/tracing/instances # mkdir test # echo "schedule:enable_event:sched:sched_switch" > test/set_ftrace_filter # rmdir test Unable to handle kernel paging request for data at address 0x00000008 Unable to handle kernel paging request for data at address 0x00000008 Faulting instruction address: 0xc0000000021edde8 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=2048 NUMA pSeries Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc kvm iptable_filter fuse binfmt_misc pseries_rng rng_core vmx_crypto ib_iser rdma_cm iw_cm ib_cm ib_core libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c multipath virtio_net virtio_blk virtio_pci crc32c_vpmsum virtio_ring virtio CPU: 8 PID: 8694 Comm: rmdir Not tainted 4.11.0-nnr+ #113 task: c0000000bab52800 task.stack: c0000000baba0000 NIP: c0000000021edde8 LR: c0000000021f0590 CTR: c000000002119620 REGS: c0000000baba3870 TRAP: 0300 Not tainted (4.11.0-nnr+) MSR: 8000000000009033 CR: 22002422 XER: 20000000 CFAR: 00007fffabb725a8 DAR: 0000000000000008 DSISR: 40000000 SOFTE: 0 GPR00: c00000000220f750 c0000000baba3af0 c000000003157e00 0000000000000000 GPR04: 0000000000000040 00000000000000eb 0000000000000040 0000000000000000 GPR08: 0000000000000000 0000000000000113 0000000000000000 c00000000305db98 GPR12: c000000002119620 c00000000fd42c00 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c0000000bab52e90 0000000000000000 GPR24: 0000000000000000 00000000000000eb 0000000000000040 c0000000baba3bb0 GPR28: c00000009cb06eb0 c0000000bab52800 c00000009cb06eb0 c0000000baba3bb0 NIP [c0000000021edde8] ring_buffer_lock_reserve+0x8/0x4e0 LR [c0000000021f0590] trace_event_buffer_lock_reserve+0xe0/0x1a0 Call Trace: [c0000000baba3af0] [c0000000021f96c8] trace_event_buffer_commit+0x1b8/0x280 (unreliable) [c0000000baba3b60] [c00000000220f750] trace_event_buffer_reserve+0x80/0xd0 [c0000000baba3b90] [c0000000021196b8] trace_event_raw_event_sched_switch+0x98/0x180 [c0000000baba3c10] [c0000000029d9980] __schedule+0x6e0/0xab0 [c0000000baba3ce0] [c000000002122230] do_task_dead+0x70/0xc0 [c0000000baba3d10] [c0000000020ea9c8] do_exit+0x828/0xd00 [c0000000baba3dd0] [c0000000020eaf70] do_group_exit+0x60/0x100 [c0000000baba3e10] [c0000000020eb034] SyS_exit_group+0x24/0x30 [c0000000baba3e30] [c00000000200bcec] system_call+0x38/0x54 Instruction dump: 60000000 60420000 7d244b78 7f63db78 4bffaa09 393efff8 793e0020 39200000 4bfffecc 60420000 3c4c00f7 3842a020 <81230008> 2f890000 409e02f0 a14d0008 ---[ end trace b917b8985d0e650b ]--- Unable to handle kernel paging request for data at address 0x00000008 Faulting instruction address: 0xc0000000021edde8 Unable to handle kernel paging request for data at address 0x00000008 Faulting instruction address: 0xc0000000021edde8 Faulting instruction address: 0xc0000000021edde8 To address this, let's clear all registered function probes before deleting the ftrace instance. Link: http://lkml.kernel.org/r/c5f1ca624043690bd94642bb6bffd3f2fc504035.1494956770.git.naveen.n.rao@linux.vnet.ibm.com Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 8 ++++++++ kernel/trace/trace.c | 3 +++ kernel/trace/trace.h | 1 + 3 files changed, 12 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c35c3e67d09a..74fdfe9ed3db 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4256,6 +4256,14 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, return ret; } +void clear_ftrace_function_probes(struct trace_array *tr) +{ + struct ftrace_func_probe *probe, *n; + + list_for_each_entry_safe(probe, n, &tr->func_probes, list) + unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops); +} + static LIST_HEAD(ftrace_commands); static DEFINE_MUTEX(ftrace_cmd_mutex); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cdf97ce8cff2..664c44a6d48f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7550,6 +7550,9 @@ static int instance_rmdir(const char *name) } tracing_set_nop(tr); +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) + clear_ftrace_function_probes(tr); +#endif event_trace_del_tracer(tr); ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 291a1bca5748..98e0845f7235 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -980,6 +980,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, extern int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops); +extern void clear_ftrace_function_probes(struct trace_array *tr); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); -- cgit v1.2.3-59-g8ed1b From 8a49f3e03c8ac52fe1b706fffb13142295fa0c47 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 17 May 2017 21:53:32 -0400 Subject: ftrace: Remove #ifdef from code and add clear_ftrace_function_probes() stub No need to add ugly #ifdefs in the code. Having a standard stub file is much prettier. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 -- kernel/trace/trace.h | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 664c44a6d48f..fcc9a2d774c3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7550,9 +7550,7 @@ static int instance_rmdir(const char *name) } tracing_set_nop(tr); -#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) clear_ftrace_function_probes(tr); -#endif event_trace_del_tracer(tr); ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 98e0845f7235..39fd77330aab 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -999,6 +999,10 @@ static inline __init int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } +static inline void clear_ftrace_function_probes(struct trace_array *tr) +{ +} + /* * The ops parameter passed in is usually undefined. * This must be a macro. -- cgit v1.2.3-59-g8ed1b From d2ffb8d3cc3458e2102b2f067a2e82c84947deea Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 16 May 2017 23:21:27 +0530 Subject: selftests/ftrace: Fix bashisms Fix a few bashisms in ftrace selftests. Link: http://lkml.kernel.org/r/5fbf4613eef0766918fa04e3ff537cae271223ee.1494956770.git.naveen.n.rao@linux.vnet.ibm.com Acked-by: Masami Hiramatsu Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/ftracetest | 2 +- tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc | 2 +- tools/testing/selftests/ftrace/test.d/functions | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 32e6211e1c6e..717581145cfc 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -58,7 +58,7 @@ parse_opts() { # opts ;; --verbose|-v|-vv) VERBOSE=$((VERBOSE + 1)) - [ $1 == '-vv' ] && VERBOSE=$((VERBOSE + 1)) + [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1)) shift 1 ;; --debug|-d) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index 07bb3e5930b4..aa31368851c9 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -48,7 +48,7 @@ test_event_enabled() { e=`cat $EVENT_ENABLE` if [ "$e" != $val ]; then echo "Expected $val but found $e" - exit -1 + exit 1 fi } diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 9aec6fcb7729..f2019b37370d 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -34,10 +34,10 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter echo > set_ftrace_filter grep -v '^#' set_ftrace_filter | while read t; do tr=`echo $t | cut -d: -f2` - if [ "$tr" == "" ]; then + if [ "$tr" = "" ]; then continue fi - if [ $tr == "enable_event" -o $tr == "disable_event" ]; then + if [ $tr = "enable_event" -o $tr = "disable_event" ]; then tr=`echo $t | cut -d: -f1-4` limit=`echo $t | cut -d: -f5` else -- cgit v1.2.3-59-g8ed1b From b172296b90b799c8b634521c248e9316581c8154 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 16 May 2017 23:21:28 +0530 Subject: selftests/ftrace: Add test to remove instance with active event triggers Add a test to ensure we clean up properly when removing an instance with active event triggers. Link: http://lkml.kernel.org/r/c479465b2009397708d6c52c8561e1523c22cd31.1494956770.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/instances/instance-event.tc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 4c5a061a5b4e..c73db7863adb 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -75,9 +75,13 @@ rmdir foo if [ -d foo ]; then fail "foo still exists" fi -exit 0 - +mkdir foo +echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter +rmdir foo +if [ -d foo ]; then + fail "foo still exists" +fi instance_slam() { -- cgit v1.2.3-59-g8ed1b From 545a028190dae4437aac4f86da7c8ab20857647c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 May 2017 14:58:35 -0400 Subject: kprobes: Document how optimized kprobes are removed from module unload Thomas discovered a bug where the kprobe trace tests had a race condition where the kprobe_optimizer called from a delayed work queue that does the optimizing and "unoptimizing" of a kprobe, can try to modify the text after it has been freed by the init code. The kprobe trace selftest is a special case, and Thomas and myself investigated to see if there's a chance that this could also be a bug with module unloading, as the code is not obvious to how it handles this. After adding lots of printks, I figured it out. Thomas suggested that this should be commented so that others will not have to go through this exercise again. Link: http://lkml.kernel.org/r/20170516145835.3827d3aa@gandalf.local.home Acked-by: Masami Hiramatsu Suggested-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 199243bba554..2d2d3a568e4e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2183,6 +2183,12 @@ static int kprobes_module_callback(struct notifier_block *nb, * The vaddr this probe is installed will soon * be vfreed buy not synced to disk. Hence, * disarming the breakpoint isn't needed. + * + * Note, this will also move any optimized probes + * that are pending to be removed from their + * corresponding lists to the freeing_list and + * will not be touched by the delayed + * kprobe_optimizer work handler. */ kill_kprobe(p); } -- cgit v1.2.3-59-g8ed1b From 7dd7eb9513bd02184d45f000ab69d78cb1fa1531 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 May 2017 22:54:11 -0400 Subject: ipv6: Check ip6_find_1stfragopt() return value properly. Do not use unsigned variables to see if it returns a negative error or not. Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options") Reported-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 9 ++++----- net/ipv6/ip6_output.c | 7 +++---- net/ipv6/udp_offload.c | 8 +++++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eab36abc9f22..280268f1dd7b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, const struct net_offload *ops; int proto; struct frag_hdr *fptr; - unsigned int unfrag_ip6hlen; unsigned int payload_len; u8 *prevhdr; int offset = 0; @@ -116,10 +115,10 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (unfrag_ip6hlen < 0) - return ERR_PTR(unfrag_ip6hlen); - fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); + int err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + fptr = (struct frag_hdr *)((u8 *)ipv6h + err); fptr->frag_off = htons(offset); if (skb->next) fptr->frag_off |= htons(IP6_MF); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01deecda2f84..d4a31becbd25 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -597,11 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (hlen < 0) { - err = hlen; + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) goto fail; - } + hlen = err; nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b348cff47395..a2267f80febb 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u8 frag_hdr_sz = sizeof(struct frag_hdr); __wsum csum; int tnl_hlen; + int err; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -90,9 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (unfrag_ip6hlen < 0) - return ERR_PTR(unfrag_ip6hlen); + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + unfrag_ip6hlen = err; nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + -- cgit v1.2.3-59-g8ed1b From 3c2ce60bdd3d57051bf85615deec04a694473840 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 May 2017 03:00:06 +0200 Subject: bpf: adjust verifier heuristics Current limits with regards to processing program paths do not really reflect today's needs anymore due to programs becoming more complex and verifier smarter, keeping track of more data such as const ALU operations, alignment tracking, spilling of PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for smarter matching of what LLVM generates. This also comes with the side-effect that we result in fewer opportunities to prune search states and thus often need to do more work to prove safety than in the past due to different register states and stack layout where we mismatch. Generally, it's quite hard to determine what caused a sudden increase in complexity, it could be caused by something as trivial as a single branch somewhere at the beginning of the program where LLVM assigned a stack slot that is marked differently throughout other branches and thus causing a mismatch, where verifier then needs to prove safety for the whole rest of the program. Subsequently, programs with even less than half the insn size limit can get rejected. We noticed that while some programs load fine under pre 4.11, they get rejected due to hitting limits on more recent kernels. We saw that in the vast majority of cases (90+%) pruning failed due to register mismatches. In case of stack mismatches, majority of cases failed due to different stack slot types (invalid, spill, misc) rather than differences in spilled registers. This patch makes pruning more aggressive by also adding markers that sit at conditional jumps as well. Currently, we only mark jump targets for pruning. For example in direct packet access, these are usually error paths where we bail out. We found that adding these markers, it can reduce number of processed insns by up to 30%. Another option is to ignore reg->id in probing PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning slightly as well by up to 7% observed complexity reduction as stand-alone. Meaning, if a previous path with register type PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then in the current state a PTR_TO_MAP_VALUE_OR_NULL register for the same map X must be safe as well. Last but not least the patch also adds a scheduling point and bumps the current limit for instructions to be processed to a more adequate value. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 39f2dcbc4cbc..1eddb713b815 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem { struct bpf_verifier_stack_elem *next; }; -#define BPF_COMPLEXITY_LIMIT_INSNS 65536 +#define BPF_COMPLEXITY_LIMIT_INSNS 98304 #define BPF_COMPLEXITY_LIMIT_STACK 1024 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA) @@ -2640,6 +2640,7 @@ peek_stack: env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ + env->explored_states[t] = STATE_LIST_MARK; ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret == 1) goto peek_stack; @@ -2798,6 +2799,12 @@ static bool states_equal(struct bpf_verifier_env *env, rcur->type != NOT_INIT)) continue; + /* Don't care about the reg->id in this case. */ + if (rold->type == PTR_TO_MAP_VALUE_OR_NULL && + rcur->type == PTR_TO_MAP_VALUE_OR_NULL && + rold->map_ptr == rcur->map_ptr) + continue; + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && compare_ptrs_to_packet(rold, rcur)) continue; @@ -2932,6 +2939,9 @@ static int do_check(struct bpf_verifier_env *env) goto process_bpf_exit; } + if (need_resched()) + cond_resched(); + if (log_level > 1 || (log_level && do_print_state)) { if (log_level > 1) verbose("%d:", insn_idx); -- cgit v1.2.3-59-g8ed1b From 9933e113c2e87a9f46a40fde8dafbf801dca1ab9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 10 May 2017 03:48:23 +0800 Subject: crypto: skcipher - Add missing API setkey checks The API setkey checks for key sizes and alignment went AWOL during the skcipher conversion. This patch restores them. Cc: Fixes: 4e6c3df4d729 ("crypto: skcipher - Add low-level skcipher...") Reported-by: Baozeng Signed-off-by: Herbert Xu --- crypto/skcipher.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 014af741fc6a..4faa0fd53b0c 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -764,6 +764,44 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm) return 0; } +static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + unsigned long alignmask = crypto_skcipher_alignmask(tfm); + struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); + u8 *buffer, *alignbuffer; + unsigned long absize; + int ret; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cipher->setkey(tfm, alignbuffer, keylen); + kzfree(buffer); + return ret; +} + +static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); + unsigned long alignmask = crypto_skcipher_alignmask(tfm); + + if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + if ((unsigned long)key & alignmask) + return skcipher_setkey_unaligned(tfm, key, keylen); + + return cipher->setkey(tfm, key, keylen); +} + static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); @@ -784,7 +822,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) tfm->__crt_alg->cra_type == &crypto_givcipher_type) return crypto_init_skcipher_ops_ablkcipher(tfm); - skcipher->setkey = alg->setkey; + skcipher->setkey = skcipher_setkey; skcipher->encrypt = alg->encrypt; skcipher->decrypt = alg->decrypt; skcipher->ivsize = alg->ivsize; -- cgit v1.2.3-59-g8ed1b From 552c9f47f8d451830a6b47151c6d2db77f77cc3e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 17 May 2017 13:12:51 +0200 Subject: KVM: arm/arm64: Fix bug when registering redist iodevs If userspace creates the VCPUs after initializing the VGIC, then we end up in a situation where we trigger a bug in kvm_vcpu_get_idx(), because it is called prior to adding the VCPU into the vcpus array on the VM. There is no tight coupling between the VCPU index and the area of the redistributor region used for the VCPU, so we can simply ensure that all creations of redistributors are serialized per VM, and increment an offset when we successfully add a redistributor. The vgic_register_redist_iodev() function can be called from two paths: vgic_redister_all_redist_iodev() which is called via the kvm_vgic_addr() device attribute handler. This patch already holds the kvm->lock mutex. The other path is via kvm_vgic_vcpu_init, which is called through a longer chain from kvm_vm_ioctl_create_vcpu(), which releases the kvm->lock mutex just before calling kvm_arch_vcpu_create(), so we can simply take this mutex again later for our purposes. Fixes: ab6f468c10 ("KVM: arm/arm64: Register iodevs when setting redist base and creating VCPUs") Signed-off-by: Christoffer Dall Tested-by: Jean-Philippe Brucker Reviewed-by: Eric Auger --- include/kvm/arm_vgic.h | 5 ++++- virt/kvm/arm/vgic/vgic-init.c | 5 ++++- virt/kvm/arm/vgic/vgic-mmio-v3.c | 9 ++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 97b8d3728b31..ef718586321c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -195,7 +195,10 @@ struct vgic_dist { /* either a GICv2 CPU interface */ gpa_t vgic_cpu_base; /* or a number of GICv3 redistributor regions */ - gpa_t vgic_redist_base; + struct { + gpa_t vgic_redist_base; + gpa_t vgic_redist_free_offset; + }; }; /* distributor enabled */ diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index dc68e2e424ab..3a0b8999f011 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -242,8 +242,11 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * If we are creating a VCPU with a GICv3 we must also register the * KVM io device for the redistributor that belongs to this VCPU. */ - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + mutex_lock(&vcpu->kvm->lock); ret = vgic_register_redist_iodev(vcpu); + mutex_unlock(&vcpu->kvm->lock); + } return ret; } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 99da1a207c19..9b0f6810e7a8 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -586,7 +586,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) if (!vgic_v3_check_base(kvm)) return -EINVAL; - rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2; + rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset; sgi_base = rd_base + SZ_64K; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); @@ -615,11 +615,14 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, SZ_64K, &sgi_dev->dev); mutex_unlock(&kvm->slots_lock); - if (ret) + if (ret) { kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &rd_dev->dev); + return ret; + } - return ret; + vgic->vgic_redist_free_offset += 2 * SZ_64K; + return 0; } static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) -- cgit v1.2.3-59-g8ed1b From fa472fa91a5a0b241f5ddae927d2e235d07545df Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 17 May 2017 21:16:09 +0200 Subject: KVM: arm/arm64: Hold slots_lock when unregistering kvm io bus devices We were not holding the kvm->slots_lock as required when calling kvm_io_bus_unregister_dev() as required. This only affects the error path, but still, let's do our due diligence. Reported by: Eric Auger Signed-off-by: Christoffer Dall Reviewed-by: Eric Auger --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 9b0f6810e7a8..201d5e2e973d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -614,15 +614,16 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, SZ_64K, &sgi_dev->dev); - mutex_unlock(&kvm->slots_lock); if (ret) { kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &rd_dev->dev); - return ret; + goto out; } vgic->vgic_redist_free_offset += 2 * SZ_64K; - return 0; +out: + mutex_unlock(&kvm->slots_lock); + return ret; } static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) @@ -647,10 +648,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) if (ret) { /* The current c failed, so we start with the previous one. */ + mutex_lock(&kvm->slots_lock); for (c--; c >= 0; c--) { vcpu = kvm_get_vcpu(kvm, c); vgic_unregister_redist_iodev(vcpu); } + mutex_unlock(&kvm->slots_lock); } return ret; -- cgit v1.2.3-59-g8ed1b From 751a9c763849f5859cb69ea44b0430d00672f637 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 17 May 2017 11:24:47 -0400 Subject: netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside CONFIG_COMPAT The patch in the Fixes references COMPAT_XT_ALIGN in the definition of XT_DATA_TO_USER, outside an #ifdef CONFIG_COMPAT block. Split XT_DATA_TO_USER into separate compat and non compat variants and define the first inside an CONFIG_COMPAT block. This simplifies both variants by removing branches inside the macro. Fixes: 324318f0248c ("netfilter: xtables: zero padding in data_to_user") Reported-by: Stephen Rothwell Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d17769599c10..1770c1d9b37f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -296,18 +296,17 @@ int xt_data_to_user(void __user *dst, const void *src, } EXPORT_SYMBOL_GPL(xt_data_to_user); -#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ +#define XT_DATA_TO_USER(U, K, TYPE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ - C_SIZE ? : K->u.kernel.TYPE->TYPE##size, \ - C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) : \ - XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) + K->u.kernel.TYPE->TYPE##size, \ + XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) { return XT_OBJ_TO_USER(u, m, match, 0) || - XT_DATA_TO_USER(u, m, match, 0); + XT_DATA_TO_USER(u, m, match); } EXPORT_SYMBOL_GPL(xt_match_to_user); @@ -315,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u) { return XT_OBJ_TO_USER(u, t, target, 0) || - XT_DATA_TO_USER(u, t, target, 0); + XT_DATA_TO_USER(u, t, target); } EXPORT_SYMBOL_GPL(xt_target_to_user); @@ -614,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); +#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ + xt_data_to_user(U->data, K->data, \ + K->u.kernel.TYPE->usersize, \ + C_SIZE, \ + COMPAT_XT_ALIGN(C_SIZE)) + int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size) { @@ -629,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) + if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } @@ -975,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t, if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) + if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } -- cgit v1.2.3-59-g8ed1b From d3e7dec054174fdddae33eaa0032a82c3f42181d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 May 2017 10:38:53 +0300 Subject: KVM: Silence underflow warning in avic_get_physical_id_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smatch complains that we check cap the upper bound of "index" but don't check for negatives. It's a false positive because "index" is never negative. But it's also simple enough to make it unsigned which makes the code easier to audit. Signed-off-by: Dan Carpenter Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c27ac6923a18..183ddb235fb4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1272,7 +1272,8 @@ static void init_vmcb(struct vcpu_svm *svm) } -static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index) +static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, + unsigned int index) { u64 *avic_physical_id_table; struct kvm_arch *vm_data = &vcpu->kvm->arch; -- cgit v1.2.3-59-g8ed1b From 7bc5d5aff356f3ba16c4d1e9eaf95cc99b7574ab Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 17 May 2017 18:31:59 +0300 Subject: usb: xhci: trace URB before giving it back instead of after Don't access any members of a URB after giving it back. URB might be freed by then already. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 74bf5c60a260..507ba7734b94 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -641,8 +641,8 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, xhci_urb_free_priv(urb_priv); usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock(&xhci->lock); - usb_hcd_giveback_urb(hcd, urb, status); trace_xhci_urb_giveback(urb); + usb_hcd_giveback_urb(hcd, urb, status); spin_lock(&xhci->lock); } -- cgit v1.2.3-59-g8ed1b From a0c16630d35a874e82bdf2088f58ecaca1024315 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 17 May 2017 18:32:00 +0300 Subject: xhci: apply PME_STUCK_QUIRK and MISSING_CAS quirk for Denverton Intel Denverton microserver is Atom based and need the PME and CAS quirks as well. Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7b86508ac8cf..fcf1f3f63e7a 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -52,6 +52,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 +#define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 static const char hcd_name[] = "xhci_hcd"; @@ -166,7 +167,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -175,7 +177,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; if (pdev->vendor == PCI_VENDOR_ID_ETRON && -- cgit v1.2.3-59-g8ed1b From 7480d912d549f414e0ce39331870899e89a5598c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 17 May 2017 18:32:01 +0300 Subject: usb: host: xhci-mem: allocate zeroed Scratchpad Buffer According to xHCI ch4.20 Scratchpad Buffers, the Scratchpad Buffer needs to be zeroed. ... The following operations take place to allocate Scratchpad Buffers to the xHC: ... b. Software clears the Scratchpad Buffer to '0' Cc: stable Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 69428e970925..12b573cfb846 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1724,7 +1724,7 @@ static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags) xhci->dcbaa->dev_context_ptrs[0] = cpu_to_le64(xhci->scratchpad->sp_dma); for (i = 0; i < num_sp; i++) { dma_addr_t dma; - void *buf = dma_alloc_coherent(dev, xhci->page_size, &dma, + void *buf = dma_zalloc_coherent(dev, xhci->page_size, &dma, flags); if (!buf) goto fail_sp4; -- cgit v1.2.3-59-g8ed1b From 6a29beef9d1b16c762e469d77e28c3de3f5c3dbb Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 17 May 2017 18:32:02 +0300 Subject: usb: host: xhci-ring: don't need to clear interrupt pending for MSI enabled hcd According to xHCI spec Figure 30: Interrupt Throttle Flow Diagram If PCI Message Signaled Interrupts (MSI or MSI-X) are enabled, then the assertion of the Interrupt Pending (IP) flag in Figure 30 generates a PCI Dword write. The IP flag is automatically cleared by the completion of the PCI write. the MSI enabled HCs don't need to clear interrupt pending bit, but hcd->irq = 0 doesn't equal to MSI enabled HCD. At some Dual-role controller software designs, it sets hcd->irq as 0 to avoid HCD requesting interrupt, and they want to decide when to call usb_hcd_irq by software. Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 +---- drivers/usb/host/xhci.c | 5 +++-- include/linux/usb/hcd.h | 1 + 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 507ba7734b94..0830b25f9499 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2707,12 +2707,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) */ status |= STS_EINT; writel(status, &xhci->op_regs->status); - /* FIXME when MSI-X is supported and there are multiple vectors */ - /* Clear the MSI-X event interrupt status */ - if (hcd->irq) { + if (!hcd->msi_enabled) { u32 irq_pending; - /* Acknowledge the PCI interrupt */ irq_pending = readl(&xhci->ir_set->irq_pending); irq_pending |= IMAN_IP; writel(irq_pending, &xhci->ir_set->irq_pending); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2d1310220832..71eb2991c698 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -359,9 +359,10 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) /* fall back to msi*/ ret = xhci_setup_msi(xhci); - if (!ret) - /* hcd->irq is 0, we have MSI */ + if (!ret) { + hcd->msi_enabled = 1; return 0; + } if (!pdev->irq) { xhci_err(xhci, "No msi-x/msi found and no IRQ in BIOS\n"); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index a469999a106d..50398b69ca44 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -148,6 +148,7 @@ struct usb_hcd { unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ unsigned msix_enabled:1; /* driver has MSI-X enabled? */ + unsigned msi_enabled:1; /* driver has MSI enabled? */ unsigned remove_phy:1; /* auto-remove USB phy */ /* The next flag is a stopgap, to be removed when all the HCDs -- cgit v1.2.3-59-g8ed1b From 63aea0dbab90a2461faaae357cbc8cfd6c8de9fe Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 17 May 2017 18:32:03 +0300 Subject: USB: xhci: fix lock-inversion problem With threaded interrupts, bottom-half handlers are called with interrupts enabled. Therefore they can't safely use spin_lock(); they have to use spin_lock_irqsave(). Lockdep warns about a violation occurring in xhci_irq(): ========================================================= [ INFO: possible irq lock inversion dependency detected ] 4.11.0-rc8-dbg+ #1 Not tainted --------------------------------------------------------- swapper/7/0 just changed the state of lock: (&(&ehci->lock)->rlock){-.-...}, at: [] ehci_hrtimer_func+0x29/0xc0 [ehci_hcd] but this lock took another, HARDIRQ-unsafe lock in the past: (hcd_urb_list_lock){+.....} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(hcd_urb_list_lock); local_irq_disable(); lock(&(&ehci->lock)->rlock); lock(hcd_urb_list_lock); lock(&(&ehci->lock)->rlock); *** DEADLOCK *** no locks held by swapper/7/0. the shortest dependencies between 2nd lock and 1st lock: -> (hcd_urb_list_lock){+.....} ops: 252 { HARDIRQ-ON-W at: __lock_acquire+0x602/0x1280 lock_acquire+0xd5/0x1c0 _raw_spin_lock+0x2f/0x40 usb_hcd_unlink_urb_from_ep+0x1b/0x60 [usbcore] xhci_giveback_urb_in_irq.isra.45+0x70/0x1b0 [xhci_hcd] finish_td.constprop.60+0x1d8/0x2e0 [xhci_hcd] xhci_irq+0xdd6/0x1fa0 [xhci_hcd] usb_hcd_irq+0x26/0x40 [usbcore] irq_forced_thread_fn+0x2f/0x70 irq_thread+0x149/0x1d0 kthread+0x113/0x150 ret_from_fork+0x2e/0x40 This patch fixes the problem. Signed-off-by: Alan Stern Reported-and-tested-by: Bart Van Assche CC: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 0830b25f9499..6d2492c1c643 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2677,11 +2677,12 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) struct xhci_hcd *xhci = hcd_to_xhci(hcd); union xhci_trb *event_ring_deq; irqreturn_t ret = IRQ_NONE; + unsigned long flags; dma_addr_t deq; u64 temp_64; u32 status; - spin_lock(&xhci->lock); + spin_lock_irqsave(&xhci->lock, flags); /* Check if the xHC generated the interrupt, or the irq is shared */ status = readl(&xhci->op_regs->status); if (status == ~(u32)0) { @@ -2754,7 +2755,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) ret = IRQ_HANDLED; out: - spin_unlock(&xhci->lock); + spin_unlock_irqrestore(&xhci->lock, flags); return ret; } -- cgit v1.2.3-59-g8ed1b From 5db851cf20857c5504b146046e97cb7781f2a743 Mon Sep 17 00:00:00 2001 From: Matthias Lange Date: Wed, 17 May 2017 18:32:04 +0300 Subject: xhci: remove GFP_DMA flag from allocation There is no reason to restrict allocations to the first 16MB ISA DMA addresses. It is causing problems in a virtualization setup with enabled IOMMU (x86_64). The result is that USB is not working in the VM. CC: Signed-off-by: Matthias Lange Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 12b573cfb846..1f1687e888d6 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -56,7 +56,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci, } if (max_packet) { - seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA); + seg->bounce_buf = kzalloc(max_packet, flags); if (!seg->bounce_buf) { dma_pool_free(xhci->segment_pool, seg->trbs, dma); kfree(seg); -- cgit v1.2.3-59-g8ed1b From 604d02a2a66ab7f93fd3b2bde3698c29ef057b65 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 17 May 2017 18:32:05 +0300 Subject: xhci: Fix command ring stop regression in 4.11 In 4.11 TRB completion codes were renamed to match spec. Completion codes for command ring stopped and endpoint stopped were mixed, leading to failures while handling a stopped command ring. Use the correct completion code for command ring stopped events. Fixes: 0b7c105a04ca ("usb: host: xhci: rename completion codes to match spec") Cc: # 4.11 Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci-ring.c | 8 ++++---- drivers/usb/host/xhci.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 5e3e9d4c6956..0dde49c35dd2 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -419,7 +419,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) wait_for_completion(cmd->completion); if (cmd->status == COMP_COMMAND_ABORTED || - cmd->status == COMP_STOPPED) { + cmd->status == COMP_COMMAND_RING_STOPPED) { xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n"); ret = -ETIME; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 6d2492c1c643..03f63f50afb6 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -323,7 +323,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if (i_cmd->status != COMP_COMMAND_ABORTED) continue; - i_cmd->status = COMP_STOPPED; + i_cmd->status = COMP_COMMAND_RING_STOPPED; xhci_dbg(xhci, "Turn aborted command %p to no-op\n", i_cmd->command_trb); @@ -1380,7 +1380,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status)); /* If CMD ring stopped we own the trbs between enqueue and dequeue */ - if (cmd_comp_code == COMP_STOPPED) { + if (cmd_comp_code == COMP_COMMAND_RING_STOPPED) { complete_all(&xhci->cmd_ring_stop_completion); return; } @@ -1436,8 +1436,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, break; case TRB_CMD_NOOP: /* Is this an aborted command turned to NO-OP? */ - if (cmd->status == COMP_STOPPED) - cmd_comp_code = COMP_STOPPED; + if (cmd->status == COMP_COMMAND_RING_STOPPED) + cmd_comp_code = COMP_COMMAND_RING_STOPPED; break; case TRB_RESET_EP: WARN_ON(slot_id != TRB_TO_SLOT_ID( diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 71eb2991c698..30f47d92a610 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1764,7 +1764,7 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci, switch (*cmd_status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for configure endpoint command\n"); ret = -ETIME; break; @@ -1814,7 +1814,7 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci, switch (*cmd_status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for evaluate context command\n"); ret = -ETIME; break; @@ -3433,7 +3433,7 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd, ret = reset_device_cmd->status; switch (ret) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout waiting for reset device command\n"); ret = -ETIME; goto command_cleanup; @@ -3818,7 +3818,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, */ switch (command->status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for setup device command\n"); ret = -ETIME; break; -- cgit v1.2.3-59-g8ed1b From 4b148d5144d64ee135b8924350cb0b3a7fd21150 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 17 May 2017 18:32:06 +0300 Subject: usb: host: xhci-plat: propagate return value of platform_get_irq() platform_get_irq() returns an error code, but the xhci-plat driver ignores it and always returns -ENODEV. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. CC: Signed-off-by: Thomas Petazzoni Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 7c2a9e7c8e0f..c04144b25a67 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -177,7 +177,7 @@ static int xhci_plat_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENODEV; + return irq; /* * sysdev must point to a device that is known to the system firmware -- cgit v1.2.3-59-g8ed1b From 3c50ffef25855a9d9e4b07b02d756a8cdd653069 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 17 May 2017 11:23:10 -0500 Subject: usb: musb: Fix trying to suspend while active for OTG configurations Commit d8e5f0eca1e8 ("usb: musb: Fix hardirq-safe hardirq-unsafe lock order error") caused a regression where musb keeps trying to enable host mode with no cable connected. This seems to be caused by the fact that now phy is enabled earlier, and we are wrongly trying to force USB host mode on an OTG port. The errors we are getting are "trying to suspend as a_idle while active". For ports configured as OTG, we should not need to do anything to try to force USB host mode on it's OTG port. Trying to force host mode in this case just seems to completely confuse the musb state machine. Let's fix the issue by making musb_host_setup() attempt to force the mode only if port_mode is configured for host mode. Fixes: d8e5f0eca1e8 ("usb: musb: Fix hardirq-safe hardirq-unsafe lock order error") Cc: Johan Hovold Cc: stable Reported-by: Laurent Pinchart Reported-by: Peter Ujfalusi Tested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index ac3a4952abb4..dbe617a735d8 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2780,10 +2780,11 @@ int musb_host_setup(struct musb *musb, int power_budget) int ret; struct usb_hcd *hcd = musb->hcd; - MUSB_HST_MODE(musb); - musb->xceiv->otg->default_a = 1; - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - + if (musb->port_mode == MUSB_PORT_MODE_HOST) { + MUSB_HST_MODE(musb); + musb->xceiv->otg->default_a = 1; + musb->xceiv->otg->state = OTG_STATE_A_IDLE; + } otg_set_host(musb->xceiv->otg, &hcd->self); hcd->self.otg_port = 1; musb->xceiv->otg->host = &hcd->self; -- cgit v1.2.3-59-g8ed1b From 6df2b42f7c040d57d9ecb67244e04e905ab87ac6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 17 May 2017 11:23:11 -0500 Subject: usb: musb: tusb6010_omap: Do not reset the other direction's packet size We have one register for each EP to set the maximum packet size for both TX and RX. If for example an RX programming would happen before the previous TX transfer finishes we would reset the TX packet side. To fix this issue, only modify the TX or RX part of the register. Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Signed-off-by: Peter Ujfalusi Tested-by: Tony Lindgren Signed-off-by: Bin Liu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/tusb6010_omap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index 8b43c4b99f04..7870b37e0ea5 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -219,6 +219,7 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz, u32 dma_remaining; int src_burst, dst_burst; u16 csr; + u32 psize; int ch; s8 dmareq; s8 sync_dev; @@ -390,15 +391,19 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz, if (chdat->tx) { /* Send transfer_packet_sz packets at a time */ - musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, - chdat->transfer_packet_sz); + psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET); + psize &= ~0x7ff; + psize |= chdat->transfer_packet_sz; + musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize); musb_writel(ep_conf, TUSB_EP_TX_OFFSET, TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len)); } else { /* Receive transfer_packet_sz packets at a time */ - musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, - chdat->transfer_packet_sz << 16); + psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET); + psize &= ~(0x7ff << 16); + psize |= (chdat->transfer_packet_sz << 16); + musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize); musb_writel(ep_conf, TUSB_EP_RX_OFFSET, TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len)); -- cgit v1.2.3-59-g8ed1b From 66ea5974b36b73129bbdc129847ec73cecb3f14d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 May 2017 10:16:30 -0700 Subject: MAINTAINERS: greybus-dev list is members-only The greybus-dev mailing list is a members-only list and is moderated for non-subscribers. Signed-off-by: Randy Dunlap Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index da0149f2d16c..f37700c663f1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5702,7 +5702,7 @@ M: Alex Elder M: Greg Kroah-Hartman S: Maintained F: drivers/staging/greybus/ -L: greybus-dev@lists.linaro.org +L: greybus-dev@lists.linaro.org (moderated for non-subscribers) GREYBUS AUDIO PROTOCOLS DRIVERS M: Vaibhav Agarwal -- cgit v1.2.3-59-g8ed1b From 64df6d525fcff1630098db9238bfd2b3e092d5c1 Mon Sep 17 00:00:00 2001 From: linzhang Date: Wed, 17 May 2017 12:05:07 +0800 Subject: net: x25: fix one potential use-after-free issue The function x25_init is not properly unregister related resources on error handler.It is will result in kernel oops if x25_init init failed, so add properly unregister call on error handler. Also, i adjust the coding style and make x25_register_sysctl properly return failure. Signed-off-by: linzhang Signed-off-by: David S. Miller --- include/net/x25.h | 4 ++-- net/x25/af_x25.c | 24 ++++++++++++++++-------- net/x25/sysctl_net_x25.c | 5 ++++- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/include/net/x25.h b/include/net/x25.h index c383aa4edbf0..6d30a01d281d 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL -void x25_register_sysctl(void); +int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else -static inline void x25_register_sysctl(void) {}; +static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8b911c29860e..5a1a98df3499 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1791,32 +1791,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb) static int __init x25_init(void) { - int rc = proto_register(&x25_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&x25_proto, 0); + if (rc) goto out; rc = sock_register(&x25_family_ops); - if (rc != 0) + if (rc) goto out_proto; dev_add_pack(&x25_packet_type); rc = register_netdevice_notifier(&x25_dev_notifier); - if (rc != 0) + if (rc) goto out_sock; - pr_info("Linux Version 0.2\n"); + rc = x25_register_sysctl(); + if (rc) + goto out_dev; - x25_register_sysctl(); rc = x25_proc_init(); - if (rc != 0) - goto out_dev; + if (rc) + goto out_sysctl; + + pr_info("Linux Version 0.2\n"); + out: return rc; +out_sysctl: + x25_unregister_sysctl(); out_dev: unregister_netdevice_notifier(&x25_dev_notifier); out_sock: + dev_remove_pack(&x25_packet_type); sock_unregister(AF_X25); out_proto: proto_unregister(&x25_proto); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index a06dfe143c67..ba078c85f0a1 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = { { }, }; -void __init x25_register_sysctl(void) +int __init x25_register_sysctl(void) { x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table); + if (!x25_table_header) + return -ENOMEM; + return 0; } void x25_unregister_sysctl(void) -- cgit v1.2.3-59-g8ed1b From 47ab37a19a8112670e63474016d5fbf86bc8737f Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Wed, 17 May 2017 15:28:19 +0800 Subject: net: ethernet: faraday: To support device tree usage. To support device tree usage for ftmac100. Signed-off-by: Greentime Hu Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftmac100.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 6ac336b546e6..1536356e2ea8 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1174,11 +1174,17 @@ static int ftmac100_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id ftmac100_of_ids[] = { + { .compatible = "andestech,atmac100" }, + { } +}; + static struct platform_driver ftmac100_driver = { .probe = ftmac100_probe, .remove = ftmac100_remove, .driver = { .name = DRV_NAME, + .of_match_table = ftmac100_of_ids }, }; @@ -1202,3 +1208,4 @@ module_exit(ftmac100_exit); MODULE_AUTHOR("Po-Yu Chuang "); MODULE_DESCRIPTION("FTMAC100 driver"); MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(of, ftmac100_of_ids); -- cgit v1.2.3-59-g8ed1b From a285860211bf257b0e6d522dac6006794be348af Mon Sep 17 00:00:00 2001 From: Tobias Jungel Date: Wed, 17 May 2017 09:29:12 +0200 Subject: bridge: netlink: check vlan_default_pvid range Currently it is allowed to set the default pvid of a bridge to a value above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and returns -EINVAL in case the pvid is out of bounds. Reproduce by calling: [root@test ~]# ip l a type bridge [root@test ~]# ip l a type dummy [root@test ~]# ip l s bridge0 type bridge vlan_filtering 1 [root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999 [root@test ~]# ip l s dummy0 master bridge0 [root@test ~]# bridge vlan port vlan ids bridge0 9999 PVID Egress Untagged dummy0 9999 PVID Egress Untagged Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid") Acked-by: Nikolay Aleksandrov Signed-off-by: Tobias Jungel Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c5ce7745b230..574f78824d8a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -835,6 +835,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EPROTONOSUPPORT; } } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + if (defpvid >= VLAN_VID_MASK) + return -EINVAL; + } #endif return 0; -- cgit v1.2.3-59-g8ed1b From a3f96c47c8d4c38be71fdbb440a99968c764ba62 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 17 May 2017 14:52:16 +0200 Subject: udp: make *udp*_queue_rcv_skb() functions static Since the udp memory accounting refactor, we don't need any more to export the *udp*_queue_rcv_skb(). Make them static and fix a couple of sparse warnings: net/ipv4/udp.c:1615:5: warning: symbol 'udp_queue_rcv_skb' was not declared. Should it be static? net/ipv6/udp.c:572:5: warning: symbol 'udpv6_queue_rcv_skb' was not declared. Should it be static? Fixes: 850cbaddb52d ("udp: use it's own memory accounting schema") Fixes: c915fe13cbaa ("udplite: fix NULL pointer dereference") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv4/udp_impl.h | 1 - net/ipv6/udp.c | 4 ++-- net/ipv6/udp_impl.h | 1 - 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ea6e4cff9faf..1d6219bf2d6b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1612,7 +1612,7 @@ static void udp_v4_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(udp_encap_enable); * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ -int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index feb50a16398d..a8cf8c6fb60c 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 04862abfe4ec..06ec39b79609 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -526,7 +526,7 @@ out: return; } -int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -569,7 +569,7 @@ void udpv6_encap_enable(void) } EXPORT_SYMBOL(udpv6_encap_enable); -int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index e78bdc76dcc3..f180b3d85e31 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,7 +26,6 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS -- cgit v1.2.3-59-g8ed1b From fdcee2cbb8438702ea1b328fb6e0ac5e9a40c7f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 May 2017 07:16:40 -0700 Subject: sctp: do not inherit ipv6_{mc|ac|fl}_list from parent SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit ipv6_mc_list from parent"), otherwise bad things can happen. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 142b70e959af..f5b45b8b8b16 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -677,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; rcu_read_lock(); opt = rcu_dereference(np->opt); -- cgit v1.2.3-59-g8ed1b From 486181bcb3248e2f1977f4e69387a898234a4e1e Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 17 May 2017 16:31:41 +0200 Subject: qmi_wwan: add another Lenovo EM74xx device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In their infinite wisdom, and never ending quest for end user frustration, Lenovo has decided to use a new USB device ID for the wwan modules in their 2017 laptops. The actual hardware is still the Sierra Wireless EM7455 or EM7430, depending on region. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d7165767ca9d..8f923a147fa9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1196,6 +1196,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */ {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ -- cgit v1.2.3-59-g8ed1b From 1ac91bff9c65d4a5e0da244495ea6275fd514c5a Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 18 May 2017 00:08:16 +0530 Subject: cxgb4: update latest firmware version supported Change t4fw_version.h to update latest firmware version number to 1.16.43.0. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index fa376444e57c..3549d3876278 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -37,7 +37,7 @@ #define T4FW_VERSION_MAJOR 0x01 #define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x21 +#define T4FW_VERSION_MICRO 0x2B #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -46,7 +46,7 @@ #define T5FW_VERSION_MAJOR 0x01 #define T5FW_VERSION_MINOR 0x10 -#define T5FW_VERSION_MICRO 0x21 +#define T5FW_VERSION_MICRO 0x2B #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -55,7 +55,7 @@ #define T6FW_VERSION_MAJOR 0x01 #define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x21 +#define T6FW_VERSION_MICRO 0x2B #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- cgit v1.2.3-59-g8ed1b From 52499a6ad2aef20f7baf5872e97988c385170f1b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Apr 2017 20:24:41 +0200 Subject: gpu: host1x: select IOMMU_IOVA When IOMMU_IOVA is not built-in but host1x is, we get a link error: drivers/gpu/host1x/dev.o: In function `host1x_remove': dev.c:(.text.host1x_remove+0x50): undefined reference to `put_iova_domain' drivers/gpu/host1x/dev.o: In function `host1x_probe': dev.c:(.text.host1x_probe+0x31c): undefined reference to `init_iova_domain' dev.c:(.text.host1x_probe+0x38c): undefined reference to `put_iova_domain' drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init': cdma.c:(.text.host1x_cdma_init+0x238): undefined reference to `alloc_iova' cdma.c:(.text.host1x_cdma_init+0x2c0): undefined reference to `__free_iova' drivers/gpu/host1x/cdma.o: In function `host1x_cdma_deinit': cdma.c:(.text.host1x_cdma_deinit+0xb0): undefined reference to `free_iova' This adds the same select statement that we have for drm_tegra. Fixes: 404bfb78daf3 ("gpu: host1x: Add IOMMU support") Signed-off-by: Arnd Bergmann Reviewed-by: Mikko Perttunen Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20170419182449.885312-1-arnd@arndb.de --- drivers/gpu/host1x/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index b2fd029d67b3..91916326957f 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -1,6 +1,7 @@ config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" depends on ARCH_TEGRA || (ARM && COMPILE_TEST) + select IOMMU_IOVA if IOMMU_SUPPORT help Driver for the NVIDIA Tegra host1x hardware. -- cgit v1.2.3-59-g8ed1b From b299cde245b0b76c977f4291162cf668e087b408 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Fri, 12 May 2017 14:42:58 -0700 Subject: drivers: char: mem: Check for address space wraparound with mmap() /dev/mem currently allows mmap() mappings that wrap around the end of the physical address space, which should probably be illegal. It circumvents the existing STRICT_DEVMEM permission check because the loop immediately terminates (as the start address is already higher than the end address). On the x86_64 architecture it will then cause a panic (from the BUG(start >= end) in arch/x86/mm/pat.c:reserve_memtype()). This patch adds an explicit check to make sure offset + size will not wrap around in the physical address type. Signed-off-by: Julius Werner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/char/mem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 7e4a9d1296bb..6e0cbe092220 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -340,6 +340,11 @@ static const struct vm_operations_struct mmap_mem_ops = { static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; + phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + + /* It's illegal to wrap around the end of the physical address space. */ + if (offset + (phys_addr_t)size < offset) + return -EINVAL; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 6bee9b78a7a5ea257b24d93974538938c82b1169 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 18 May 2017 14:35:21 +0200 Subject: drm/atmel-hlcdc: Fix output initialization drm_of_find_panel_or_bridge() is expecting np to point to the encoder node, not the bridge or panel this encoder is feeding. Moreover, the endpoint parameter passed to drm_of_find_panel_or_bridge() is always set to zero, which prevents us from probing all outputs. We also move the atmel_hlcdc_rgb_output allocation after the panel/bridge detection to avoid useless allocations. Reported-by: Alexandre Belloni Fixes: ebc944613567 ("drm: convert drivers to use drm_of_find_panel_or_bridge") Signed-off-by: Boris Brezillon Tested-by: Alexandre Belloni Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1495110921-4032-1-git-send-email-boris.brezillon@free-electrons.com --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c | 36 +++++++++--------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c index 65a3bd7a0c00..423dda2785d4 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c @@ -152,8 +152,7 @@ static const struct drm_connector_funcs atmel_hlcdc_panel_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, - const struct device_node *np) +static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, int endpoint) { struct atmel_hlcdc_dc *dc = dev->dev_private; struct atmel_hlcdc_rgb_output *output; @@ -161,6 +160,11 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, struct drm_bridge *bridge; int ret; + ret = drm_of_find_panel_or_bridge(dev->dev->of_node, 0, endpoint, + &panel, &bridge); + if (ret) + return ret; + output = devm_kzalloc(dev->dev, sizeof(*output), GFP_KERNEL); if (!output) return -EINVAL; @@ -177,10 +181,6 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, output->encoder.possible_crtcs = 0x1; - ret = drm_of_find_panel_or_bridge(np, 0, 0, &panel, &bridge); - if (ret) - return ret; - if (panel) { output->connector.dpms = DRM_MODE_DPMS_OFF; output->connector.polled = DRM_CONNECTOR_POLL_CONNECT; @@ -220,22 +220,14 @@ err_encoder_cleanup: int atmel_hlcdc_create_outputs(struct drm_device *dev) { - struct device_node *remote; - int ret = -ENODEV; - int endpoint = 0; - - while (true) { - /* Loop thru possible multiple connections to the output */ - remote = of_graph_get_remote_node(dev->dev->of_node, 0, - endpoint++); - if (!remote) - break; - - ret = atmel_hlcdc_attach_endpoint(dev, remote); - of_node_put(remote); - if (ret) - return ret; - } + int endpoint, ret = 0; + + for (endpoint = 0; !ret; endpoint++) + ret = atmel_hlcdc_attach_endpoint(dev, endpoint); + + /* At least one device was successfully attached.*/ + if (ret == -ENODEV && endpoint) + return 0; return ret; } -- cgit v1.2.3-59-g8ed1b From 9434cec130a941e8a0698d598dfa5499dbdeb949 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 5 May 2017 21:08:44 +0200 Subject: firmware: Google VPD: Fix memory allocation error handling This patch fixes several issues: - if the 1st 'kzalloc' fails, we dereference a NULL pointer - if the 2nd 'kzalloc' fails, there is a memory leak - if 'sysfs_create_bin_file' fails there is also a memory leak Fix it by adding a test after the first memory allocation and some error handling paths to correctly free memory if needed. Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/vpd.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c index 3ce813110d5e..1e7860f02f4f 100644 --- a/drivers/firmware/google/vpd.c +++ b/drivers/firmware/google/vpd.c @@ -116,9 +116,13 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len, return VPD_OK; info = kzalloc(sizeof(*info), GFP_KERNEL); - info->key = kzalloc(key_len + 1, GFP_KERNEL); - if (!info->key) + if (!info) return -ENOMEM; + info->key = kzalloc(key_len + 1, GFP_KERNEL); + if (!info->key) { + ret = -ENOMEM; + goto free_info; + } memcpy(info->key, key, key_len); @@ -135,12 +139,17 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len, list_add_tail(&info->list, &sec->attribs); ret = sysfs_create_bin_file(sec->kobj, &info->bin_attr); - if (ret) { - kfree(info->key); - return ret; - } + if (ret) + goto free_info_key; return 0; + +free_info_key: + kfree(info->key); +free_info: + kfree(info); + + return ret; } static void vpd_section_attrib_destroy(struct vpd_section *sec) -- cgit v1.2.3-59-g8ed1b From 6dd4aba36f2dca00c3b6976a0d59c5cee16ad545 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 18 May 2017 09:18:52 +0200 Subject: mlxsw: spectrum_dpipe: Fix incorrect entry index In case of disabled counters the entry index will be incorrect. Fix this by moving the entry index set before the counter status check. Fixes: 2ba5999f009d ("mlxsw: spectrum: Add Support for erif table entries access") Signed-off-by: Arkadi Sharshevsky Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index ea56f6ade6b4..5f0a7bc692a4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -199,10 +199,11 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, entry->counter_valid = false; entry->counter = 0; + entry->index = mlxsw_sp_rif_index(rif); + if (!counters_enabled) return 0; - entry->index = mlxsw_sp_rif_index(rif); err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS, &cnt); -- cgit v1.2.3-59-g8ed1b From 6b1206bbbce6092b2ec412125300889e6e551bc2 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 18 May 2017 09:18:53 +0200 Subject: mlxsw: spectrum_router: Fix rif counter freeing routine During rif counter freeing the counter index can be invalid. Add check of validity before freeing the counter. Fixes: e0c0afd8aa4e ("mlxsw: spectrum: Support for counters on router interfaces") Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 33cec1cc1642..9f89c4137d21 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -206,6 +206,9 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, { unsigned int *p_counter_index; + if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) + return; + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); if (WARN_ON(!p_counter_index)) return; -- cgit v1.2.3-59-g8ed1b From 5f5c5449acad0cd3322e53e1ac68c044483b0aa5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 18 May 2017 15:01:34 +0200 Subject: sh_eth: Use platform device for printing before register_netdev() The MDIO initialization failure message is printed using the network device, before it has been registered, leading to: (null): failed to initialise MDIO Use the platform device instead to fix this: sh-eth ee700000.ethernet: failed to initialise MDIO Fixes: daacf03f0bbfefee ("sh_eth: Register MDIO bus before registering the network device") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f68c4db656ed..c85222b02754 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3220,7 +3220,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* MDIO bus init */ ret = sh_mdio_init(mdp, pd); if (ret) { - dev_err(&ndev->dev, "failed to initialise MDIO\n"); + dev_err(&pdev->dev, "failed to initialise MDIO\n"); goto out_release; } -- cgit v1.2.3-59-g8ed1b From b7ce520e9f71ff65d0aa0ad86223f94ae4095fae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 18 May 2017 15:01:35 +0200 Subject: sh_eth: Do not print an error message for probe deferral EPROBE_DEFER is not an error, hence printing an error message like sh-eth ee700000.ethernet: failed to initialise MDIO may confuse the user. To fix this, suppress the error message in case of probe deferral. While at it, shorten the message, and add the actual error code. Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c85222b02754..2d686ccf971b 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3220,7 +3220,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* MDIO bus init */ ret = sh_mdio_init(mdp, pd); if (ret) { - dev_err(&pdev->dev, "failed to initialise MDIO\n"); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "MDIO init failed: %d\n", ret); goto out_release; } -- cgit v1.2.3-59-g8ed1b From c0e01eac7ada785fdeaea1ae5476ec1cf3b00374 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 May 2017 13:03:52 +0200 Subject: mlxsw: spectrum: Avoid possible NULL pointer dereference In case we got an FDB notification for a port that doesn't exist we execute an FDB entry delete to prevent it from re-appearing the next time we poll for notifications. If the operation failed we would trigger a NULL pointer dereference as 'mlxsw_sp_port' is NULL. Fix it by reporting the error using the underlying bus device instead. Fixes: 12f1501e7511 ("mlxsw: spectrum: remove FDB entry in case we get unknown object notification") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0d8411f1f954..f4bb0c0b7c1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1497,8 +1497,7 @@ do_fdb_op: err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, true); if (err) { - if (net_ratelimit()) - netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n"); return; } @@ -1558,8 +1557,7 @@ do_fdb_op: err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, adding, true); if (err) { - if (net_ratelimit()) - netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n"); return; } -- cgit v1.2.3-59-g8ed1b From d8f1deaa5256aba3296025e103e8abb96f3e6479 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2017 13:09:52 +0100 Subject: watchdog: orion: fix compile-test dependencies I ran into one corner case with the orion watchdog using the atomic_io_modify interface: drivers/watchdog/orion_wdt.o: In function `orion_stop': orion_wdt.c:(.text.orion_stop+0x28): undefined reference to `atomic_io_modify' drivers/watchdog/orion_wdt.o: In function `armada375_stop': orion_wdt.c:(.text.armada375_stop+0x28): undefined reference to `atomic_io_modify' This function is available on all 32-bit ARM builds except for ebsa110, so we have to specifically exclude that from compile-testing. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 52a70ee6014f..8b9049dac094 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -452,7 +452,7 @@ config DAVINCI_WATCHDOG config ORION_WATCHDOG tristate "Orion watchdog" - depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || COMPILE_TEST + depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || (COMPILE_TEST && !ARCH_EBSA110) depends on ARM select WATCHDOG_CORE help -- cgit v1.2.3-59-g8ed1b From 015b528644a84b0018d3286ecd6ea5f82dce0180 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 2 Mar 2017 18:31:11 +0100 Subject: watchdog: sama5d4: fix WDDIS handling The datasheet states: "When setting the WDDIS bit, and while it is set, the fields WDV and WDD must not be modified." Because the whole configuration is already cached inside .mr, wait for the user to enable the watchdog to configure it so it is enabled and configured at the same time (what the IP is actually expecting). When the watchdog is already enabled, it is not an issue to reconfigure it. Signed-off-by: Alexandre Belloni Acked-by: Wenyou.Yang Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sama5d4_wdt.c | 48 ++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c index f709962018ac..5cee20caca78 100644 --- a/drivers/watchdog/sama5d4_wdt.c +++ b/drivers/watchdog/sama5d4_wdt.c @@ -44,6 +44,8 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +#define wdt_enabled (!(wdt->mr & AT91_WDT_WDDIS)) + #define wdt_read(wdt, field) \ readl_relaxed((wdt)->reg_base + (field)) @@ -89,7 +91,16 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd, wdt->mr &= ~AT91_WDT_WDD; wdt->mr |= AT91_WDT_SET_WDV(value); wdt->mr |= AT91_WDT_SET_WDD(value); - wdt_write(wdt, AT91_WDT_MR, wdt->mr); + + /* + * WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When + * setting the WDDIS bit, and while it is set, the fields WDV and WDD + * must not be modified. + * If the watchdog is enabled, then the timeout can be updated. Else, + * wait that the user enables it. + */ + if (wdt_enabled) + wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS); wdd->timeout = timeout; @@ -145,23 +156,20 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt) static int sama5d4_wdt_init(struct sama5d4_wdt *wdt) { - struct watchdog_device *wdd = &wdt->wdd; - u32 value = WDT_SEC2TICKS(wdd->timeout); u32 reg; - /* - * Because the fields WDV and WDD must not be modified when the WDDIS - * bit is set, so clear the WDDIS bit before writing the WDT_MR. + * When booting and resuming, the bootloader may have changed the + * watchdog configuration. + * If the watchdog is already running, we can safely update it. + * Else, we have to disable it properly. */ - reg = wdt_read(wdt, AT91_WDT_MR); - reg &= ~AT91_WDT_WDDIS; - wdt_write(wdt, AT91_WDT_MR, reg); - - wdt->mr |= AT91_WDT_SET_WDD(value); - wdt->mr |= AT91_WDT_SET_WDV(value); - - wdt_write(wdt, AT91_WDT_MR, wdt->mr); - + if (wdt_enabled) { + wdt_write(wdt, AT91_WDT_MR, wdt->mr); + } else { + reg = wdt_read(wdt, AT91_WDT_MR); + if (!(reg & AT91_WDT_WDDIS)) + wdt_write(wdt, AT91_WDT_MR, reg | AT91_WDT_WDDIS); + } return 0; } @@ -172,6 +180,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) struct resource *res; void __iomem *regs; u32 irq = 0; + u32 timeout; int ret; wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); @@ -221,6 +230,11 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) return ret; } + timeout = WDT_SEC2TICKS(wdd->timeout); + + wdt->mr |= AT91_WDT_SET_WDD(timeout); + wdt->mr |= AT91_WDT_SET_WDV(timeout); + ret = sama5d4_wdt_init(wdt); if (ret) return ret; @@ -263,9 +277,7 @@ static int sama5d4_wdt_resume(struct device *dev) { struct sama5d4_wdt *wdt = dev_get_drvdata(dev); - wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS); - if (wdt->mr & AT91_WDT_WDDIS) - wdt_write(wdt, AT91_WDT_MR, wdt->mr); + sama5d4_wdt_init(wdt); return 0; } -- cgit v1.2.3-59-g8ed1b From ddd6d240b26dcb8b8dc98bd493eba944dd97ebc8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 2 Mar 2017 18:31:12 +0100 Subject: watchdog: sama5d4: fix race condition WDT_MR and WDT_CR must not updated within three slow clock periods after the last ping (write to WDT_CR or WDT_MR). Ensure enough time has elapsed before writing those registers. wdt_write() waits for 4 periods to ensure at least 3 edges are seen by the IP. Signed-off-by: Alexandre Belloni Acked-by: Wenyou.Yang Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sama5d4_wdt.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c index 5cee20caca78..362fd229786d 100644 --- a/drivers/watchdog/sama5d4_wdt.c +++ b/drivers/watchdog/sama5d4_wdt.c @@ -6,6 +6,7 @@ * Licensed under GPLv2. */ +#include #include #include #include @@ -29,6 +30,7 @@ struct sama5d4_wdt { struct watchdog_device wdd; void __iomem *reg_base; u32 mr; + unsigned long last_ping; }; static int wdt_timeout = WDT_DEFAULT_TIMEOUT; @@ -49,8 +51,29 @@ MODULE_PARM_DESC(nowayout, #define wdt_read(wdt, field) \ readl_relaxed((wdt)->reg_base + (field)) -#define wdt_write(wtd, field, val) \ - writel_relaxed((val), (wdt)->reg_base + (field)) +/* 4 slow clock periods is 4/32768 = 122.07µs*/ +#define WDT_DELAY usecs_to_jiffies(123) + +static void wdt_write(struct sama5d4_wdt *wdt, u32 field, u32 val) +{ + /* + * WDT_CR and WDT_MR must not be modified within three slow clock + * periods following a restart of the watchdog performed by a write + * access in WDT_CR. + */ + while (time_before(jiffies, wdt->last_ping + WDT_DELAY)) + usleep_range(30, 125); + writel_relaxed(val, wdt->reg_base + field); + wdt->last_ping = jiffies; +} + +static void wdt_write_nosleep(struct sama5d4_wdt *wdt, u32 field, u32 val) +{ + if (time_before(jiffies, wdt->last_ping + WDT_DELAY)) + udelay(123); + writel_relaxed(val, wdt->reg_base + field); + wdt->last_ping = jiffies; +} static int sama5d4_wdt_start(struct watchdog_device *wdd) { @@ -164,11 +187,12 @@ static int sama5d4_wdt_init(struct sama5d4_wdt *wdt) * Else, we have to disable it properly. */ if (wdt_enabled) { - wdt_write(wdt, AT91_WDT_MR, wdt->mr); + wdt_write_nosleep(wdt, AT91_WDT_MR, wdt->mr); } else { reg = wdt_read(wdt, AT91_WDT_MR); if (!(reg & AT91_WDT_WDDIS)) - wdt_write(wdt, AT91_WDT_MR, reg | AT91_WDT_WDDIS); + wdt_write_nosleep(wdt, AT91_WDT_MR, + reg | AT91_WDT_WDDIS); } return 0; } @@ -193,6 +217,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) wdd->ops = &sama5d4_wdt_ops; wdd->min_timeout = MIN_WDT_TIMEOUT; wdd->max_timeout = MAX_WDT_TIMEOUT; + wdt->last_ping = jiffies; watchdog_set_drvdata(wdd, wdt); -- cgit v1.2.3-59-g8ed1b From 46c319b848268dab3f0e7c4a5b6e9146d3bca8a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:49:45 +0100 Subject: watchdog: pcwd_usb: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/pcwd_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 99ebf6ea3de6..5615f4013924 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -630,6 +630,9 @@ static int usb_pcwd_probe(struct usb_interface *interface, return -ENODEV; } + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* check out the endpoint: it has to be Interrupt & IN */ endpoint = &iface_desc->endpoint[0].desc; -- cgit v1.2.3-59-g8ed1b From 0ddad77b90cb52075b5a9498f0621e3e265cc19f Mon Sep 17 00:00:00 2001 From: Tomas Melin Date: Mon, 20 Mar 2017 09:29:31 +0200 Subject: watchdog: cadence_wdt: fix timeout setting wdt_timeout must not be initialized to CDNS_WDT_DEFAULT_TIMEOUT in order to allow the value to be overriddden by a device tree setting. This way, the default timeout value will be used only in case module_param has not been set, or device tree timeout-sec has not been defined. Signed-off-by: Tomas Melin Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/cadence_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c index 8d61e8bfe60b..86e0b5d2e761 100644 --- a/drivers/watchdog/cadence_wdt.c +++ b/drivers/watchdog/cadence_wdt.c @@ -49,7 +49,7 @@ /* Counter maximum value */ #define CDNS_WDT_COUNTER_MAX 0xFFF -static int wdt_timeout = CDNS_WDT_DEFAULT_TIMEOUT; +static int wdt_timeout; static int nowayout = WATCHDOG_NOWAYOUT; module_param(wdt_timeout, int, 0); -- cgit v1.2.3-59-g8ed1b From 455a9a60b6d4afb293b0e63ec75cc8e82912a767 Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Mon, 10 Apr 2017 22:39:33 +0800 Subject: watchdog: wdt_pci: fix build error if define SOFTWARE_REBOOT To fix following build error when SOFTWARE_REBOOT is defined: CC [M] driver/watchdog/wdt_pci.o driver/watchdog/wdt_pci.c: In function 'wdtpci_interrupt': driver/watchdog/wdt_pci.c:335:3: error: too many arguments to function 'emergency_restart' emergency_restart(NULL); ^ In file included from driver/watchdog/wdt_pci.c:51:0: include/linux/reboot.h:80:13: note: declared here extern void emergency_restart(void); ^ Signed-off-by: Shile Zhang Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/wdt_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c index 48b2c058b009..bc7addc2dc06 100644 --- a/drivers/watchdog/wdt_pci.c +++ b/drivers/watchdog/wdt_pci.c @@ -332,7 +332,7 @@ static irqreturn_t wdtpci_interrupt(int irq, void *dev_id) pr_crit("Would Reboot\n"); #else pr_crit("Initiating system reboot\n"); - emergency_restart(NULL); + emergency_restart(); #endif #else pr_crit("Reset in 5ms\n"); -- cgit v1.2.3-59-g8ed1b From 14e3995e63759b80eb22a3c06958d105db4d3f79 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 16 May 2017 14:22:47 +0000 Subject: xen/9pfs: fix return value check in xen_9pfs_front_probe() In case of error, the function xenbus_read() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 71ebd71921e4 ("xen/9pfs: connect to the backend") Signed-off-by: Wei Yongjun Reviewed-by: Stefano Stabellini --- net/9p/trans_xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 71e85643b3f9..83fe487f460e 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -454,8 +454,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, goto error_xenbus; } priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL); - if (!priv->tag) { - ret = -EINVAL; + if (IS_ERR(priv->tag)) { + ret = PTR_ERR(priv->tag); goto error_xenbus; } ret = xenbus_transaction_end(xbt, 0); -- cgit v1.2.3-59-g8ed1b From aaf0475a0b3f445000c50f7fc75d5e846bf7ee7b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 18 May 2017 15:22:41 +0000 Subject: xen/9pfs: p9_trans_xen_init and p9_trans_xen_exit can be static Fixes the following sparse warnings: net/9p/trans_xen.c:528:5: warning: symbol 'p9_trans_xen_init' was not declared. Should it be static? net/9p/trans_xen.c:540:6: warning: symbol 'p9_trans_xen_exit' was not declared. Should it be static? Signed-off-by: Wei Yongjun Reviewed-by: Stefano Stabellini --- net/9p/trans_xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 83fe487f460e..6ad3e043c617 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -525,7 +525,7 @@ static struct xenbus_driver xen_9pfs_front_driver = { .otherend_changed = xen_9pfs_front_changed, }; -int p9_trans_xen_init(void) +static int p9_trans_xen_init(void) { if (!xen_domain()) return -ENODEV; @@ -537,7 +537,7 @@ int p9_trans_xen_init(void) } module_init(p9_trans_xen_init); -void p9_trans_xen_exit(void) +static void p9_trans_xen_exit(void) { v9fs_unregister_trans(&p9_xen_trans); return xenbus_unregister_driver(&xen_9pfs_front_driver); -- cgit v1.2.3-59-g8ed1b From 463f620b1256e0488d932088e04a372817e8c42e Mon Sep 17 00:00:00 2001 From: Michał Potomski Date: Fri, 12 May 2017 08:36:27 +0200 Subject: scsi: ufs: Clean up some rpm/spm level SysFS nodes upon remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reloading module these two attributes aren't cleaned up properly and they persist causing warnings when trying to load module again. Additionally they are not recreated properly due to that. Signed-off-by: Michał Potomski Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index abc7e87937cc..ffe8d8608818 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7698,6 +7698,12 @@ static inline void ufshcd_add_sysfs_nodes(struct ufs_hba *hba) ufshcd_add_spm_lvl_sysfs_nodes(hba); } +static inline void ufshcd_remove_sysfs_nodes(struct ufs_hba *hba) +{ + device_remove_file(hba->dev, &hba->rpm_lvl_attr); + device_remove_file(hba->dev, &hba->spm_lvl_attr); +} + /** * ufshcd_shutdown - shutdown routine * @hba: per adapter instance @@ -7735,6 +7741,7 @@ EXPORT_SYMBOL(ufshcd_shutdown); */ void ufshcd_remove(struct ufs_hba *hba) { + ufshcd_remove_sysfs_nodes(hba); scsi_remove_host(hba->host); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); -- cgit v1.2.3-59-g8ed1b From a351e40b6de550049423a26f7ded7b639e363d89 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 17 May 2017 20:30:43 +0530 Subject: scsi: csiostor: fix use after free in csio_hw_use_fwconfig() mbp pointer is passed to csio_hw_validate_caps() so call mempool_free() after calling csio_hw_validate_caps(). Signed-off-by: Varun Prakash Fixes: 541c571fa2fd ("csiostor:Use firmware version from cxgb4/t4fw_version.h") Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_hw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 622bdabc8894..dab195f04da7 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) goto bye; } - mempool_free(mbp, hw->mb_mempool); if (finicsum != cfcsum) { csio_warn(hw, "Config File checksum mismatch: csum=%#x, computed=%#x\n", @@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) rv = csio_hw_validate_caps(hw, mbp); if (rv != 0) goto bye; + + mempool_free(mbp, hw->mb_mempool); + mbp = NULL; + /* * Note that we're operating with parameters * not supplied by the driver, rather than from hard-wired -- cgit v1.2.3-59-g8ed1b From 1bad6c4a57efda0d5f5bf8a2403b21b1ed24875c Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 18 May 2017 15:40:05 -0700 Subject: scsi: zero per-cmd private driver data for each MQ I/O In lower layer driver's (LLD) scsi_host_template, the driver may optionally ask SCSI to allocate its private driver memory for each command, by specifying cmd_size. This memory is allocated at the end of scsi_cmnd by SCSI. Later when SCSI queues a command, the LLD can use scsi_cmd_priv to get to its private data. Some LLD, e.g. hv_storvsc, doesn't clear its private data before use. In this case, the LLD may get to stale or uninitialized data in its private driver memory. This may result in unexpected driver and hardware behavior. Fix this problem by also zeroing the private driver memory before passing them to LLD. Signed-off-by: Long Li Reviewed-by: Bart Van Assche Reviewed-by: KY Srinivasan Reviewed-by: Christoph Hellwig CC: # 4.11+ Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e31f1cc90b81..99e16ac479e3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1851,7 +1851,7 @@ static int scsi_mq_prep_fn(struct request *req) /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, - sizeof(*cmd) - sizeof(cmd->req)); + sizeof(*cmd) - sizeof(cmd->req) + shost->hostt->cmd_size); req->special = cmd; -- cgit v1.2.3-59-g8ed1b From e41e53cd4fe331d0d1f06f8e4ed7e2cc63ee2c34 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 18 May 2017 20:37:31 +1000 Subject: powerpc/mm: Fix virt_addr_valid() etc. on 64-bit hash virt_addr_valid() is supposed to tell you if it's OK to call virt_to_page() on an address. What this means in practice is that it should only return true for addresses in the linear mapping which are backed by a valid PFN. We are failing to properly check that the address is in the linear mapping, because virt_to_pfn() will return a valid looking PFN for more or less any address. That bug is actually caused by __pa(), used in virt_to_pfn(). eg: __pa(0xc000000000010000) = 0x10000 # Good __pa(0xd000000000010000) = 0x10000 # Bad! __pa(0x0000000000010000) = 0x10000 # Bad! This started happening after commit bdbc29c19b26 ("powerpc: Work around gcc miscompilation of __pa() on 64-bit") (Aug 2013), where we changed the definition of __pa() to work around a GCC bug. Prior to that we subtracted PAGE_OFFSET from the value passed to __pa(), meaning __pa() of a 0xd or 0x0 address would give you something bogus back. Until we can verify if that GCC bug is no longer an issue, or come up with another solution, this commit does the minimal fix to make virt_addr_valid() work, by explicitly checking that the address is in the linear mapping region. Fixes: bdbc29c19b26 ("powerpc: Work around gcc miscompilation of __pa() on 64-bit") Signed-off-by: Michael Ellerman Reviewed-by: Paul Mackerras Reviewed-by: Balbir Singh Tested-by: Breno Leitao --- arch/powerpc/include/asm/page.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 2a32483c7b6c..8da5d4c1cab2 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,19 @@ extern long long virt_phys_offset; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * On hash the vmalloc and other regions alias to the kernel region when passed + * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can + * return true for some vmalloc addresses, which is incorrect. So explicitly + * check that the address is in the kernel region. + */ +#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \ + pfn_valid(virt_to_pfn(kaddr))) +#else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#endif /* * On Book-E parts we need __va to parse the device tree and we can't -- cgit v1.2.3-59-g8ed1b From a33d7d94eed92b23fbbc7b0de06a41b2bbaa49e3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 12 May 2017 13:15:45 -0400 Subject: tracing: Make sure RCU is watching before calling a stack trace As stack tracing now requires "rcu watching", force RCU to be watching when recording a stack trace. Link: http://lkml.kernel.org/r/20170512172449.879684501@goodmis.org Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fcc9a2d774c3..1122f151466f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2568,7 +2568,36 @@ static inline void ftrace_trace_stack(struct trace_array *tr, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL); + struct ring_buffer *buffer = tr->trace_buffer.buffer; + + if (rcu_is_watching()) { + __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + return; + } + + /* + * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), + * but if the above rcu_is_watching() failed, then the NMI + * triggered someplace critical, and rcu_irq_enter() should + * not be called from NMI. + */ + if (unlikely(in_nmi())) + return; + + /* + * It is possible that a function is being traced in a + * location that RCU is not watching. A call to + * rcu_irq_enter() will make sure that it is, but there's + * a few internal rcu functions that could be traced + * where that wont work either. In those cases, we just + * do nothing. + */ + if (unlikely(rcu_irq_enter_disabled())) + return; + + rcu_irq_enter_irqson(); + __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + rcu_irq_exit_irqson(); } /** -- cgit v1.2.3-59-g8ed1b From 989513a735f51407280acd91e436d83eb48514cd Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 16 May 2017 09:41:06 +0200 Subject: xen: cleanup pvh leftovers from pv-only sources There are some leftovers testing for pvh guest mode in pv-only source files. Remove them. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 15 ++----- arch/x86/xen/mmu_pv.c | 102 +++++++++++++++++--------------------------- 2 files changed, 42 insertions(+), 75 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 7cd442690f9d..f33eef4ebd12 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -142,9 +142,7 @@ static void __init xen_banner(void) struct xen_extraversion extra; HYPERVISOR_xen_version(XENVER_extraversion, &extra); - pr_info("Booting paravirtualized kernel %son %s\n", - xen_feature(XENFEAT_auto_translated_physmap) ? - "with PVH extensions " : "", pv_info.name); + pr_info("Booting paravirtualized kernel on %s\n", pv_info.name); printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); @@ -957,15 +955,10 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) void xen_setup_shared_info(void) { - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - set_fixmap(FIX_PARAVIRT_BOOTMAP, - xen_start_info->shared_info); + set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); - HYPERVISOR_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); - } else - HYPERVISOR_shared_info = - (struct shared_info *)__va(xen_start_info->shared_info); + HYPERVISOR_shared_info = + (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); #ifndef CONFIG_SMP /* In UP this is as good a place as any to set up shared info */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 7397d8b8459d..1f386d7fdf70 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -355,10 +355,8 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) pteval_t flags = val & PTE_FLAGS_MASK; unsigned long mfn; - if (!xen_feature(XENFEAT_auto_translated_physmap)) - mfn = __pfn_to_mfn(pfn); - else - mfn = pfn; + mfn = __pfn_to_mfn(pfn); + /* * If there's no mfn for the pfn, then just create an * empty non-present pte. Unfortunately this loses @@ -647,9 +645,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, limit--; BUG_ON(limit >= FIXADDR_TOP); - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - /* * 64-bit has a great big hole in the middle of the address * space, which contains the Xen mappings. On 32-bit these @@ -1289,9 +1284,6 @@ static void __init xen_pagetable_cleanhighmap(void) static void __init xen_pagetable_p2m_setup(void) { - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - xen_vmalloc_p2m_tree(); #ifdef CONFIG_X86_64 @@ -1314,8 +1306,7 @@ static void __init xen_pagetable_init(void) xen_build_mfn_list_list(); /* Remap memory freed due to conflicts with E820 map */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - xen_remap_memory(); + xen_remap_memory(); xen_setup_shared_info(); } @@ -1925,21 +1916,20 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Pre-constructed entries are in pfn, so convert to mfn */ - /* L4[272] -> level3_ident_pgt - * L4[511] -> level3_kernel_pgt */ - convert_pfn_mfn(init_level4_pgt); + /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt */ + /* L4[511] -> level3_kernel_pgt */ + convert_pfn_mfn(init_level4_pgt); - /* L3_i[0] -> level2_ident_pgt */ - convert_pfn_mfn(level3_ident_pgt); - /* L3_k[510] -> level2_kernel_pgt - * L3_k[511] -> level2_fixmap_pgt */ - convert_pfn_mfn(level3_kernel_pgt); + /* L3_i[0] -> level2_ident_pgt */ + convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt */ + /* L3_k[511] -> level2_fixmap_pgt */ + convert_pfn_mfn(level3_kernel_pgt); + + /* L3_k[511][506] -> level1_fixmap_pgt */ + convert_pfn_mfn(level2_fixmap_pgt); - /* L3_k[511][506] -> level1_fixmap_pgt */ - convert_pfn_mfn(level2_fixmap_pgt); - } /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); @@ -1962,34 +1952,30 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) if (i && i < pgd_index(__START_KERNEL_map)) init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); - - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); - - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. - */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(init_level4_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - } else - native_write_cr3(__pa(init_level4_pgt)); + /* + * At this stage there can be no user pgd, and no page structure to + * attach it to, so make sure we just set kernel pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(init_level4_pgt)); + xen_mc_issue(PARAVIRT_LAZY_CPU); /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for @@ -2403,9 +2389,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) static void __init xen_post_allocator_init(void) { - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; @@ -2511,9 +2494,6 @@ void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); @@ -2650,9 +2630,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, * this function are redundant and can be ignored. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - if (unlikely(order > MAX_CONTIG_ORDER)) return -ENOMEM; @@ -2689,9 +2666,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) int success; unsigned long vstart; - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - if (unlikely(order > MAX_CONTIG_ORDER)) return; -- cgit v1.2.3-59-g8ed1b From c71e6d804c88168ecf02aaf14e1fd5773d683b5f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 18 May 2017 17:46:48 +0200 Subject: xen: make xen_flush_tlb_all() static xen_flush_tlb_all() is used in arch/x86/xen/mmu.c only. Make it static. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e375a5e815f..3be06f3caf3c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,7 +42,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -void xen_flush_tlb_all(void) +static void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; -- cgit v1.2.3-59-g8ed1b From d5d332d3f7e8435e264a71b90178dee69428d630 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 12 May 2017 20:13:26 -0700 Subject: devicetree: Move include prefixes from arch to separate directory We use a directory under arch/$ARCH/boot/dts as an include path that has links outside of the subtree to find dt-bindings from under include/dt-bindings. That's been working well, but new DT architectures haven't been adding them by default. Recently there's been a desire to share some of the DT material between arm and arm64, which originally caused developers to create symlinks or relative includes between the subtrees. This isn't ideal -- it breaks if the DT files aren't stored in the exact same hierarchy as the kernel tree, and generally it's just icky. As a somewhat cleaner solution we decided to add a $ARCH/ prefix link once, and allow DTS files to reference dtsi (and dts) files in other architectures that way. Original approach was to create these links under each architecture, but it lead to the problem of recursive symlinks. As a remedy, move the include link directories out of the architecture trees into a common location. At the same time, they can now share one directory and one dt-bindings/ link as well. Fixes: 4027494ae6e3 ('ARM: dts: add arm/arm64 include symlinks') Reported-by: Russell King Reported-by: Omar Sandoval Reviewed-by: Heiko Stuebner Reviewed-by: Masahiro Yamada Tested-by: Heiko Stuebner Acked-by: Rob Herring Cc: Heiko Stuebner Cc: Mark Rutland Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Mikael Starvik Cc: Jesper Nilsson Cc: James Hogan Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Frank Rowand Cc: linux-arch Signed-off-by: Olof Johansson --- arch/arm/boot/dts/include/arm | 1 - arch/arm/boot/dts/include/arm64 | 1 - arch/arm/boot/dts/include/dt-bindings | 1 - arch/arm64/boot/dts/include/arm | 1 - arch/arm64/boot/dts/include/arm64 | 1 - arch/arm64/boot/dts/include/dt-bindings | 1 - arch/cris/boot/dts/include/dt-bindings | 1 - arch/metag/boot/dts/include/dt-bindings | 1 - arch/mips/boot/dts/include/dt-bindings | 1 - arch/powerpc/boot/dts/include/dt-bindings | 1 - scripts/Makefile.lib | 2 +- scripts/dtc/include-prefixes/arc | 1 + scripts/dtc/include-prefixes/arm | 1 + scripts/dtc/include-prefixes/arm64 | 1 + scripts/dtc/include-prefixes/c6x | 1 + scripts/dtc/include-prefixes/cris | 1 + scripts/dtc/include-prefixes/dt-bindings | 1 + scripts/dtc/include-prefixes/h8300 | 1 + scripts/dtc/include-prefixes/metag | 1 + scripts/dtc/include-prefixes/microblaze | 1 + scripts/dtc/include-prefixes/mips | 1 + scripts/dtc/include-prefixes/nios2 | 1 + scripts/dtc/include-prefixes/openrisc | 1 + scripts/dtc/include-prefixes/powerpc | 1 + scripts/dtc/include-prefixes/sh | 1 + scripts/dtc/include-prefixes/xtensa | 1 + 26 files changed, 16 insertions(+), 11 deletions(-) delete mode 120000 arch/arm/boot/dts/include/arm delete mode 120000 arch/arm/boot/dts/include/arm64 delete mode 120000 arch/arm/boot/dts/include/dt-bindings delete mode 120000 arch/arm64/boot/dts/include/arm delete mode 120000 arch/arm64/boot/dts/include/arm64 delete mode 120000 arch/arm64/boot/dts/include/dt-bindings delete mode 120000 arch/cris/boot/dts/include/dt-bindings delete mode 120000 arch/metag/boot/dts/include/dt-bindings delete mode 120000 arch/mips/boot/dts/include/dt-bindings delete mode 120000 arch/powerpc/boot/dts/include/dt-bindings create mode 120000 scripts/dtc/include-prefixes/arc create mode 120000 scripts/dtc/include-prefixes/arm create mode 120000 scripts/dtc/include-prefixes/arm64 create mode 120000 scripts/dtc/include-prefixes/c6x create mode 120000 scripts/dtc/include-prefixes/cris create mode 120000 scripts/dtc/include-prefixes/dt-bindings create mode 120000 scripts/dtc/include-prefixes/h8300 create mode 120000 scripts/dtc/include-prefixes/metag create mode 120000 scripts/dtc/include-prefixes/microblaze create mode 120000 scripts/dtc/include-prefixes/mips create mode 120000 scripts/dtc/include-prefixes/nios2 create mode 120000 scripts/dtc/include-prefixes/openrisc create mode 120000 scripts/dtc/include-prefixes/powerpc create mode 120000 scripts/dtc/include-prefixes/sh create mode 120000 scripts/dtc/include-prefixes/xtensa diff --git a/arch/arm/boot/dts/include/arm b/arch/arm/boot/dts/include/arm deleted file mode 120000 index a96aa0ea9d8c..000000000000 --- a/arch/arm/boot/dts/include/arm +++ /dev/null @@ -1 +0,0 @@ -.. \ No newline at end of file diff --git a/arch/arm/boot/dts/include/arm64 b/arch/arm/boot/dts/include/arm64 deleted file mode 120000 index 074a835fca3e..000000000000 --- a/arch/arm/boot/dts/include/arm64 +++ /dev/null @@ -1 +0,0 @@ -../../../../arm64/boot/dts \ No newline at end of file diff --git a/arch/arm/boot/dts/include/dt-bindings b/arch/arm/boot/dts/include/dt-bindings deleted file mode 120000 index 08c00e4972fa..000000000000 --- a/arch/arm/boot/dts/include/dt-bindings +++ /dev/null @@ -1 +0,0 @@ -../../../../../include/dt-bindings \ No newline at end of file diff --git a/arch/arm64/boot/dts/include/arm b/arch/arm64/boot/dts/include/arm deleted file mode 120000 index cf63d80e2b93..000000000000 --- a/arch/arm64/boot/dts/include/arm +++ /dev/null @@ -1 +0,0 @@ -../../../../arm/boot/dts \ No newline at end of file diff --git a/arch/arm64/boot/dts/include/arm64 b/arch/arm64/boot/dts/include/arm64 deleted file mode 120000 index a96aa0ea9d8c..000000000000 --- a/arch/arm64/boot/dts/include/arm64 +++ /dev/null @@ -1 +0,0 @@ -.. \ No newline at end of file diff --git a/arch/arm64/boot/dts/include/dt-bindings b/arch/arm64/boot/dts/include/dt-bindings deleted file mode 120000 index 08c00e4972fa..000000000000 --- a/arch/arm64/boot/dts/include/dt-bindings +++ /dev/null @@ -1 +0,0 @@ -../../../../../include/dt-bindings \ No newline at end of file diff --git a/arch/cris/boot/dts/include/dt-bindings b/arch/cris/boot/dts/include/dt-bindings deleted file mode 120000 index 08c00e4972fa..000000000000 --- a/arch/cris/boot/dts/include/dt-bindings +++ /dev/null @@ -1 +0,0 @@ -../../../../../include/dt-bindings \ No newline at end of file diff --git a/arch/metag/boot/dts/include/dt-bindings b/arch/metag/boot/dts/include/dt-bindings deleted file mode 120000 index 08c00e4972fa..000000000000 --- a/arch/metag/boot/dts/include/dt-bindings +++ /dev/null @@ -1 +0,0 @@ -../../../../../include/dt-bindings \ No newline at end of file diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings deleted file mode 120000 index 08c00e4972fa..000000000000 --- a/arch/mips/boot/dts/include/dt-bindings +++ /dev/null @@ -1 +0,0 @@ -../../../../../include/dt-bindings \ No newline at end of file diff --git a/arch/powerpc/boot/dts/include/dt-bindings b/arch/powerpc/boot/dts/include/dt-bindings deleted file mode 120000 index 08c00e4972fa..000000000000 --- a/arch/powerpc/boot/dts/include/dt-bindings +++ /dev/null @@ -1 +0,0 @@ -../../../../../include/dt-bindings \ No newline at end of file diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 6dc1eda13b8e..58c05e5d9870 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -175,7 +175,7 @@ ld_flags = $(LDFLAGS) $(ldflags-y) dtc_cpp_flags = -Wp,-MD,$(depfile).pre.tmp -nostdinc \ -I$(srctree)/arch/$(SRCARCH)/boot/dts \ - -I$(srctree)/arch/$(SRCARCH)/boot/dts/include \ + -I$(srctree)/scripts/dtc/include-prefixes \ -I$(srctree)/drivers/of/testcase-data \ -undef -D__DTS__ diff --git a/scripts/dtc/include-prefixes/arc b/scripts/dtc/include-prefixes/arc new file mode 120000 index 000000000000..5d21b5a69a11 --- /dev/null +++ b/scripts/dtc/include-prefixes/arc @@ -0,0 +1 @@ +../../../arch/arc/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/arm b/scripts/dtc/include-prefixes/arm new file mode 120000 index 000000000000..eb14d4515a57 --- /dev/null +++ b/scripts/dtc/include-prefixes/arm @@ -0,0 +1 @@ +../../../arch/arm/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/arm64 b/scripts/dtc/include-prefixes/arm64 new file mode 120000 index 000000000000..275c42c21d71 --- /dev/null +++ b/scripts/dtc/include-prefixes/arm64 @@ -0,0 +1 @@ +../../../arch/arm64/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x new file mode 120000 index 000000000000..49ded4cae2be --- /dev/null +++ b/scripts/dtc/include-prefixes/c6x @@ -0,0 +1 @@ +../../../arch/c6x/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris new file mode 120000 index 000000000000..736d998ba506 --- /dev/null +++ b/scripts/dtc/include-prefixes/cris @@ -0,0 +1 @@ +../../../arch/cris/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/dt-bindings b/scripts/dtc/include-prefixes/dt-bindings new file mode 120000 index 000000000000..04fdbb3af016 --- /dev/null +++ b/scripts/dtc/include-prefixes/dt-bindings @@ -0,0 +1 @@ +../../../include/dt-bindings \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/h8300 b/scripts/dtc/include-prefixes/h8300 new file mode 120000 index 000000000000..3bdaa332c54c --- /dev/null +++ b/scripts/dtc/include-prefixes/h8300 @@ -0,0 +1 @@ +../../../arch/h8300/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag new file mode 120000 index 000000000000..87a3c847db8f --- /dev/null +++ b/scripts/dtc/include-prefixes/metag @@ -0,0 +1 @@ +../../../arch/metag/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/microblaze b/scripts/dtc/include-prefixes/microblaze new file mode 120000 index 000000000000..d9830330a21d --- /dev/null +++ b/scripts/dtc/include-prefixes/microblaze @@ -0,0 +1 @@ +../../../arch/microblaze/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/mips b/scripts/dtc/include-prefixes/mips new file mode 120000 index 000000000000..ae8d4948dc8d --- /dev/null +++ b/scripts/dtc/include-prefixes/mips @@ -0,0 +1 @@ +../../../arch/mips/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/nios2 b/scripts/dtc/include-prefixes/nios2 new file mode 120000 index 000000000000..51772336d13f --- /dev/null +++ b/scripts/dtc/include-prefixes/nios2 @@ -0,0 +1 @@ +../../../arch/nios2/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/openrisc b/scripts/dtc/include-prefixes/openrisc new file mode 120000 index 000000000000..71c3bc75c560 --- /dev/null +++ b/scripts/dtc/include-prefixes/openrisc @@ -0,0 +1 @@ +../../../arch/openrisc/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/powerpc b/scripts/dtc/include-prefixes/powerpc new file mode 120000 index 000000000000..7cd6ec16e899 --- /dev/null +++ b/scripts/dtc/include-prefixes/powerpc @@ -0,0 +1 @@ +../../../arch/powerpc/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/sh b/scripts/dtc/include-prefixes/sh new file mode 120000 index 000000000000..67d37808c599 --- /dev/null +++ b/scripts/dtc/include-prefixes/sh @@ -0,0 +1 @@ +../../../arch/sh/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/xtensa b/scripts/dtc/include-prefixes/xtensa new file mode 120000 index 000000000000..d1eaf6ec7a2b --- /dev/null +++ b/scripts/dtc/include-prefixes/xtensa @@ -0,0 +1 @@ +../../../arch/xtensa/boot/dts \ No newline at end of file -- cgit v1.2.3-59-g8ed1b From 877dd4b1f7986bd7ca31b1e69589289e4aa3e066 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 14 May 2017 23:45:02 +0200 Subject: ARM: configs: add a gemini defconfig It makes sense to have a stripped-down defconfig for just Gemini, as it is a pretty small platform used in NAS etc, and will use appended device tree. It is also quick to compile and test. Hopefully this defconfig can be a good base for distributions such as OpenWRT. I plan to add in the config options needed for the different variants of Gemini as we go along. Cc: Janos Laube Cc: Paulius Zaleckas Cc: Hans Ulli Kroll Cc: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/configs/gemini_defconfig | 68 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/arm/configs/gemini_defconfig diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig new file mode 100644 index 000000000000..d2d75fa664a6 --- /dev/null +++ b/arch/arm/configs/gemini_defconfig @@ -0,0 +1,68 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_NO_HZ_IDLE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_USER_NS=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARCH_MULTI_V4=y +# CONFIG_ARCH_MULTI_V7 is not set +CONFIG_ARCH_GEMINI=y +CONFIG_PCI=y +CONFIG_PREEMPT=y +CONFIG_AEABI=y +CONFIG_CMDLINE="console=ttyS0,115200n8" +CONFIG_KEXEC=y +CONFIG_BINFMT_MISC=y +CONFIG_PM=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_CFI_STAA=y +CONFIG_MTD_PHYSMAP=y +CONFIG_MTD_PHYSMAP_OF=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=y +# CONFIG_INPUT_MOUSE is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_GEMINI_WATCHDOG=y +CONFIG_USB=y +CONFIG_USB_MON=y +CONFIG_USB_FOTG210_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_GEMINI=y +CONFIG_DMADEVICES=y +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_ROMFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_DEBUG_FS=y -- cgit v1.2.3-59-g8ed1b From eb1e6716cc9c6fd22e706379ab082b4ac198a1b1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 17 May 2017 16:52:29 -0500 Subject: arm64: defconfig: sync with savedefconfig Sync the defconfig with savedefconfig as config options change/move over time. Generated with the following commands: make defconfig make savedefconfig cp defconfig arch/arm64/configs/defconfig Signed-off-by: Rob Herring Signed-off-by: Olof Johansson --- arch/arm64/configs/defconfig | 103 ++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 61 deletions(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index ce072859e3b2..d916fc316698 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -30,7 +30,6 @@ CONFIG_PROFILING=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set CONFIG_ARCH_SUNXI=y CONFIG_ARCH_ALPINE=y @@ -62,16 +61,15 @@ CONFIG_ARCH_XGENE=y CONFIG_ARCH_ZX=y CONFIG_ARCH_ZYNQMP=y CONFIG_PCI=y -CONFIG_PCI_MSI=y CONFIG_PCI_IOV=y -CONFIG_PCI_AARDVARK=y -CONFIG_PCIE_RCAR=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCI_XGENE=y CONFIG_PCI_LAYERSCAPE=y CONFIG_PCI_HISI=y CONFIG_PCIE_QCOM=y CONFIG_PCIE_ARMADA_8K=y +CONFIG_PCI_AARDVARK=y +CONFIG_PCIE_RCAR=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI_XGENE=y CONFIG_ARM64_VA_BITS_48=y CONFIG_SCHED_MC=y CONFIG_NUMA=y @@ -80,12 +78,11 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y CONFIG_SECCOMP=y -CONFIG_XEN=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +CONFIG_XEN=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y -CONFIG_CPU_IDLE=y CONFIG_HIBERNATION=y CONFIG_ARM_CPUIDLE=y CONFIG_CPU_FREQ=y @@ -155,8 +152,8 @@ CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_VIRTIO_BLK=y -CONFIG_EEPROM_AT25=m CONFIG_SRAM=y +CONFIG_EEPROM_AT25=m # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y CONFIG_SCSI_SAS_ATA=y @@ -168,8 +165,8 @@ CONFIG_AHCI_CEVA=y CONFIG_AHCI_MVEBU=y CONFIG_AHCI_XGENE=y CONFIG_AHCI_QORIQ=y -CONFIG_SATA_RCAR=y CONFIG_SATA_SIL24=y +CONFIG_SATA_RCAR=y CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y @@ -186,18 +183,17 @@ CONFIG_HNS_ENET=y CONFIG_E1000E=y CONFIG_IGB=y CONFIG_IGBVF=y -CONFIG_MVPP2=y CONFIG_MVNETA=y +CONFIG_MVPP2=y CONFIG_SKY2=y CONFIG_RAVB=y CONFIG_SMC91X=y CONFIG_SMSC911X=y CONFIG_STMMAC_ETH=m -CONFIG_REALTEK_PHY=m +CONFIG_MDIO_BUS_MUX_MMIOREG=y CONFIG_MESON_GXL_PHY=m CONFIG_MICREL_PHY=y -CONFIG_MDIO_BUS_MUX=y -CONFIG_MDIO_BUS_MUX_MMIOREG=y +CONFIG_REALTEK_PHY=m CONFIG_USB_PEGASUS=m CONFIG_USB_RTL8150=m CONFIG_USB_RTL8152=m @@ -230,14 +226,14 @@ CONFIG_SERIAL_8250_UNIPHIER=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_MESON=y +CONFIG_SERIAL_MESON_CONSOLE=y CONFIG_SERIAL_SAMSUNG=y CONFIG_SERIAL_SAMSUNG_CONSOLE=y CONFIG_SERIAL_TEGRA=y CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=11 CONFIG_SERIAL_SH_SCI_CONSOLE=y -CONFIG_SERIAL_MESON=y -CONFIG_SERIAL_MESON_CONSOLE=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y CONFIG_SERIAL_XILINX_PS_UART=y @@ -261,14 +257,14 @@ CONFIG_I2C_UNIPHIER_F=y CONFIG_I2C_RCAR=y CONFIG_I2C_CROS_EC_TUNNEL=y CONFIG_SPI=y -CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_BCM2835=m CONFIG_SPI_BCM2835AUX=m +CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_ORION=y CONFIG_SPI_PL022=y CONFIG_SPI_QUP=y -CONFIG_SPI_SPIDEV=m CONFIG_SPI_S3C64XX=y +CONFIG_SPI_SPIDEV=m CONFIG_SPMI=y CONFIG_PINCTRL_SINGLE=y CONFIG_PINCTRL_MAX77620=y @@ -286,39 +282,35 @@ CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCA953X_IRQ=y CONFIG_GPIO_MAX77620=y CONFIG_POWER_RESET_MSM=y -CONFIG_BATTERY_BQ27XXX=y CONFIG_POWER_RESET_XGENE=y CONFIG_POWER_RESET_SYSCON=y +CONFIG_BATTERY_BQ27XXX=y +CONFIG_SENSORS_ARM_SCPI=y CONFIG_SENSORS_LM90=m CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_ARM_SCPI=y -CONFIG_THERMAL=y -CONFIG_THERMAL_EMULATION=y CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y CONFIG_CPU_THERMAL=y -CONFIG_BCM2835_THERMAL=y +CONFIG_THERMAL_EMULATION=y CONFIG_EXYNOS_THERMAL=y CONFIG_WATCHDOG=y -CONFIG_BCM2835_WDT=y -CONFIG_RENESAS_WDT=y CONFIG_S3C2410_WATCHDOG=y CONFIG_MESON_GXBB_WATCHDOG=m CONFIG_MESON_WATCHDOG=m +CONFIG_RENESAS_WDT=y +CONFIG_BCM2835_WDT=y +CONFIG_MFD_CROS_EC=y +CONFIG_MFD_CROS_EC_I2C=y CONFIG_MFD_EXYNOS_LPASS=m +CONFIG_MFD_HI655X_PMIC=y CONFIG_MFD_MAX77620=y -CONFIG_MFD_RK808=y CONFIG_MFD_SPMI_PMIC=y +CONFIG_MFD_RK808=y CONFIG_MFD_SEC_CORE=y -CONFIG_MFD_HI655X_PMIC=y -CONFIG_REGULATOR=y -CONFIG_MFD_CROS_EC=y -CONFIG_MFD_CROS_EC_I2C=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_HI655X=y CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_PWM=y -CONFIG_REGULATOR_QCOM_SMD_RPM=y CONFIG_REGULATOR_QCOM_SPMI=y CONFIG_REGULATOR_RK808=y CONFIG_REGULATOR_S2MPS11=y @@ -345,13 +337,12 @@ CONFIG_DRM_EXYNOS_DSI=y CONFIG_DRM_EXYNOS_HDMI=y CONFIG_DRM_EXYNOS_MIC=y CONFIG_DRM_RCAR_DU=m -CONFIG_DRM_RCAR_HDMI=y CONFIG_DRM_RCAR_LVDS=y CONFIG_DRM_RCAR_VSP=y CONFIG_DRM_TEGRA=m -CONFIG_DRM_VC4=m CONFIG_DRM_PANEL_SIMPLE=m CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_VC4=m CONFIG_DRM_HISI_KIRIN=m CONFIG_DRM_MESON=m CONFIG_FB=y @@ -366,26 +357,24 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_BCM2835_SOC_I2S=m -CONFIG_SND_SOC_RCAR=y CONFIG_SND_SOC_SAMSUNG=y +CONFIG_SND_SOC_RCAR=y CONFIG_SND_SOC_AK4613=y CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y -CONFIG_USB_XHCI_PLATFORM=y -CONFIG_USB_XHCI_RCAR=y -CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_XHCI_TEGRA=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_MSM=y +CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_OHCI_EXYNOS=y CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_EXYNOS=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_RENESAS_USBHS=m CONFIG_USB_STORAGE=y -CONFIG_USB_DWC2=y CONFIG_USB_DWC3=y +CONFIG_USB_DWC2=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y @@ -398,7 +387,6 @@ CONFIG_USB_RENESAS_USBHS_UDC=m CONFIG_MMC=y CONFIG_MMC_BLOCK_MINORS=32 CONFIG_MMC_ARMMMCI=y -CONFIG_MMC_MESON_GX=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_ACPI=y CONFIG_MMC_SDHCI_PLTFM=y @@ -406,6 +394,7 @@ CONFIG_MMC_SDHCI_OF_ARASAN=y CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_MMC_MESON_GX=y CONFIG_MMC_SDHCI_MSM=y CONFIG_MMC_SPI=y CONFIG_MMC_SDHI=y @@ -414,32 +403,31 @@ CONFIG_MMC_DW_EXYNOS=y CONFIG_MMC_DW_K3=y CONFIG_MMC_DW_ROCKCHIP=y CONFIG_MMC_SUNXI=y -CONFIG_MMC_SDHCI_XENON=y CONFIG_MMC_BCM2835=y +CONFIG_MMC_SDHCI_XENON=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_PWM=y CONFIG_LEDS_SYSCON=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX77686=y +CONFIG_RTC_DRV_RK808=m CONFIG_RTC_DRV_S5M=y CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_S3C=y CONFIG_RTC_DRV_PL031=y CONFIG_RTC_DRV_SUN6I=y -CONFIG_RTC_DRV_RK808=m CONFIG_RTC_DRV_TEGRA=y CONFIG_RTC_DRV_XGENE=y -CONFIG_RTC_DRV_S3C=y CONFIG_DMADEVICES=y +CONFIG_DMA_BCM2835=m CONFIG_MV_XOR_V2=y CONFIG_PL330_DMA=y -CONFIG_DMA_BCM2835=m CONFIG_TEGRA20_APB_DMA=y CONFIG_QCOM_BAM_DMA=y CONFIG_QCOM_HIDMA_MGMT=y @@ -452,18 +440,17 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y CONFIG_XEN_GNTDEV=y CONFIG_XEN_GRANT_DEV_ALLOC=y +CONFIG_COMMON_CLK_RK808=y CONFIG_COMMON_CLK_SCPI=y CONFIG_COMMON_CLK_CS2000_CP=y CONFIG_COMMON_CLK_S2MPS11=y -CONFIG_COMMON_CLK_PWM=y -CONFIG_COMMON_CLK_RK808=y CONFIG_CLK_QORIQ=y +CONFIG_COMMON_CLK_PWM=y CONFIG_COMMON_CLK_QCOM=y CONFIG_MSM_GCC_8916=y CONFIG_MSM_GCC_8994=y CONFIG_MSM_MMCC_8996=y CONFIG_HWSPINLOCK_QCOM=y -CONFIG_MAILBOX=y CONFIG_ARM_MHU=y CONFIG_PLATFORM_MHU=y CONFIG_BCM2835_MBOX=y @@ -472,32 +459,29 @@ CONFIG_ARM_SMMU=y CONFIG_ARM_SMMU_V3=y CONFIG_RASPBERRYPI_POWER=y CONFIG_QCOM_SMEM=y -CONFIG_QCOM_SMD=y -CONFIG_QCOM_SMD_RPM=y CONFIG_ROCKCHIP_PM_DOMAINS=y CONFIG_ARCH_TEGRA_132_SOC=y CONFIG_ARCH_TEGRA_210_SOC=y CONFIG_ARCH_TEGRA_186_SOC=y CONFIG_EXTCON_USB_GPIO=y +CONFIG_IIO=y +CONFIG_EXYNOS_ADC=y CONFIG_PWM=y CONFIG_PWM_BCM2835=m +CONFIG_PWM_MESON=m CONFIG_PWM_ROCKCHIP=y +CONFIG_PWM_SAMSUNG=y CONFIG_PWM_TEGRA=m -CONFIG_PWM_MESON=m -CONFIG_COMMON_RESET_HI6220=y CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_PHY_HI6220_USB=y +CONFIG_PHY_SUN4I_USB=y CONFIG_PHY_ROCKCHIP_INNO_USB2=y CONFIG_PHY_ROCKCHIP_EMMC=y -CONFIG_PHY_SUN4I_USB=y CONFIG_PHY_XGENE=y CONFIG_PHY_TEGRA_XUSB=y CONFIG_ARM_SCPI_PROTOCOL=y -CONFIG_ACPI=y -CONFIG_IIO=y -CONFIG_EXYNOS_ADC=y -CONFIG_PWM_SAMSUNG=y CONFIG_RASPBERRYPI_FIRMWARE=y +CONFIG_ACPI=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -511,7 +495,6 @@ CONFIG_FUSE_FS=m CONFIG_CUSE=m CONFIG_OVERLAY_FS=m CONFIG_VFAT_FS=y -CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=y CONFIG_EFIVAR_FS=y @@ -539,11 +522,9 @@ CONFIG_MEMTEST=y CONFIG_SECURITY=y CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_ANSI_CPRNG=y -CONFIG_CRYPTO_DEV_SAFEXCEL=m CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y -# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set -- cgit v1.2.3-59-g8ed1b From bae3dee0992dcb336a591468376b046e5447997b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 16 May 2017 14:17:20 +0800 Subject: mmc: sdhci-xenon: kill xenon_clean_phy() Currently, the xenon_clean_phy() is only used for freeing phy_params. The phy_params is allocated by devm_kzalloc(), there's no need to free is explicitly. Signed-off-by: Jisheng Zhang Acked-by: Hu Ziji Acked-by: Adrian Hunter --- drivers/mmc/host/sdhci-xenon-phy.c | 14 +------------- drivers/mmc/host/sdhci-xenon.c | 6 +----- drivers/mmc/host/sdhci-xenon.h | 1 - 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 6356781f1cca..f7e26b031e76 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -787,14 +787,6 @@ int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios) return ret; } -void xenon_clean_phy(struct sdhci_host *host) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); - - kfree(priv->phy_params); -} - static int xenon_add_phy(struct device_node *np, struct sdhci_host *host, const char *phy_name) { @@ -819,11 +811,7 @@ static int xenon_add_phy(struct device_node *np, struct sdhci_host *host, if (ret) return ret; - ret = xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params); - if (ret) - xenon_clean_phy(host); - - return ret; + return xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params); } int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 67246655315b..bc1781bb070b 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -486,7 +486,7 @@ static int xenon_probe(struct platform_device *pdev) err = xenon_sdhc_prepare(host); if (err) - goto clean_phy_param; + goto err_clk; err = sdhci_add_host(host); if (err) @@ -496,8 +496,6 @@ static int xenon_probe(struct platform_device *pdev) remove_sdhc: xenon_sdhc_unprepare(host); -clean_phy_param: - xenon_clean_phy(host); err_clk: clk_disable_unprepare(pltfm_host->clk); free_pltfm: @@ -510,8 +508,6 @@ static int xenon_remove(struct platform_device *pdev) struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - xenon_clean_phy(host); - sdhci_remove_host(host, 0); xenon_sdhc_unprepare(host); diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h index 6e6523ea01ce..73debb42dc2f 100644 --- a/drivers/mmc/host/sdhci-xenon.h +++ b/drivers/mmc/host/sdhci-xenon.h @@ -93,7 +93,6 @@ struct xenon_priv { }; int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios); -void xenon_clean_phy(struct sdhci_host *host); int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host); void xenon_soc_pad_ctrl(struct sdhci_host *host, -- cgit v1.2.3-59-g8ed1b From f4e506c5a3a026a28c99ca2cbc1c79aeca1a1b68 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 17 May 2017 16:52:30 -0500 Subject: arm64: defconfig: enable options needed for QCom DB410c board Enable Qualcomm drivers needed to boot Dragonboard 410c with HDMI. This enables support for clocks, regulators, and USB PHY. Cc: Bjorn Andersson Cc: John Stultz Signed-off-by: Rob Herring [Olof: Turned off _RPM configs per follow-up email] Signed-off-by: Olof Johansson --- arch/arm64/configs/defconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d916fc316698..65cdd878cfbd 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -311,6 +311,7 @@ CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_HI655X=y CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_PWM=y +CONFIG_REGULATOR_QCOM_SMD_RPM=y CONFIG_REGULATOR_QCOM_SPMI=y CONFIG_REGULATOR_RK808=y CONFIG_REGULATOR_S2MPS11=y @@ -381,6 +382,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_ISP1760=y CONFIG_USB_HSIC_USB3503=y CONFIG_USB_MSM_OTG=y +CONFIG_USB_QCOM_8X16_PHY=y CONFIG_USB_ULPI=y CONFIG_USB_GADGET=y CONFIG_USB_RENESAS_USBHS_UDC=m @@ -447,6 +449,7 @@ CONFIG_COMMON_CLK_S2MPS11=y CONFIG_CLK_QORIQ=y CONFIG_COMMON_CLK_PWM=y CONFIG_COMMON_CLK_QCOM=y +CONFIG_QCOM_CLK_SMD_RPM=y CONFIG_MSM_GCC_8916=y CONFIG_MSM_GCC_8994=y CONFIG_MSM_MMCC_8996=y @@ -457,8 +460,12 @@ CONFIG_BCM2835_MBOX=y CONFIG_HI6220_MBOX=y CONFIG_ARM_SMMU=y CONFIG_ARM_SMMU_V3=y +CONFIG_RPMSG_QCOM_SMD=y CONFIG_RASPBERRYPI_POWER=y CONFIG_QCOM_SMEM=y +CONFIG_QCOM_SMD_RPM=y +CONFIG_QCOM_SMP2P=y +CONFIG_QCOM_SMSM=y CONFIG_ROCKCHIP_PM_DOMAINS=y CONFIG_ARCH_TEGRA_132_SOC=y CONFIG_ARCH_TEGRA_210_SOC=y -- cgit v1.2.3-59-g8ed1b From aca69344c8a99e7374d913e42ba9120c398ee16f Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 May 2017 11:36:51 +0200 Subject: mmc: cavium-octeon: Fix interrupt enable code OCTEON SoCs with CIU3 do not have interrupt masking local to the MMC bus interface. Unfortunately, some even have a diagnostic register at the same address of the enable register, which causes the interrupts to fire immediately if stored to, thus breaking the driver. The proper action on these SoCs is not to touch this register. Fixes: 01d95843335c ("mmc: cavium: Add MMC support for Octeon SOCs.") Signed-off-by: David Daney [jglauber@cavium.com: removed point after subject line] Signed-off-by: Jan Glauber Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium-octeon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c index 772d0900026d..d698d66e3327 100644 --- a/drivers/mmc/host/cavium-octeon.c +++ b/drivers/mmc/host/cavium-octeon.c @@ -108,7 +108,7 @@ static void octeon_mmc_release_bus(struct cvm_mmc_host *host) static void octeon_mmc_int_enable(struct cvm_mmc_host *host, u64 val) { writeq(val, host->base + MIO_EMM_INT(host)); - if (!host->dma_active || (host->dma_active && !host->has_ciu3)) + if (!host->has_ciu3) writeq(val, host->base + MIO_EMM_INT_EN(host)); } -- cgit v1.2.3-59-g8ed1b From 899e4aad15e93315fa18ab9e9c88904ad237cfa0 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 May 2017 11:36:52 +0200 Subject: mmc: cavium-octeon: Use proper GPIO name for power control The devm_gpiod_get_optional() function appends a "-gpios" to the string passed to it, so if we want to find the "power-gpios" signal, we must pass "power" to this function. Fixes: 01d95843335c ("mmc: cavium: Add MMC support for Octeon SOCs.") Signed-off-by: David Daney [jglauber@cavium.com: removed point after subject line] Signed-off-by: Jan Glauber Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium-octeon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c index d698d66e3327..cbb566377508 100644 --- a/drivers/mmc/host/cavium-octeon.c +++ b/drivers/mmc/host/cavium-octeon.c @@ -267,7 +267,7 @@ static int octeon_mmc_probe(struct platform_device *pdev) } host->global_pwr_gpiod = devm_gpiod_get_optional(&pdev->dev, - "power-gpios", + "power", GPIOD_OUT_HIGH); if (IS_ERR(host->global_pwr_gpiod)) { dev_err(&pdev->dev, "Invalid power GPIO\n"); -- cgit v1.2.3-59-g8ed1b From 0527873b29b077fc8e656acd63e1866b429fef55 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 13:50:16 +0200 Subject: ARM: remove duplicate 'const' annotations' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-7 warns about some declarations that are more 'const' than necessary: arch/arm/mach-at91/pm.c:338:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const struct of_device_id const ramc_ids[] __initconst = { arch/arm/mach-bcm/bcm_kona_smc.c:36:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const struct of_device_id const bcm_kona_smc_ids[] __initconst = { arch/arm/mach-spear/time.c:207:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const struct of_device_id const timer_of_match[] __initconst = { arch/arm/mach-omap2/prm_common.c:714:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = { arch/arm/mach-omap2/vc.c:562:35: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = { The ones in arch/arm were apparently all introduced accidentally by one commit that correctly marked a lot of variables as __initconst. Fixes: 19c233b79d1a ("ARM: appropriate __init annotation for const data") Acked-by: Alexandre Belloni Acked-by: Tony Lindgren Acked-by: Nicolas Pitre Acked-by: Florian Fainelli Acked-by: Viresh Kumar Acked-by: Krzysztof Hałasa Signed-off-by: Arnd Bergmann --- arch/arm/mach-at91/pm.c | 2 +- arch/arm/mach-bcm/bcm_kona_smc.c | 2 +- arch/arm/mach-cns3xxx/core.c | 2 +- arch/arm/mach-omap2/prm_common.c | 2 +- arch/arm/mach-omap2/vc.c | 2 +- arch/arm/mach-spear/time.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 2cd27c830ab6..283e79ab587d 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -335,7 +335,7 @@ static const struct ramc_info ramc_infos[] __initconst = { { .idle = sama5d3_ddr_standby, .memctrl = AT91_MEMCTRL_DDRSDR}, }; -static const struct of_device_id const ramc_ids[] __initconst = { +static const struct of_device_id ramc_ids[] __initconst = { { .compatible = "atmel,at91rm9200-sdramc", .data = &ramc_infos[0] }, { .compatible = "atmel,at91sam9260-sdramc", .data = &ramc_infos[1] }, { .compatible = "atmel,at91sam9g45-ddramc", .data = &ramc_infos[2] }, diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index cf3f8658f0e5..a55a7ecf146a 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -33,7 +33,7 @@ struct bcm_kona_smc_data { unsigned result; }; -static const struct of_device_id const bcm_kona_smc_ids[] __initconst = { +static const struct of_device_id bcm_kona_smc_ids[] __initconst = { {.compatible = "brcm,kona-smc"}, {.compatible = "bcm,kona-smc"}, /* deprecated name */ {}, diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 03da3813f1ab..7d5a44a06648 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -346,7 +346,7 @@ static struct usb_ohci_pdata cns3xxx_usb_ohci_pdata = { .power_off = csn3xxx_usb_power_off, }; -static const struct of_dev_auxdata const cns3xxx_auxdata[] __initconst = { +static const struct of_dev_auxdata cns3xxx_auxdata[] __initconst = { { "intel,usb-ehci", CNS3XXX_USB_BASE, "ehci-platform", &cns3xxx_usb_ehci_pdata }, { "intel,usb-ohci", CNS3XXX_USB_OHCI_BASE, "ohci-platform", &cns3xxx_usb_ohci_pdata }, { "cavium,cns3420-ahci", CNS3XXX_SATA2_BASE, "ahci", NULL }, diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 2b138b65129a..dc11841ca334 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -711,7 +711,7 @@ static struct omap_prcm_init_data scrm_data __initdata = { }; #endif -static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = { +static const struct of_device_id omap_prcm_dt_match_table[] __initconst = { #ifdef CONFIG_SOC_AM33XX { .compatible = "ti,am3-prcm", .data = &am3_prm_data }, #endif diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c index 2028167fff31..d76b1e5eb8ba 100644 --- a/arch/arm/mach-omap2/vc.c +++ b/arch/arm/mach-omap2/vc.c @@ -559,7 +559,7 @@ struct i2c_init_data { u8 hsscll_12; }; -static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = { +static const struct i2c_init_data omap4_i2c_timing_data[] __initconst = { { .load = 50, .loadbits = 0x3, diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c index 4878ba90026d..289e036c9c30 100644 --- a/arch/arm/mach-spear/time.c +++ b/arch/arm/mach-spear/time.c @@ -204,7 +204,7 @@ static void __init spear_clockevent_init(int irq) setup_irq(irq, &spear_timer_irq); } -static const struct of_device_id const timer_of_match[] __initconst = { +static const struct of_device_id timer_of_match[] __initconst = { { .compatible = "st,spear-timer", }, { }, }; -- cgit v1.2.3-59-g8ed1b From 76cefef8e838304a71725a0b5007c375619d78fb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 12:53:05 +0100 Subject: firmware: ti_sci: fix strncat length check gcc-7 notices that the length we pass to strncat is wrong: drivers/firmware/ti_sci.c: In function 'ti_sci_probe': drivers/firmware/ti_sci.c:204:32: error: specified bound 50 equals the size of the destination [-Werror=stringop-overflow=] Instead of the total length, we must pass the length of the remaining space here. Fixes: aa276781a64a ("firmware: Add basic support for TI System Control Interface (TI-SCI) protocol") Cc: stable@vger.kernel.org Acked-by: Nishanth Menon Acked-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann --- drivers/firmware/ti_sci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 874ff32db366..00cfed3c3e1a 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -202,7 +202,8 @@ static int ti_sci_debugfs_create(struct platform_device *pdev, info->debug_buffer[info->debug_region_size] = 0; info->d = debugfs_create_file(strncat(debug_name, dev_name(dev), - sizeof(debug_name)), + sizeof(debug_name) - + sizeof("ti_sci_debug@")), 0444, NULL, info, &ti_sci_debug_fops); if (IS_ERR(info->d)) return PTR_ERR(info->d); -- cgit v1.2.3-59-g8ed1b From 1fccb73011ea8a5fa0c6d357c33fa29c695139ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Apr 2017 13:41:15 +0200 Subject: iTCO_wdt: all versions count down twice The ICH9 is listed as having TCO v2, and indeed the behavior in the datasheet corresponds to v2 (for example the NO_REBOOT flag is accessible via the 16KiB-aligned Root Complex Base Address). However, the TCO counts twice just like in v1; the documentation of the SECOND_TO_STS bit says: "ICH9 sets this bit to 1 to indicate that the TIMEOUT bit had been (or is currently) set and a second timeout occurred before the TCO_RLD register was written. If this bit is set and the NO_REBOOT config bit is 0, then the ICH9 will reboot the system after the second timeout. The same can be found in the BayTrail (Atom E3800) datasheet, and even HOWTOs around the Internet say that it will reboot after _twice_ the specified heartbeat. I did not find the Apollo Lake datasheet, but because v4/v5 has a SECOND_TO_STS bit just like the previous version I'm enabling this for Apollo Lake as well. Cc: linux-watchdog@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Paolo Bonzini Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-parameters.txt | 2 +- drivers/watchdog/iTCO_wdt.c | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt index 4f7d86dd0a5d..914518aeb972 100644 --- a/Documentation/watchdog/watchdog-parameters.txt +++ b/Documentation/watchdog/watchdog-parameters.txt @@ -117,7 +117,7 @@ nowayout: Watchdog cannot be stopped once started ------------------------------------------------- iTCO_wdt: heartbeat: Watchdog heartbeat in seconds. - (2smi_res, wd_dev->timeout); + /* Reset the timeout status bit so that the timer + * needs to count down twice again before rebooting */ + outw(0x0008, TCO1_STS(p)); /* write 1 to clear bit */ + /* Reload the timer by writing to the TCO Timer Counter register */ - if (p->iTCO_version >= 2) { + if (p->iTCO_version >= 2) outw(0x01, TCO_RLD(p)); - } else if (p->iTCO_version == 1) { - /* Reset the timeout status bit so that the timer - * needs to count down twice again before rebooting */ - outw(0x0008, TCO1_STS(p)); /* write 1 to clear bit */ - + else if (p->iTCO_version == 1) outb(0x01, TCO_RLD(p)); - } spin_unlock(&p->io_lock); return 0; @@ -328,11 +327,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t) unsigned char val8; unsigned int tmrval; - tmrval = seconds_to_ticks(p, t); - - /* For TCO v1 the timer counts down twice before rebooting */ - if (p->iTCO_version == 1) - tmrval /= 2; + /* The timer counts down twice before rebooting */ + tmrval = seconds_to_ticks(p, t) / 2; /* from the specs: */ /* "Values of 0h-3h are ignored and should not be attempted" */ @@ -385,6 +381,8 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev) spin_lock(&p->io_lock); val16 = inw(TCO_RLD(p)); val16 &= 0x3ff; + if (!(inw(TCO1_STS(p)) & 0x0008)) + val16 += (inw(TCOv2_TMR(p)) & 0x3ff); spin_unlock(&p->io_lock); time_left = ticks_to_seconds(p, val16); -- cgit v1.2.3-59-g8ed1b From 07441a7dd11f6855bcf55fbbfc6abba42258b2c6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 25 Apr 2017 16:17:33 +0000 Subject: watchdog: zx2967: remove redundant dev_err call in zx2967_wdt_probe() There is a error message within devm_ioremap_resource already, so remove the dev_err call to avoid redundant error message. Signed-off-by: Wei Yongjun Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/zx2967_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c index e290d5a13a6d..c98252733c30 100644 --- a/drivers/watchdog/zx2967_wdt.c +++ b/drivers/watchdog/zx2967_wdt.c @@ -211,10 +211,8 @@ static int zx2967_wdt_probe(struct platform_device *pdev) base = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt->reg_base = devm_ioremap_resource(dev, base); - if (IS_ERR(wdt->reg_base)) { - dev_err(dev, "ioremap failed\n"); + if (IS_ERR(wdt->reg_base)) return PTR_ERR(wdt->reg_base); - } zx2967_wdt_reset_sysctrl(dev); -- cgit v1.2.3-59-g8ed1b From fedf266f9955d9a019643cde199a2fd9a0259f6f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 27 Apr 2017 18:02:32 -0700 Subject: watchdog: bcm281xx: Fix use of uninitialized spinlock. The bcm_kona_wdt_set_resolution_reg() call takes the spinlock, so initialize it earlier. Fixes a warning at boot with lock debugging enabled. Fixes: 6adb730dc208 ("watchdog: bcm281xx: Watchdog Driver") Signed-off-by: Eric Anholt Reviewed-by: Florian Fainelli Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm_kona_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index 6fce17d5b9f1..a5775dfd8d5f 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -304,6 +304,8 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) if (!wdt) return -ENOMEM; + spin_lock_init(&wdt->lock); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt->base = devm_ioremap_resource(dev, res); if (IS_ERR(wdt->base)) @@ -316,7 +318,6 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) return ret; } - spin_lock_init(&wdt->lock); platform_set_drvdata(pdev, wdt); watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt); bcm_kona_wdt_wdd.parent = &pdev->dev; -- cgit v1.2.3-59-g8ed1b From a486cd23661c9387fb076c3f6ae8b2aa9d20d54a Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 19 May 2017 12:47:00 +0200 Subject: xfrm: fix state migration copy replay sequence numbers During xfrm migration copy replay and preplay sequence numbers from the previous state. Here is a tcpdump output showing the problem. 10.0.10.46 is running vanilla kernel, is the IKE/IPsec responder. After the migration it sent wrong sequence number, reset to 1. The migration is from 10.0.0.52 to 10.0.0.53. IP 10.0.0.52.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7cf), length 136 IP 10.0.10.46.4500 > 10.0.0.52.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x7cf), length 136 IP 10.0.0.52.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d0), length 136 IP 10.0.10.46.4500 > 10.0.0.52.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x7d0), length 136 IP 10.0.0.53.4500 > 10.0.10.46.4500: NONESP-encap: isakmp: child_sa inf2[I] IP 10.0.10.46.4500 > 10.0.0.53.4500: NONESP-encap: isakmp: child_sa inf2[R] IP 10.0.0.53.4500 > 10.0.10.46.4500: NONESP-encap: isakmp: child_sa inf2[I] IP 10.0.10.46.4500 > 10.0.0.53.4500: NONESP-encap: isakmp: child_sa inf2[R] IP 10.0.0.53.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d1), length 136 NOTE: next sequence is wrong 0x1 IP 10.0.10.46.4500 > 10.0.0.53.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x1), length 136 IP 10.0.0.53.4500 > 10.0.10.46.4500: UDP-encap: ESP(spi=0x43ef462d,seq=0x7d2), length 136 IP 10.0.10.46.4500 > 10.0.0.53.4500: UDP-encap: ESP(spi=0xca1c282d,seq=0x2), length 136 Signed-off-by: Antony Antony Reviewed-by: Richard Guy Briggs Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fc3c5aa38754..2e291bc5f1fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1383,6 +1383,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; + x->replay = orig->replay; + x->preplay = orig->preplay; return x; -- cgit v1.2.3-59-g8ed1b From 6bf1c2d26716dcd483699cc62474e49d164c5563 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 May 2017 14:12:00 +0200 Subject: arm64: dts: rockchip: fix include reference The way we handle include paths for DT has changed a bit, which broke a file that had an unconventional way to reference a common header file: arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts:47:10: fatal error: include/dt-bindings/input/linux-event-codes.h: No such file or directory This removes the leading "include/" from the path name, which fixes it. Fixes: d5d332d3f7e8 ("devicetree: Move include prefixes from arch to separate directory") Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts index 658bb9dc9dfd..7bd31066399b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts @@ -44,7 +44,7 @@ /dts-v1/; #include "rk3399-gru.dtsi" -#include +#include /* * Kevin-specific things -- cgit v1.2.3-59-g8ed1b From 9d6408433019bfae15e2d0d5f4498c4ff70b86c0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 May 2017 09:56:40 +0100 Subject: i2c: designware: don't infer timings described by ACPI from clock rate Commit bd698d24b1b57 ("i2c: designware: Get selected speed mode sda-hold-time via ACPI") updated the logic that reads the timing parameters for various I2C bus rates from the DSDT, to only read the timing parameters for the currently selected mode. This causes a WARN_ON() splat on platforms that legally omit the clock frequency from the ACPI description, because in the new situation, the core I2C designware driver still accesses the fields in the driver struct that we no longer populate, and proceeds to calculate them from the clock frequency. Since the clock frequency is unspecified, the driver complains loudly using a WARN_ON(). So revert back to the old situation, where the struct fields for all timings are populated, but retain the new logic which chooses the SDA hold time from the timing mode that is currently in use. Fixes: bd698d24b1b57 ("i2c: designware: Get selected speed mode ...") Signed-off-by: Ard Biesheuvel Reported-by: Lorenzo Pieralisi Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index f2acd4b6bf01..6283b99d2b17 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -96,6 +96,7 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev) struct dw_i2c_dev *dev = platform_get_drvdata(pdev); acpi_handle handle = ACPI_HANDLE(&pdev->dev); const struct acpi_device_id *id; + u32 ss_ht, fp_ht, hs_ht, fs_ht; struct acpi_device *adev; const char *uid; @@ -107,23 +108,24 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev) * Try to get SDA hold time and *CNT values from an ACPI method for * selected speed modes. */ + dw_i2c_acpi_params(pdev, "SSCN", &dev->ss_hcnt, &dev->ss_lcnt, &ss_ht); + dw_i2c_acpi_params(pdev, "FPCN", &dev->fp_hcnt, &dev->fp_lcnt, &fp_ht); + dw_i2c_acpi_params(pdev, "HSCN", &dev->hs_hcnt, &dev->hs_lcnt, &hs_ht); + dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt, &fs_ht); + switch (dev->clk_freq) { case 100000: - dw_i2c_acpi_params(pdev, "SSCN", &dev->ss_hcnt, &dev->ss_lcnt, - &dev->sda_hold_time); + dev->sda_hold_time = ss_ht; break; case 1000000: - dw_i2c_acpi_params(pdev, "FPCN", &dev->fp_hcnt, &dev->fp_lcnt, - &dev->sda_hold_time); + dev->sda_hold_time = fp_ht; break; case 3400000: - dw_i2c_acpi_params(pdev, "HSCN", &dev->hs_hcnt, &dev->hs_lcnt, - &dev->sda_hold_time); + dev->sda_hold_time = hs_ht; break; case 400000: default: - dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt, - &dev->sda_hold_time); + dev->sda_hold_time = fs_ht; break; } -- cgit v1.2.3-59-g8ed1b From 3ecb3ac7b950ff8f6c6a61e8b7b0d6e3546429a0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 May 2017 19:16:15 -0700 Subject: xfs: avoid mount-time deadlock in CoW extent recovery If a malicious user corrupts the refcount btree to cause a cycle between different levels of the tree, the next mount attempt will deadlock in the CoW recovery routine while grabbing buffer locks. We can use the ability to re-grab a buffer that was previous locked to a transaction to avoid deadlocks, so do that here. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_refcount.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index b177ef33cd4c..82a38d86ebad 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers( if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) return -EOPNOTSUPP; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + INIT_LIST_HEAD(&debris); + + /* + * In this first part, we use an empty transaction to gather up + * all the leftover CoW extents so that we can subsequently + * delete them. The empty transaction is used to avoid + * a buffer lock deadlock if there happens to be a loop in the + * refcountbt because we're allowed to re-grab a buffer that is + * already attached to our transaction. When we're done + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. + */ + error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (error) + goto out_trans; + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ - INIT_LIST_HEAD(&debris); memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); low.rc.rc_startblock = XFS_REFC_COW_START; @@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_cursor; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); + xfs_trans_cancel(tp); /* Now iterate the list to free the leftovers */ - list_for_each_entry(rr, &debris, rr_list) { + list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) @@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers( error = xfs_trans_commit(tp); if (error) goto out_free; + + list_del(&rr->rr_list); + kmem_free(rr); } + return error; +out_defer: + xfs_defer_cancel(&dfops); +out_trans: + xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1688,11 +1712,6 @@ out_free: out_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_buf_relse(agbp); - goto out_free; - -out_defer: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_free; + xfs_trans_brelse(tp, agbp); + goto out_trans; } -- cgit v1.2.3-59-g8ed1b From 5f3394530fbe90d3bcd1c204618960bc50236578 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 19 May 2017 08:04:59 -0700 Subject: blktrace: fix integer parse sscanf is a very poor way to parse integer. For example, I input "discard" for act_mask, it gets 0xd and completely messes up. Using correct API to do integer parse. This patch also makes attributes accept any base of integer. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bd8ae8d5ae9c..193c5f5e3f79 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1662,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; if (attr == &dev_attr_act_mask) { - if (sscanf(buf, "%llx", &value) != 1) { + if (kstrtoull(buf, 0, &value)) { /* Assume it is a list of trace category names */ ret = blk_trace_str2mask(buf); if (ret < 0) goto out; value = ret; } - } else if (sscanf(buf, "%llu", &value) != 1) + } else if (kstrtoull(buf, 0, &value)) goto out; ret = -ENXIO; -- cgit v1.2.3-59-g8ed1b From e2c2206a18993bc9f62393d49c7b2066c3845b25 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 11 May 2017 18:12:05 -0700 Subject: KVM: x86: Fix potential preemption when get the current kvmclock timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG: using __this_cpu_read() in preemptible [00000000] code: qemu-system-x86/2809 caller is __this_cpu_preempt_check+0x13/0x20 CPU: 2 PID: 2809 Comm: qemu-system-x86 Not tainted 4.11.0+ #13 Call Trace: dump_stack+0x99/0xce check_preemption_disabled+0xf5/0x100 __this_cpu_preempt_check+0x13/0x20 get_kvmclock_ns+0x6f/0x110 [kvm] get_time_ref_counter+0x5d/0x80 [kvm] kvm_hv_process_stimers+0x2a1/0x8a0 [kvm] ? kvm_hv_process_stimers+0x2a1/0x8a0 [kvm] ? kvm_arch_vcpu_ioctl_run+0xac9/0x1ce0 [kvm] kvm_arch_vcpu_ioctl_run+0x5bf/0x1ce0 [kvm] kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? __fget+0xf3/0x210 do_vfs_ioctl+0xa4/0x700 ? __fget+0x114/0x210 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc2 RIP: 0033:0x7f9d164ed357 ? __this_cpu_preempt_check+0x13/0x20 This can be reproduced by run kvm-unit-tests/hyperv_stimer.flat w/ CONFIG_PREEMPT and CONFIG_DEBUG_PREEMPT enabled. Safe access to per-CPU data requires a couple of constraints, though: the thread working with the data cannot be preempted and it cannot be migrated while it manipulates per-CPU variables. If the thread is preempted, the thread that replaces it could try to work with the same variables; migration to another CPU could also cause confusion. However there is no preemption disable when reads host per-CPU tsc rate to calculate the current kvmclock timestamp. This patch fixes it by utilizing get_cpu/put_cpu pair to guarantee both __this_cpu_read() and rdtsc() are not preempted. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b54125b590e8..3b5fc7e35f6e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1763,6 +1763,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) { struct kvm_arch *ka = &kvm->arch; struct pvclock_vcpu_time_info hv_clock; + u64 ret; spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { @@ -1774,10 +1775,17 @@ u64 get_kvmclock_ns(struct kvm *kvm) hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; spin_unlock(&ka->pvclock_gtod_sync_lock); + /* both __this_cpu_read() and rdtsc() should be on the same cpu */ + get_cpu(); + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, &hv_clock.tsc_shift, &hv_clock.tsc_to_system_mul); - return __pvclock_read_cycles(&hv_clock, rdtsc()); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + + put_cpu(); + + return ret; } static void kvm_setup_pvclock_page(struct kvm_vcpu *v) -- cgit v1.2.3-59-g8ed1b From cbfc6c9184ce71b52df4b1d82af5afc81a709178 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 19 May 2017 02:46:56 -0700 Subject: KVM: X86: Fix read out-of-bounds vulnerability in kvm pio emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huawei folks reported a read out-of-bounds vulnerability in kvm pio emulation. - "inb" instruction to access PIT Mod/Command register (ioport 0x43, write only, a read should be ignored) in guest can get a random number. - "rep insb" instruction to access PIT register port 0x43 can control memcpy() in emulator_pio_in_emulated() to copy max 0x400 bytes but only read 1 bytes, which will disclose the unimportant kernel memory in host but no crash. The similar test program below can reproduce the read out-of-bounds vulnerability: void hexdump(void *mem, unsigned int len) { unsigned int i, j; for(i = 0; i < len + ((len % HEXDUMP_COLS) ? (HEXDUMP_COLS - len % HEXDUMP_COLS) : 0); i++) { /* print offset */ if(i % HEXDUMP_COLS == 0) { printf("0x%06x: ", i); } /* print hex data */ if(i < len) { printf("%02x ", 0xFF & ((char*)mem)[i]); } else /* end of block, just aligning for ASCII dump */ { printf(" "); } /* print ASCII dump */ if(i % HEXDUMP_COLS == (HEXDUMP_COLS - 1)) { for(j = i - (HEXDUMP_COLS - 1); j <= i; j++) { if(j >= len) /* end of block, not really printing */ { putchar(' '); } else if(isprint(((char*)mem)[j])) /* printable char */ { putchar(0xFF & ((char*)mem)[j]); } else /* other char */ { putchar('.'); } } putchar('\n'); } } } int main(void) { int i; if (iopl(3)) { err(1, "set iopl unsuccessfully\n"); return -1; } static char buf[0x40]; /* test ioport 0x40,0x41,0x42,0x43,0x44,0x45 */ memset(buf, 0xab, sizeof(buf)); asm volatile("push %rdi;"); asm volatile("mov %0, %%rdi;"::"q"(buf)); asm volatile ("mov $0x40, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x41, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x42, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x43, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x44, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x45, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("pop %rdi;"); hexdump(buf, 0x40); printf("\n"); /* ins port 0x40 */ memset(buf, 0xab, sizeof(buf)); asm volatile("push %rdi;"); asm volatile("mov %0, %%rdi;"::"q"(buf)); asm volatile ("mov $0x20, %rcx;"); asm volatile ("mov $0x40, %rdx;"); asm volatile ("rep insb;"); asm volatile ("pop %rdi;"); hexdump(buf, 0x40); printf("\n"); /* ins port 0x43 */ memset(buf, 0xab, sizeof(buf)); asm volatile("push %rdi;"); asm volatile("mov %0, %%rdi;"::"q"(buf)); asm volatile ("mov $0x20, %rcx;"); asm volatile ("mov $0x43, %rdx;"); asm volatile ("rep insb;"); asm volatile ("pop %rdi;"); hexdump(buf, 0x40); printf("\n"); return 0; } The vcpu->arch.pio_data buffer is used by both in/out instrutions emulation w/o clear after using which results in some random datas are left over in the buffer. Guest reads port 0x43 will be ignored since it is write only, however, the function kernel_pio() can't distigush this ignore from successfully reads data from device's ioport. There is no new data fill the buffer from port 0x43, however, emulator_pio_in_emulated() will copy the stale data in the buffer to the guest unconditionally. This patch fixes it by clearing the buffer before in instruction emulation to avoid to grant guest the stale data in the buffer. In addition, string I/O is not supported for in kernel device. So there is no iteration to read ioport %RCX times for string I/O. The function kernel_pio() just reads one round, and then copy the io size * %RCX to the guest unconditionally, actually it copies the one round ioport data w/ other random datas which are left over in the vcpu->arch.pio_data buffer to the guest. This patch fixes it by introducing the string I/O support for in kernel device in order to grant the right ioport datas to the guest. Before the patch: 0x000000: fe 38 93 93 ff ff ab ab .8...... 0x000008: ab ab ab ab ab ab ab ab ........ 0x000010: ab ab ab ab ab ab ab ab ........ 0x000018: ab ab ab ab ab ab ab ab ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: f6 00 00 00 00 00 00 00 ........ 0x000008: 00 00 00 00 00 00 00 00 ........ 0x000010: 00 00 00 00 4d 51 30 30 ....MQ00 0x000018: 30 30 20 33 20 20 20 20 00 3 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: f6 00 00 00 00 00 00 00 ........ 0x000008: 00 00 00 00 00 00 00 00 ........ 0x000010: 00 00 00 00 4d 51 30 30 ....MQ00 0x000018: 30 30 20 33 20 20 20 20 00 3 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ After the patch: 0x000000: 1e 02 f8 00 ff ff ab ab ........ 0x000008: ab ab ab ab ab ab ab ab ........ 0x000010: ab ab ab ab ab ab ab ab ........ 0x000018: ab ab ab ab ab ab ab ab ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: d2 e2 d2 df d2 db d2 d7 ........ 0x000008: d2 d3 d2 cf d2 cb d2 c7 ........ 0x000010: d2 c4 d2 c0 d2 bc d2 b8 ........ 0x000018: d2 b4 d2 b0 d2 ac d2 a8 ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: 00 00 00 00 00 00 00 00 ........ 0x000008: 00 00 00 00 00 00 00 00 ........ 0x000010: 00 00 00 00 00 00 00 00 ........ 0x000018: 00 00 00 00 00 00 00 00 ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ Reported-by: Moguofang Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Moguofang Signed-off-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b5fc7e35f6e..519f3572e48e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4831,16 +4831,20 @@ emul_write: static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) { - /* TODO: String I/O for in kernel device */ - int r; + int r = 0, i; - if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, - vcpu->arch.pio.size, pd); - else - r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, - vcpu->arch.pio.port, vcpu->arch.pio.size, - pd); + for (i = 0; i < vcpu->arch.pio.count; i++) { + if (vcpu->arch.pio.in) + r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, + vcpu->arch.pio.size, pd); + else + r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, + vcpu->arch.pio.port, vcpu->arch.pio.size, + pd); + if (r) + break; + pd += vcpu->arch.pio.size; + } return r; } @@ -4878,6 +4882,8 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (vcpu->arch.pio.count) goto data_avail; + memset(vcpu->arch.pio_data, 0, size * count); + ret = emulator_pio_in_out(vcpu, size, port, val, count, true); if (ret) { data_avail: -- cgit v1.2.3-59-g8ed1b From f0367ee1d64d27fa08be2407df5c125442e885e3 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 18 May 2017 19:37:30 +0200 Subject: KVM: x86: zero base3 of unusable segments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Static checker noticed that base3 could be used uninitialized if the segment was not present (useable). Random stack values probably would not pass VMCS entry checks. Reported-by: Dan Carpenter Fixes: 1aa366163b8b ("KVM: x86 emulator: consolidate segment accessors") Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 519f3572e48e..02363e37d4a6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5067,6 +5067,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, if (var.unusable) { memset(desc, 0, sizeof(*desc)); + if (base3) + *base3 = 0; return false; } -- cgit v1.2.3-59-g8ed1b From 34b0dadbdf698f9b277a31b2747b625b9a75ea1f Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 18 May 2017 19:37:31 +0200 Subject: KVM: x86/vPMU: fix undefined shift in intel_pmu_refresh() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Static analysis noticed that pmu->nr_arch_gp_counters can be 32 (INTEL_PMC_MAX_GENERIC) and therefore cannot be used to shift 'int'. I didn't add BUILD_BUG_ON for it as we have a better checker. Reported-by: Dan Carpenter Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch") Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index 9d4a8504a95a..5ab4a364348e 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -294,7 +294,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) ((u64)1 << edx.split.bit_width_fixed) - 1; } - pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | + pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); pmu->global_ctrl_mask = ~pmu->global_ctrl; -- cgit v1.2.3-59-g8ed1b From 92ceb7679ab8807d3b7fbcc6daf2279036954ef5 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 18 May 2017 19:37:32 +0200 Subject: KVM: x86: prevent uninitialized variable warning in check_svme() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_msr() of MSR_EFER is currently always going to succeed, but static checker doesn't see that far. Don't complicate stuff and just use 0 for the fallback -- it means that the feature is not present. Reported-by: Dan Carpenter Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c25cfaf584e7..0816ab2e8adc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4173,7 +4173,7 @@ static int check_dr_write(struct x86_emulate_ctxt *ctxt) static int check_svme(struct x86_emulate_ctxt *ctxt) { - u64 efer; + u64 efer = 0; ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); -- cgit v1.2.3-59-g8ed1b From f63572dff1421b6ca6abce71d46e03411e605c94 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Fri, 5 May 2017 14:52:06 -0600 Subject: nvme: unmap CMB and remove sysfs file in reset path CMB doesn't get unmapped until removal while getting remapped on every reset. Add the unmapping and sysfs file removal to the reset path in nvme_pci_disable to match the mapping path in nvme_pci_enable. Fixes: 202021c1a ("nvme : Add sysfs entry for NVMe CMBs when appropriate") Signed-off-by: Jon Derrick Acked-by: Keith Busch Reviewed-By: Stephen Bates Cc: # 4.9+ Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 56a315bd4d96..0866f64890e5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1510,6 +1510,11 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) if (dev->cmb) { iounmap(dev->cmb); dev->cmb = NULL; + if (dev->cmbsz) { + sysfs_remove_file_from_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL); + dev->cmbsz = 0; + } } } @@ -1783,6 +1788,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); + nvme_release_cmb(dev); pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { @@ -2188,7 +2194,6 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_disable(dev, true); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); - nvme_release_cmb(dev); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); -- cgit v1.2.3-59-g8ed1b From 4123109050a869a8871e58a50f28f383d41e49ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 5 May 2017 16:13:02 -0700 Subject: nvme-fc: correct port role bits FC Port roles is a bit mask, not individual values. Correct nvme definitions to unique bits. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 0db37158a61d..12e344b5b77f 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -27,8 +27,8 @@ /* FC Port role bitmask - can merge with FC Port Roles in fc transport */ #define FC_PORT_ROLE_NVME_INITIATOR 0x10 -#define FC_PORT_ROLE_NVME_TARGET 0x11 -#define FC_PORT_ROLE_NVME_DISCOVERY 0x12 +#define FC_PORT_ROLE_NVME_TARGET 0x20 +#define FC_PORT_ROLE_NVME_DISCOVERY 0x40 /** -- cgit v1.2.3-59-g8ed1b From 85e6a6adf8de7f992e01d2c3c59d9875d658b276 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 5 May 2017 16:13:15 -0700 Subject: nvme-fc: require target or discovery role for fc-nvme targets In order to create an association, the remoteport must be serving either a target role or a discovery role. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 70e689bf1cad..912d457150d5 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2720,6 +2720,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, unsigned long flags; int ret, idx; + if (!(rport->remoteport.port_role & + (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { + ret = -EBADR; + goto out_fail; + } + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { ret = -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 2952a879bacbfae8b03fd886754e64fe14b8041e Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 25 Apr 2017 15:32:01 -0700 Subject: nvme-fc: stop queues on error detection Per the recommendation by Sagi on: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html Rather than waiting for reset work thread to stop queues and abort the ios, immediately stop the queues on error detection. Reset thread will restop the queues (as it's called on other paths), but it does not appear to have a side effect. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 912d457150d5..dca7165fabcf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1754,6 +1754,10 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) dev_info(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); + /* stop the queues on error, cleanup is in reset thread */ + if (ctrl->queue_count > 1) + nvme_stop_queues(&ctrl->ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: error_recovery: Couldn't change state " -- cgit v1.2.3-59-g8ed1b From 4b8ba5fa525bc8bdaaed2a5c5433f0f2008d7bc5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 25 Apr 2017 16:23:09 -0700 Subject: nvmet-fc: remove target cpu scheduling flag Remove NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED. It's unnecessary. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 4 +--- drivers/nvme/target/fcloop.c | 1 - drivers/scsi/lpfc/lpfc_nvmet.c | 1 - include/linux/nvme-fc-driver.h | 12 ++---------- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 62eba29c85fb..2006fae61980 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -517,9 +517,7 @@ nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) { int cpu, idx, cnt; - if (!(tgtport->ops->target_features & - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED) || - tgtport->ops->max_hw_queues == 1) + if (tgtport->ops->max_hw_queues == 1) return WORK_CPU_UNBOUND; /* Simple cpu selection based on qid modulo active cpu count */ diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 15551ef79c8c..294a6611fb24 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -698,7 +698,6 @@ static struct nvmet_fc_target_template tgttemplate = { .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 94434e621c33..0488580eea12 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -764,7 +764,6 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_CMD_IN_ISR | NVMET_FCTGTFEAT_OPDONE_IN_ISR; diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 12e344b5b77f..6c8c5d8041b7 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -642,15 +642,7 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1), - /* Bit 1: When 0, the LLDD will deliver FCP CMD - * on the CPU it should be affinitized to. Thus work will - * be scheduled on the cpu received on. When 1, the LLDD - * may not deliver the CMD on the CPU it should be worked - * on. The transport should pick a cpu to schedule the work - * on. - */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2), + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), /* Bit 2: When 0, the LLDD is calling the cmd rcv handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD @@ -658,7 +650,7 @@ enum { * requiring the transport to transition to a workqueue * for op completion. */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3), + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), /* Bit 3: When 0, the LLDD is calling the op done handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD -- cgit v1.2.3-59-g8ed1b From 549f01ae7b913355bea76100d3f17694bc9ec769 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Mon, 8 May 2017 16:38:35 -0700 Subject: nvmet: release the sq ref on rdma read errors On rdma read errors, release the sq ref that was taken when the req was initialized. This avoids a hang in nvmet_sq_destroy() when the queue is being freed. Signed-off-by: Vijay Immanuel Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 6 ++++++ drivers/nvme/target/nvmet.h | 1 + drivers/nvme/target/rdma.c | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cf90713043da..eb9399ac97cf 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -529,6 +529,12 @@ fail: } EXPORT_SYMBOL_GPL(nvmet_req_init); +void nvmet_req_uninit(struct nvmet_req *req) +{ + percpu_ref_put(&req->sq->ref); +} +EXPORT_SYMBOL_GPL(nvmet_req_uninit); + static inline bool nvmet_cc_en(u32 cc) { return cc & 0x1; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7cb77ba5993b..cfc5c7fb0ab7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -261,6 +261,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); +void nvmet_req_uninit(struct nvmet_req *req); void nvmet_req_complete(struct nvmet_req *req, u16 status); void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 99c69018a35f..9e45cde63376 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -567,6 +567,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) rsp->n_rdma = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n", -- cgit v1.2.3-59-g8ed1b From a7cc722fff0b32bcd28bf4722dff816b0b695f7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 May 2017 13:08:42 -0400 Subject: fix unsafe_put_user() __put_user_size() relies upon its first argument having the same type as what the second one points to; the only other user makes sure of that and unsafe_put_user() should do the same. Signed-off-by: Al Viro --- arch/x86/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 68766b276d9e..d9668c3beb5b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -703,7 +703,7 @@ extern struct movsl_mask { #define unsafe_put_user(x, ptr, err_label) \ do { \ int __pu_err; \ - __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ + __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ if (unlikely(__pu_err)) goto err_label; \ } while (0) -- cgit v1.2.3-59-g8ed1b From a8c39544a6eb2093c04afd5005b6192bd0e880c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 May 2017 21:47:25 -0400 Subject: osf_wait4(): fix infoleak failing sys_wait4() won't fill struct rusage... Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 9ec56dc97374..ce93124a850b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1201,8 +1201,10 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur))) return -EFAULT; - err = 0; - err |= put_user(status, ustatus); + err = put_user(status, ustatus); + if (ret < 0) + return err ? err : ret; + err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec); err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec); err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec); -- cgit v1.2.3-59-g8ed1b From 499350a5a6e7512d9ed369ed63a4244b6536f4f8 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 May 2017 11:22:33 -0700 Subject: tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0 When tcp_disconnect() is called, inet_csk_delack_init() sets icsk->icsk_ack.rcv_mss to 0. This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() => __tcp_select_window() call path to have division by 0 issue. So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0. Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1e4c76d2b827..842b575f8fdd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2320,6 +2320,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); -- cgit v1.2.3-59-g8ed1b From 34eb5fe07831458cf8238d54c1fc847dedeaf68c Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:18 -0700 Subject: arp: fixed error in a comment the is_garp code deals just with gratuitous ARP packets, not every unsolicited packet. This patch is a result of a discussion in netdev: http://marc.info/?l=linux-netdev&m=149506354216994 Suggested-by: Julian Anastasov Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d54345a06f72..053492af8a6e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -846,7 +846,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) */ is_garp = tip == sip && addr_type == RTN_UNICAST; - /* Unsolicited ARP _replies_ also require target hwaddr to be + /* Gratuitous ARP _replies_ also require target hwaddr to be * the same as source. */ if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) -- cgit v1.2.3-59-g8ed1b From 6fd05633bdafc0ae6ec0d55e61af10780d4d3530 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:19 -0700 Subject: arp: decompose is_garp logic into a separate function The code is quite involving already to earn a separate function for itself. If anything, it helps arp_process readability. Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 053492af8a6e..ca6e1e6c1496 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -641,6 +641,27 @@ void arp_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(arp_xmit); +static bool arp_is_garp(struct net_device *dev, int addr_type, + __be16 ar_op, + __be32 sip, __be32 tip, + unsigned char *sha, unsigned char *tha) +{ + bool is_garp = tip == sip && addr_type == RTN_UNICAST; + + /* Gratuitous ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); + + return is_garp; +} + /* * Process an arp request. */ @@ -844,18 +865,8 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = tip == sip && addr_type == RTN_UNICAST; - - /* Gratuitous ARP _replies_ also require target hwaddr to be - * the same as source. - */ - if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) - is_garp = - /* IPv4 over IEEE 1394 doesn't provide target - * hardware address field in its ARP payload. - */ - tha && - !memcmp(tha, sha, dev->addr_len); + is_garp = arp_is_garp(dev, addr_type, arp->ar_op, + sip, tip, sha, tha); if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && -- cgit v1.2.3-59-g8ed1b From d9ef2e7bf99f59179b89d5c1c4d5b4919375daee Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:20 -0700 Subject: arp: postpone addr_type calculation to as late as possible The addr_type retrieval can be costly, so it's worth trying to avoid its calculation as much as possible. This patch makes it calculated only for gratuitous ARP packets. This is especially important since later we may want to move is_garp calculation outside of arp_accept block, at which point the costly operation will be executed for all setups. The patch is the result of a discussion in net-dev: http://marc.info/?l=linux-netdev&m=149506354216994 Suggested-by: Julian Anastasov Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ca6e1e6c1496..c22103cec823 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -641,12 +641,12 @@ void arp_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(arp_xmit); -static bool arp_is_garp(struct net_device *dev, int addr_type, - __be16 ar_op, +static bool arp_is_garp(struct net *net, struct net_device *dev, + int *addr_type, __be16 ar_op, __be32 sip, __be32 tip, unsigned char *sha, unsigned char *tha) { - bool is_garp = tip == sip && addr_type == RTN_UNICAST; + bool is_garp = tip == sip; /* Gratuitous ARP _replies_ also require target hwaddr to be * the same as source. @@ -659,6 +659,11 @@ static bool arp_is_garp(struct net_device *dev, int addr_type, tha && !memcmp(tha, sha, dev->addr_len); + if (is_garp) { + *addr_type = inet_addr_type_dev_table(net, dev, sip); + if (*addr_type != RTN_UNICAST) + is_garp = false; + } return is_garp; } @@ -859,18 +864,23 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); if (IN_DEV_ARP_ACCEPT(in_dev)) { - unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip); + addr_type = -1; /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp_is_garp(dev, addr_type, arp->ar_op, + is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, sip, tip, sha, tha); if (!n && - ((arp->ar_op == htons(ARPOP_REPLY) && - addr_type == RTN_UNICAST) || is_garp)) + (is_garp || + (arp->ar_op == htons(ARPOP_REPLY) && + (addr_type == RTN_UNICAST || + (addr_type < 0 && + /* postpone calculation to as late as possible */ + inet_addr_type_dev_table(net, dev, sip) == + RTN_UNICAST))))) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } -- cgit v1.2.3-59-g8ed1b From 7d472a59c0e5ec117220a05de6b370447fb6cb66 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 18 May 2017 12:41:21 -0700 Subject: arp: always override existing neigh entries with gratuitous ARP Currently, when arp_accept is 1, we always override existing neigh entries with incoming gratuitous ARP replies. Otherwise, we override them only if new replies satisfy _locktime_ conditional (packets arrive not earlier than _locktime_ seconds since the last update to the neigh entry). The idea behind locktime is to pick the very first (=> close) reply received in a unicast burst when ARP proxies are used. This helps to avoid ARP thrashing where Linux would switch back and forth from one proxy to another. This logic has nothing to do with gratuitous ARP replies that are generally not aligned in time when multiple IP address carriers send them into network. This patch enforces overriding of existing neigh entries by all incoming gratuitous ARP packets, irrespective of their time of arrival. This will make the kernel honour all incoming gratuitous ARP packets. Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c22103cec823..ae96e6f3e0cb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -863,16 +863,17 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IN_DEV_ARP_ACCEPT(in_dev)) { + if (n || IN_DEV_ARP_ACCEPT(in_dev)) { addr_type = -1; + is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, + sip, tip, sha, tha); + } + if (IN_DEV_ARP_ACCEPT(in_dev)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, - sip, tip, sha, tha); - if (!n && (is_garp || (arp->ar_op == htons(ARPOP_REPLY) && -- cgit v1.2.3-59-g8ed1b From fe0cd8ca1b82983db24b173bb8518ea646c02d25 Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Fri, 19 May 2017 14:00:25 +0000 Subject: smsc95xx: Support only IPv4 TCP/UDP csum offload When TX checksum offload is used, if the computed checksum is 0 the LAN95xx device do not alter the checksum to 0xffff. In the case of ipv4 UDP checksum, it indicates to receiver that no checksum is calculated. Under ipv6, UDP checksum yields a result of zero must be changed to 0xffff. Hence disabling checksum offload for ipv6 packets. Signed-off-by: Nisar Sayed Reported-by: popcorn mix Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 765400b62168..2dfca96a63b6 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev, if (ret < 0) return ret; - if (features & NETIF_F_HW_CSUM) + if (features & NETIF_F_IP_CSUM) read_buf |= Tx_COE_EN_; else read_buf &= ~Tx_COE_EN_; @@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) spin_lock_init(&pdata->mac_cr_lock); + /* LAN95xx devices do not alter the computed checksum of 0 to 0xffff. + * RFC 2460, ipv6 UDP calculated checksum yields a result of zero must + * be changed to 0xffff. RFC 768, ipv4 UDP computed checksum is zero, + * it is transmitted as all ones. The zero transmitted checksum means + * transmitter generated no checksum. Hence, enable csum offload only + * for ipv4 packets. + */ if (DEFAULT_TX_CSUM_ENABLE) - dev->net->features |= NETIF_F_HW_CSUM; + dev->net->features |= NETIF_F_IP_CSUM; if (DEFAULT_RX_CSUM_ENABLE) dev->net->features |= NETIF_F_RXCSUM; - dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; smsc95xx_init_mac_address(dev); -- cgit v1.2.3-59-g8ed1b From 6d18c732b95c0a9d35e9f978b4438bba15412284 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 19 May 2017 22:20:29 +0800 Subject: bridge: start hello_timer when enabling KERNEL_STP in br_stp_start Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers"), bridge would not start hello_timer if stp_enabled is not KERNEL_STP when br_dev_open. The problem is even if users set stp_enabled with KERNEL_STP later, the timer will still not be started. It causes that KERNEL_STP can not really work. Users have to re-ifup the bridge to avoid this. This patch is to fix it by starting br->hello_timer when enabling KERNEL_STP in br_stp_start. As an improvement, it's also to start hello_timer again only when br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is no reason to start the timer again when it's NO_STP. Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers") Reported-by: Haidong Li Signed-off-by: Xin Long Acked-by: Nikolay Aleksandrov Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller --- net/bridge/br_stp_if.c | 1 + net/bridge/br_stp_timer.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 08341d2aa9c9..0db8102995a5 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -179,6 +179,7 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c98b3e5c140a..60b6fe277a8b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - if (br->stp_enabled != BR_USER_STP) + if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } -- cgit v1.2.3-59-g8ed1b From 334a023ee50997b45ffb8fbcc8bc875519040aac Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 May 2017 15:25:46 -0700 Subject: Clean up x86 unsafe_get/put_user() type handling Al noticed that unsafe_put_user() had type problems, and fixed them in commit a7cc722fff0b ("fix unsafe_put_user()"), which made me look more at those functions. It turns out that unsafe_get_user() had a type issue too: it limited the largest size of the type it could handle to "unsigned long". Which is fine with the current users, but doesn't match our existing normal get_user() semantics, which can also handle "u64" even when that does not fit in a long. While at it, also clean up the type cast in unsafe_put_user(). We actually want to just make it an assignment to the expected type of the pointer, because we actually do want warnings from types that don't convert silently. And it makes the code more readable by not having that one very long and complex line. [ This patch might become stable material if we ever end up back-porting any new users of the unsafe uaccess code, but as things stand now this doesn't matter for any current existing uses. ] Cc: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d9668c3beb5b..fc1eb64fdfff 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -703,14 +703,15 @@ extern struct movsl_mask { #define unsafe_put_user(x, ptr, err_label) \ do { \ int __pu_err; \ - __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __put_user_size(__pu_val, (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ if (unlikely(__pu_err)) goto err_label; \ } while (0) #define unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ - unsigned long __gu_val; \ + __inttype(*(ptr)) __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ if (unlikely(__gu_err)) goto err_label; \ -- cgit v1.2.3-59-g8ed1b From 33c9e9729033387ef0521324c62e7eba529294af Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 May 2017 18:26:54 -0700 Subject: x86: fix 32-bit case of __get_user_asm_u64() The code to fetch a 64-bit value from user space was entirely buggered, and has been since the code was merged in early 2016 in commit b2f680380ddf ("x86/mm/32: Add support for 64-bit __get_user() on 32-bit kernels"). Happily the buggered routine is almost certainly entirely unused, since the normal way to access user space memory is just with the non-inlined "get_user()", and the inlined version didn't even historically exist. The normal "get_user()" case is handled by external hand-written asm in arch/x86/lib/getuser.S that doesn't have either of these issues. There were two independent bugs in __get_user_asm_u64(): - it still did the STAC/CLAC user space access marking, even though that is now done by the wrapper macros, see commit 11f1a4b9755f ("x86: reorganize SMAP handling in user space accesses"). This didn't result in a semantic error, it just means that the inlined optimized version was hugely less efficient than the allegedly slower standard version, since the CLAC/STAC overhead is quite high on modern Intel CPU's. - the double register %eax/%edx was marked as an output, but the %eax part of it was touched early in the asm, and could thus clobber other inputs to the asm that gcc didn't expect it to touch. In particular, that meant that the generated code could look like this: mov (%eax),%eax mov 0x4(%eax),%edx where the load of %edx obviously was _supposed_ to be from the 32-bit word that followed the source of %eax, but because %eax was overwritten by the first instruction, the source of %edx was basically random garbage. The fixes are trivial: remove the extraneous STAC/CLAC entries, and mark the 64-bit output as early-clobber to let gcc know that no inputs should alias with the output register. Cc: Al Viro Cc: Benjamin LaHaise Cc: Ingo Molnar Cc: stable@kernel.org # v4.8+ Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index fc1eb64fdfff..a059aac9e937 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -319,10 +319,10 @@ do { \ #define __get_user_asm_u64(x, ptr, retval, errret) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %2,%%eax\n" \ "2: movl %3,%%edx\n" \ - "3: " ASM_CLAC "\n" \ + "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: mov %4,%0\n" \ " xorl %%eax,%%eax\n" \ @@ -331,7 +331,7 @@ do { \ ".previous\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=r" (retval), "=A"(x) \ + : "=r" (retval), "=&A"(x) \ : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ "i" (errret), "0" (retval)); \ }) -- cgit v1.2.3-59-g8ed1b From 08332893e37af6ae779367e78e444f8f9571511d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 May 2017 19:30:23 -0700 Subject: Linux 4.12-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b1ee4a49efa2..63e10bd4f14a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From e480eabae232b92ff44ce63678280373713920a4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 18 May 2017 21:33:44 +0200 Subject: drm/radeon: Fix oops upon driver load on PowerXpress laptops Nicolai Stange reports the following oops which is caused by dereferencing rdev->pdev before it's subsequently set by radeon_device_init(). Fix it. BUG: unable to handle kernel NULL pointer dereference at 00000000000007cb IP: radeon_driver_load_kms+0xeb/0x230 [radeon] ... Call Trace: drm_dev_register+0x146/0x1d0 [drm] drm_get_pci_dev+0x9a/0x180 [drm] radeon_pci_probe+0xb8/0xe0 [radeon] local_pci_probe+0x45/0xa0 pci_device_probe+0x14f/0x1a0 driver_probe_device+0x29c/0x450 __driver_attach+0xdf/0xf0 ? driver_probe_device+0x450/0x450 bus_for_each_dev+0x6c/0xc0 driver_attach+0x1e/0x20 bus_add_driver+0x170/0x270 driver_register+0x60/0xe0 ? 0xffffffffc0508000 __pci_register_driver+0x4c/0x50 drm_pci_init+0xeb/0x100 [drm] ? vga_switcheroo_register_handler+0x6a/0x90 ? 0xffffffffc0508000 radeon_init+0x98/0xb6 [radeon] do_one_initcall+0x52/0x1a0 ? __vunmap+0x81/0xb0 ? kmem_cache_alloc_trace+0x159/0x1b0 ? do_init_module+0x27/0x1f8 do_init_module+0x5f/0x1f8 load_module+0x27ce/0x2be0 SYSC_finit_module+0xdf/0x110 ? SYSC_finit_module+0xdf/0x110 SyS_finit_module+0xe/0x10 do_syscall_64+0x67/0x150 entry_SYSCALL64_slow_path+0x25/0x25 Fixes: 7ffb0ce31cf9 ("drm/radeon: Don't register Thunderbolt eGPU with vga_switcheroo") Reported-and-tested-by: Nicolai Stange Signed-off-by: Lukas Wunner Link: http://patchwork.freedesktop.org/patch/msgid/cfb91ba052af06117137eec0637543a2626a7979.1495135190.git.lukas@wunner.de --- drivers/gpu/drm/radeon/radeon_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index e3e7cb1d10a2..4761f27f2ca2 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -116,7 +116,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) if ((radeon_runtime_pm != 0) && radeon_has_atpx() && ((flags & RADEON_IS_IGP) == 0) && - !pci_is_thunderbolt_attached(rdev->pdev)) + !pci_is_thunderbolt_attached(dev->pdev)) flags |= RADEON_IS_PX; /* radeon_device_init should report only fatal error -- cgit v1.2.3-59-g8ed1b From 5165da5923d6c7df6f2927b0113b2e4d9288661e Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 5 May 2017 11:06:50 +0200 Subject: i2c: i2c-tiny-usb: fix buffer not being DMA capable Since v4.9 i2c-tiny-usb generates the below call trace and longer works, since it can't communicate with the USB device. The reason is, that since v4.9 the USB stack checks, that the buffer it should transfer is DMA capable. This was a requirement since v2.2 days, but it usually worked nevertheless. [ 17.504959] ------------[ cut here ]------------ [ 17.505488] WARNING: CPU: 0 PID: 93 at drivers/usb/core/hcd.c:1587 usb_hcd_map_urb_for_dma+0x37c/0x570 [ 17.506545] transfer buffer not dma capable [ 17.507022] Modules linked in: [ 17.507370] CPU: 0 PID: 93 Comm: i2cdetect Not tainted 4.11.0-rc8+ #10 [ 17.508103] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 17.509039] Call Trace: [ 17.509320] ? dump_stack+0x5c/0x78 [ 17.509714] ? __warn+0xbe/0xe0 [ 17.510073] ? warn_slowpath_fmt+0x5a/0x80 [ 17.510532] ? nommu_map_sg+0xb0/0xb0 [ 17.510949] ? usb_hcd_map_urb_for_dma+0x37c/0x570 [ 17.511482] ? usb_hcd_submit_urb+0x336/0xab0 [ 17.511976] ? wait_for_completion_timeout+0x12f/0x1a0 [ 17.512549] ? wait_for_completion_timeout+0x65/0x1a0 [ 17.513125] ? usb_start_wait_urb+0x65/0x160 [ 17.513604] ? usb_control_msg+0xdc/0x130 [ 17.514061] ? usb_xfer+0xa4/0x2a0 [ 17.514445] ? __i2c_transfer+0x108/0x3c0 [ 17.514899] ? i2c_transfer+0x57/0xb0 [ 17.515310] ? i2c_smbus_xfer_emulated+0x12f/0x590 [ 17.515851] ? _raw_spin_unlock_irqrestore+0x11/0x20 [ 17.516408] ? i2c_smbus_xfer+0x125/0x330 [ 17.516876] ? i2c_smbus_xfer+0x125/0x330 [ 17.517329] ? i2cdev_ioctl_smbus+0x1c1/0x2b0 [ 17.517824] ? i2cdev_ioctl+0x75/0x1c0 [ 17.518248] ? do_vfs_ioctl+0x9f/0x600 [ 17.518671] ? vfs_write+0x144/0x190 [ 17.519078] ? SyS_ioctl+0x74/0x80 [ 17.519463] ? entry_SYSCALL_64_fastpath+0x1e/0xad [ 17.519959] ---[ end trace d047c04982f5ac50 ]--- Cc: Signed-off-by: Sebastian Reichel Reviewed-by: Greg Kroah-Hartman Acked-by: Till Harbaum Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tiny-usb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index 0ed77eeff31e..a2e3dd715380 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmalloc(len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | - USB_DIR_IN, value, index, data, len, 2000); + USB_DIR_IN, value, index, dmadata, len, 2000); + + memcpy(data, dmadata, len); + kfree(dmadata); + return ret; } static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmemdup(data, len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, - value, index, data, len, 2000); + value, index, dmadata, len, 2000); + + kfree(dmadata); + return ret; } static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev) -- cgit v1.2.3-59-g8ed1b From e2c824924cdb41528932c550647406ad81336b18 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 22 May 2017 07:46:55 +0200 Subject: i2c: designware: Fix bogus sda_hold_time due to uninitialized vars We need to initializes those variables to 0 for platforms that do not provide ACPI parameters. Otherwise, we set sda_hold_time to random values, breaking e.g. Galileo and IOT2000 boards. Fixes: 9d6408433019 ("i2c: designware: don't infer timings described by ACPI from clock rate") Signed-off-by: Jan Kiszka Reviewed-by: Ard Biesheuvel Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 6283b99d2b17..d1263b82d646 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -94,9 +94,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], static int dw_i2c_acpi_configure(struct platform_device *pdev) { struct dw_i2c_dev *dev = platform_get_drvdata(pdev); + u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0; acpi_handle handle = ACPI_HANDLE(&pdev->dev); const struct acpi_device_id *id; - u32 ss_ht, fp_ht, hs_ht, fs_ht; struct acpi_device *adev; const char *uid; -- cgit v1.2.3-59-g8ed1b From 63691587f7b0028326ddd1226c378aaaeca4d4e4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 22 May 2017 16:32:46 +0200 Subject: ALSA: hda - Apply dual-codec quirk for MSI Z270-Gaming mobo MSI Z270-Gamin mobo has also two ALC1220 codecs like Gigabyte AZ370- Gaming mobo. Apply the same quirk to this one. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9c22ad694534..3fdd5af190a4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2328,6 +2328,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), -- cgit v1.2.3-59-g8ed1b From ba90d6a6b00a84f2a18112145c113e5ef628e561 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 22 May 2017 16:38:47 +0200 Subject: ALSA: hda - Provide dual-codecs model option for a few Realtek codecs Recently some laptops and mobos are equipped with the dual Realtek codecs that require special quirks. For making the debugging easier, add the model "dual-codecs" to be passed via module option. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3fdd5af190a4..918e45268915 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2343,6 +2343,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {.id = ALC883_FIXUP_ACER_EAPD, .name = "acer-aspire"}, {.id = ALC882_FIXUP_INV_DMIC, .name = "inv-dmic"}, {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"}, + {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"}, {} }; @@ -6015,6 +6016,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, {.id = ALC292_FIXUP_TPT460, .name = "tpt460"}, + {.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {} }; #define ALC225_STANDARD_PINS \ @@ -7342,6 +7344,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = { {.id = ALC662_FIXUP_ASUS_MODE8, .name = "asus-mode8"}, {.id = ALC662_FIXUP_INV_DMIC, .name = "inv-dmic"}, {.id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, + {.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {} }; -- cgit v1.2.3-59-g8ed1b From a79e7df97592b2326be81d5dae286bdb5c529a01 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 22 May 2017 16:41:24 +0200 Subject: ALSA: hda - Update the list of quirk models I've forgotten to sync the documentation with the actually available options for some time. Now all updated. Signed-off-by: Takashi Iwai --- Documentation/sound/hd-audio/models.rst | 114 ++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 5338673c88d9..773d2bfacc6c 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -16,6 +16,8 @@ ALC880 6-jack in back, 2-jack in front 6stack-digout 6-jack with a SPDIF out +6stack-automute + 6-jack with headphone jack detection ALC260 ====== @@ -62,6 +64,8 @@ lenovo-dock Enables docking station I/O for some Lenovos hp-gpio-led GPIO LED support on HP laptops +hp-dock-gpio-mic1-led + HP dock with mic LED support dell-headset-multi Headset jack, which can also be used as mic-in dell-headset-dock @@ -72,6 +76,12 @@ alc283-sense-combo Combo jack sensing on ALC283 tpt440-dock Pin configs for Lenovo Thinkpad Dock support +tpt440 + Lenovo Thinkpad T440s setup +tpt460 + Lenovo Thinkpad T460/560 setup +dual-codecs + Lenovo laptops with dual codecs ALC66x/67x/892 ============== @@ -97,6 +107,8 @@ inv-dmic Inverted internal mic workaround dell-headset-multi Headset jack, which can also be used as mic-in +dual-codecs + Lenovo laptops with dual codecs ALC680 ====== @@ -114,6 +126,8 @@ inv-dmic Inverted internal mic workaround no-primary-hp VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC) +dual-codecs + ALC1220 dual codecs for Gaming mobos ALC861/660 ========== @@ -206,65 +220,47 @@ auto Conexant 5045 ============= -laptop-hpsense - Laptop with HP sense (old model laptop) -laptop-micsense - Laptop with Mic sense (old model fujitsu) -laptop-hpmicsense - Laptop with HP and Mic senses -benq - Benq R55E -laptop-hp530 - HP 530 laptop -test - for testing/debugging purpose, almost all controls can be - adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y +cap-mix-amp + Fix max input level on mixer widget +toshiba-p105 + Toshiba P105 quirk +hp-530 + HP 530 quirk Conexant 5047 ============= -laptop - Basic Laptop config -laptop-hp - Laptop config for some HP models (subdevice 30A5) -laptop-eapd - Laptop config with EAPD support -test - for testing/debugging purpose, almost all controls can be - adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y +cap-mix-amp + Fix max input level on mixer widget Conexant 5051 ============= -laptop - Basic Laptop config (default) -hp - HP Spartan laptop -hp-dv6736 - HP dv6736 -hp-f700 - HP Compaq Presario F700 -ideapad - Lenovo IdeaPad laptop -toshiba - Toshiba Satellite M300 +lenovo-x200 + Lenovo X200 quirk Conexant 5066 ============= -laptop - Basic Laptop config (default) -hp-laptop - HP laptops, e g G60 -asus - Asus K52JU, Lenovo G560 -dell-laptop - Dell laptops -dell-vostro - Dell Vostro -olpc-xo-1_5 - OLPC XO 1.5 -ideapad - Lenovo IdeaPad U150 +stereo-dmic + Workaround for inverted stereo digital mic +gpio1 + Enable GPIO1 pin +headphone-mic-pin + Enable headphone mic NID 0x18 without detection +tp410 + Thinkpad T400 & co quirks thinkpad - Lenovo Thinkpad + Thinkpad mute/mic LED quirk +lemote-a1004 + Lemote A1004 quirk +lemote-a1205 + Lemote A1205 quirk +olpc-xo + OLPC XO quirk +mute-led-eapd + Mute LED control via EAPD +hp-dock + HP dock support +mute-led-gpio + Mute LED control via GPIO STAC9200 ======== @@ -444,6 +440,8 @@ dell-eq Dell desktops/laptops alienware Alienware M17x +asus-mobo + Pin configs for ASUS mobo with 5.1/SPDIF out auto BIOS setup (default) @@ -477,6 +475,8 @@ hp-envy-ts-bass Pin fixup for HP Envy TS bass speaker (NID 0x10) hp-bnb13-eq Hardware equalizer setup for HP laptops +hp-envy-ts-bass + HP Envy TS bass support auto BIOS setup (default) @@ -496,10 +496,22 @@ auto Cirrus Logic CS4206/4207 ======================== +mbp53 + MacBook Pro 5,3 mbp55 MacBook Pro 5,5 imac27 IMac 27 Inch +imac27_122 + iMac 12,2 +apple + Generic Apple quirk +mbp101 + MacBookPro 10,1 +mbp81 + MacBookPro 8,1 +mba42 + MacBookAir 4,2 auto BIOS setup (default) @@ -509,6 +521,10 @@ mba6 MacBook Air 6,1 and 6,2 gpio0 Enable GPIO 0 amp +mbp11 + MacBookPro 11,2 +macmini + MacMini 7,1 auto BIOS setup (default) -- cgit v1.2.3-59-g8ed1b From 232cd35d0804cc241eb887bb8d4d9b3b9881c64a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 May 2017 14:17:48 -0700 Subject: ipv6: fix out of bound writes in __ip6_append_data() Andrey Konovalov and idaifish@gmail.com reported crashes caused by one skb shared_info being overwritten from __ip6_append_data() Andrey program lead to following state : copy -4200 datalen 2000 fraglen 2040 maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200 The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); is overwriting skb->head and skb_shared_info Since we apparently detect this rare condition too late, move the code earlier to even avoid allocating skb and risking crashes. Once again, many thanks to Andrey and syzkaller team. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Reported-by: Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d4a31becbd25..bf8a58a1c32d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1466,6 +1466,11 @@ alloc_new_skb: */ alloclen += sizeof(struct frag_hdr); + copy = datalen - transhdrlen - fraggap; + if (copy < 0) { + err = -EINVAL; + goto error; + } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, @@ -1515,13 +1520,9 @@ alloc_new_skb: data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; - - if (copy < 0) { - err = -EINVAL; - kfree_skb(skb); - goto error; - } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + getfrag(from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; -- cgit v1.2.3-59-g8ed1b From 9e7b9a25e170722f15ed54f5b963e9867f79195d Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 22 May 2017 13:09:19 +0200 Subject: mmc: cavium: Prevent crash with incomplete DT In case the DT specifies neither a regulator nor a gpio for the shared power the driver will crash accessing the regulator. Prevent the crash by checking the regulator before use. Use mmc_regulator_get_supply() instead of open coding the same logic. Signed-off-by: Jan Glauber Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index 58b51ba6aabd..b8aaf0fdb77c 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -839,14 +839,14 @@ static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) cvm_mmc_reset_bus(slot); if (host->global_pwr_gpiod) host->set_shared_power(host, 0); - else + else if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); break; case MMC_POWER_UP: if (host->global_pwr_gpiod) host->set_shared_power(host, 1); - else + else if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); break; } @@ -968,20 +968,15 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) return -EINVAL; } - mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc"); - if (IS_ERR(mmc->supply.vmmc)) { - if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER) - return -EPROBE_DEFER; - /* - * Legacy Octeon firmware has no regulator entry, fall-back to - * a hard-coded voltage to get a sane OCR. - */ + ret = mmc_regulator_get_supply(mmc); + if (ret == -EPROBE_DEFER) + return ret; + /* + * Legacy Octeon firmware has no regulator entry, fall-back to + * a hard-coded voltage to get a sane OCR. + */ + if (IS_ERR(mmc->supply.vmmc)) mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - } else { - ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc); - if (ret > 0) - mmc->ocr_avail = ret; - } /* Common MMC bindings */ ret = mmc_of_parse(mmc); -- cgit v1.2.3-59-g8ed1b From c2372c20425bd75a5527b3e2204059762190f6ca Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 22 May 2017 13:09:20 +0200 Subject: of/platform: Make of_platform_device_destroy globally visible of_platform_device_destroy is the counterpart to of_platform_device_create which is a non-static function. After creating a platform device it might be neccessary to destroy it to deal with -EPROBE_DEFER where a repeated of_platform_device_create call would fail otherwise. Therefore also make of_platform_device_destroy globally visible. Signed-off-by: Jan Glauber Acked-by: Rob Herring Signed-off-by: Ulf Hansson --- drivers/of/platform.c | 3 ++- include/linux/of_platform.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 71fecc2debfc..703a42118ffc 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void) arch_initcall_sync(of_platform_default_populate_init); #endif -static int of_platform_device_destroy(struct device *dev, void *data) +int of_platform_device_destroy(struct device *dev, void *data) { /* Do not touch devices not populated from the device tree */ if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) @@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data) of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } +EXPORT_SYMBOL_GPL(of_platform_device_destroy); /** * of_platform_depopulate() - Remove devices populated from device tree diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index dc8224ae28d5..e0d1946270f3 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np, const char *bus_id, struct device *parent); +extern int of_platform_device_destroy(struct device *dev, void *data); extern int of_platform_bus_probe(struct device_node *root, const struct of_device_id *matches, struct device *parent); -- cgit v1.2.3-59-g8ed1b From 8fb83b142823cdd1f85f78dcf9e861e9033919f9 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 22 May 2017 13:09:21 +0200 Subject: mmc: cavium: Fix probing race with regulator If the regulator probing is not yet finished this driver might catch a -EPROBE_DEFER. Returning after this condition did not remove the created platform device. On a repeated call to the probe function the of_platform_device_create fails. Calling of_platform_device_destroy after EPROBE_DEFER resolves this bug. Signed-off-by: Jan Glauber Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium-octeon.c | 11 ++++++++++- drivers/mmc/host/cavium-thunderx.c | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c index cbb566377508..951d2cdd7888 100644 --- a/drivers/mmc/host/cavium-octeon.c +++ b/drivers/mmc/host/cavium-octeon.c @@ -288,11 +288,20 @@ static int octeon_mmc_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Error populating slots\n"); octeon_mmc_set_shared_power(host, 0); - return ret; + goto error; } i++; } return 0; + +error: + for (i = 0; i < CAVIUM_MAX_MMC; i++) { + if (host->slot[i]) + cvm_mmc_of_slot_remove(host->slot[i]); + if (host->slot_pdev[i]) + of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL); + } + return ret; } static int octeon_mmc_remove(struct platform_device *pdev) diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c index fe3d77267cd6..b9cc95998799 100644 --- a/drivers/mmc/host/cavium-thunderx.c +++ b/drivers/mmc/host/cavium-thunderx.c @@ -146,6 +146,12 @@ static int thunder_mmc_probe(struct pci_dev *pdev, return 0; error: + for (i = 0; i < CAVIUM_MAX_MMC; i++) { + if (host->slot[i]) + cvm_mmc_of_slot_remove(host->slot[i]); + if (host->slot_pdev[i]) + of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL); + } clk_disable_unprepare(host->clk); return ret; } -- cgit v1.2.3-59-g8ed1b From bd703a1524e851b8d6d646be9dafd794d4eb6045 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 20 May 2017 01:52:11 +0300 Subject: net: atheros: atl2: don't return zero on failure path in atl2_probe() If dma mask checks fail in atl2_probe(), it breaks off initialization, deallocates all resources, but returns zero. The patch adds proper error code return value and make error code setup unified. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atlx/atl2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 63f2deec2a52..77a1c03255de 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) && pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n"); + err = -EIO; goto err_dma; } @@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * pcibios_set_master to do the needed arch specific settings */ pci_set_master(pdev); - err = -ENOMEM; netdev = alloc_etherdev(sizeof(struct atl2_adapter)); - if (!netdev) + if (!netdev) { + err = -ENOMEM; goto err_alloc_etherdev; + } SET_NETDEV_DEV(netdev, &pdev->dev); @@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; - err = -EIO; - netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX; netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); -- cgit v1.2.3-59-g8ed1b From 751da2a69b7cc82d83dc310ed7606225f2d6e014 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 May 2017 19:43:45 -0400 Subject: bonding: fix accounting of active ports in 3ad As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not removed from the aggregator when they are down, and the active slave count is NOT equal to number of ports in the aggregator, but rather the number of ports in the aggregator that are still enabled. The sysfs spew for bonding_show_ad_num_ports() has a comment that says "Show number of active 802.3ad ports.", but it's currently showing total number of ports, both active and inactive. Remedy it by using the same logic introduced in 0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and netlink all report the number of active ports. Note that this means that IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of NUM_PORTS, and thus perhaps should be renamed for clarity. Lightly tested on a dual i40e lacp bond, simulating link downs with an ip link set dev down, was able to produce the state where I could see both in the same aggregator, but a number of ports count of 1. MII Status: up Active Aggregator Info: Aggregator ID: 1 Number of ports: 2 <--- Slave Interface: ens10 MII Status: up <--- Aggregator ID: 1 Slave Interface: ens11 MII Status: up Aggregator ID: 1 MII Status: up Active Aggregator Info: Aggregator ID: 1 Number of ports: 1 <--- Slave Interface: ens10 MII Status: down <--- Aggregator ID: 1 Slave Interface: ens11 MII Status: up Aggregator ID: 1 CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c5fd4259da33..b44a6aeb346d 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2577,7 +2577,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; - ad_info->ports = aggregator->num_of_ports; + ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, -- cgit v1.2.3-59-g8ed1b From f5f968f2371ccdebb8a365487649673c9af68d09 Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Thu, 18 May 2017 22:27:40 +0530 Subject: mmc: sdhci-iproc: suppress spurious interrupt with Multiblock read The stingray SDHCI hardware supports ACMD12 and automatically issues after multi block transfer completed. If ACMD12 in SDHCI is disabled, spurious tx done interrupts are seen on multi block read command with below error message: Got data interrupt 0x00000002 even though no data operation was in progress. This patch uses SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 to enable ACM12 support in SDHCI hardware and suppress spurious interrupt. Signed-off-by: Srinath Mannam Reviewed-by: Ray Jui Reviewed-by: Scott Branden Acked-by: Adrian Hunter Fixes: b580c52d58d9 ("mmc: sdhci-iproc: add IPROC SDHCI driver") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-iproc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 3275d4995812..61666d269771 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -187,7 +187,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = { }; static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, .ops = &sdhci_iproc_ops, }; -- cgit v1.2.3-59-g8ed1b From e4eda884db7930cee434828759064b4711604078 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 May 2017 12:27:07 -0400 Subject: net: Make IP alignment calulations clearer. The assignmnet: ip_align = strict ? 2 : NET_IP_ALIGN; in compare_pkt_ptr_alignment() trips up Coverity because we can only get to this code when strict is true, therefore ip_align will always be 2 regardless of NET_IP_ALIGN's value. So just assign directly to '2' and explain the situation in the comment above. Reported-by: "Gustavo A. R. Silva" Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1eddb713b815..c72cd41f5b8b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -808,11 +808,15 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, reg_off += reg->aux_off; } - /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking - * we force this to 2 which is universally what architectures use - * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + /* For platforms that do not have a Kconfig enabling + * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of + * NET_IP_ALIGN is universally set to '2'. And on platforms + * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get + * to this code only in strict mode where we want to emulate + * the NET_IP_ALIGN==2 checking. Therefore use an + * unconditional IP align value of '2'. */ - ip_align = strict ? 2 : NET_IP_ALIGN; + ip_align = 2; if ((ip_align + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", ip_align, reg_off, off, size); -- cgit v1.2.3-59-g8ed1b From 72ccc471e13b8266d2ee2104521df5b92ba08e9c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 May 2017 14:46:46 -0400 Subject: bonding: fix randomly populated arp target array In commit dc9c4d0fe023, the arp_target array moved from a static global to a local variable. By the nature of static globals, the array used to be initialized to all 0. At present, it's full of random data, which that gets interpreted as arp_target values, when none have actually been specified. Systems end up booting with spew along these lines: [ 32.161783] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready [ 32.168475] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready [ 32.175089] 8021q: adding VLAN 0 to HW filter on device lacp0 [ 32.193091] IPv6: ADDRCONF(NETDEV_UP): lacp0: link is not ready [ 32.204892] lacp0: Setting MII monitoring interval to 100 [ 32.211071] lacp0: Removing ARP target 216.124.228.17 [ 32.216824] lacp0: Removing ARP target 218.160.255.255 [ 32.222646] lacp0: Removing ARP target 185.170.136.184 [ 32.228496] lacp0: invalid ARP target 255.255.255.255 specified for removal [ 32.236294] lacp0: option arp_ip_target: invalid value (-255.255.255.255) [ 32.243987] lacp0: Removing ARP target 56.125.228.17 [ 32.249625] lacp0: Removing ARP target 218.160.255.255 [ 32.255432] lacp0: Removing ARP target 15.157.233.184 [ 32.261165] lacp0: invalid ARP target 255.255.255.255 specified for removal [ 32.268939] lacp0: option arp_ip_target: invalid value (-255.255.255.255) [ 32.276632] lacp0: Removing ARP target 16.0.0.0 [ 32.281755] lacp0: Removing ARP target 218.160.255.255 [ 32.287567] lacp0: Removing ARP target 72.125.228.17 [ 32.293165] lacp0: Removing ARP target 218.160.255.255 [ 32.298970] lacp0: Removing ARP target 8.125.228.17 [ 32.304458] lacp0: Removing ARP target 218.160.255.255 None of these were actually specified as ARP targets, and the driver does seem to clean up the mess okay, but it's rather noisy and confusing, leaks values to userspace, and the 255.255.255.255 spew shows up even when debug prints are disabled. The fix: just zero out arp_target at init time. While we're in here, init arp_all_targets_value in the right place. Fixes: dc9c4d0fe023 ("bonding: reduce scope of some global variables") CC: Mahesh Bandewar CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Jarod Wilson Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2be78807fd6e..73313318399c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params *params) int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; struct bond_opt_value newval; const struct bond_opt_value *valptr; - int arp_all_targets_value; + int arp_all_targets_value = 0; u16 ad_actor_sys_prio = 0; u16 ad_user_port_key = 0; - __be32 arp_target[BOND_MAX_ARP_TARGETS]; + __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 }; int arp_ip_count; int bond_mode = BOND_MODE_ROUNDROBIN; int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; @@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params) arp_validate_value = 0; } - arp_all_targets_value = 0; if (arp_all_targets) { bond_opt_initstr(&newval, arp_all_targets); valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), -- cgit v1.2.3-59-g8ed1b From 499fde662f1957e3cb8d192a94a099ebe19c714b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 19 May 2017 11:21:59 -0700 Subject: vsock: use new wait API for vsock_stream_sendmsg() As reported by Michal, vsock_stream_sendmsg() could still sleep at vsock_stream_has_space() after prepare_to_wait(): vsock_stream_has_space vmci_transport_stream_has_space vmci_qpair_produce_free_space qp_lock qp_acquire_queue_mutex mutex_lock Just switch to the new wait API like we did for commit d9dc8b0f8b4e ("net: fix sleeping for sk_wait_event()"). Reported-by: Michal Kubecek Cc: Stefan Hajnoczi Cc: Jorgen Hansen Cc: "Michael S. Tsirkin" Cc: Claudio Imbrenda Signed-off-by: Cong Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 6f7f6757ceef..dfc8c51e4d74 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, long timeout; int err; struct vsock_transport_send_notify_data send_data; - - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); @@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; - while (total_written < len) { ssize_t written; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); if (err < 0) { - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } release_sock(sk); - timeout = schedule_timeout(timeout); + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } else if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after -- cgit v1.2.3-59-g8ed1b From 0544f5494a03b8846db74e02be5685d1f32b06c9 Mon Sep 17 00:00:00 2001 From: Marta Rybczynska Date: Mon, 10 Apr 2017 17:12:34 +0200 Subject: nvme-rdma: support devices with queue size < 32 In the case of small NVMe-oF queue size (<32) we may enter a deadlock caused by the fact that the IB completions aren't sent waiting for 32 and the send queue will fill up. The error is seen as (using mlx5): [ 2048.693355] mlx5_0:mlx5_ib_post_send:3765:(pid 7273): [ 2048.693360] nvme nvme1: nvme_rdma_post_send failed with error code -12 This patch changes the way the signaling is done so that it depends on the queue depth now. The magic define has been removed completely. Cc: stable@vger.kernel.org Signed-off-by: Marta Rybczynska Signed-off-by: Samuel Jones Acked-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dd1c6deef82f..e2c18f3d9dcf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } +static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) +{ + int sig_limit; + + /* + * We signal completion every queue depth/2 and also handle the + * degenerated case of a device with queue_depth=1, where we + * would need to signal every message. + */ + sig_limit = max(queue->queue_size / 2, 1); + return (++queue->sig_count % sig_limit) == 0; +} + static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct ib_send_wr *first, bool flush) @@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * Would have been way to obvious to handle this in hardware or * at least the RDMA stack.. * - * This messy and racy code sniplet is copy and pasted from the iSER - * initiator, and the magic '32' comes from there as well. - * * Always signal the flushes. The magic request used for the flush * sequencer is not allocated in our driver's tagset and it's * triggered to be freed by blk_cleanup_queue(). So we need to @@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * embedded in request's payload, is not freed when __ib_process_cq() * calls wr_cqe->done(). */ - if ((++queue->sig_count % 32) == 0 || flush) + if (nvme_rdma_queue_sig_limit(queue) || flush) wr.send_flags |= IB_SEND_SIGNALED; if (first) -- cgit v1.2.3-59-g8ed1b From 806f026f9b901eaf1a6baeb48b5da18d6a4f818e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:03 +0800 Subject: nvme: use blk_mq_start_hw_queues() in nvme_kill_queues() Inside nvme_kill_queues(), we have to start hw queues for draining requests in sw queues, .dispatch list and requeue list, so use blk_mq_start_hw_queues() instead of blk_mq_start_stopped_hw_queues() which only run queues if queues are stopped, but the queues may have been started already, for example nvme_start_queues() is called in reset work function. blk_mq_start_hw_queues() run hw queues in current context, instead of running asynchronously like before. Given nvme_kill_queues() is run from either remove context or reset worker context, both are fine to run hw queue directly. And the mutex of namespaces_mutex isn't a problem too becasue nvme_start_freeze() runs hw queue in this way already. Cc: stable@vger.kernel.org Reported-by: Zhang Yi Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d5e0906262ea..40d5e4a9e8d7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2437,7 +2437,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); blk_mq_abort_requeue_list(ns->queue); - blk_mq_start_stopped_hw_queues(ns->queue, true); + + /* + * Forcibly start all queues to avoid having stuck requests. + * Note that we must ensure the queues are not stopped + * when the final removal happens. + */ + blk_mq_start_hw_queues(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } -- cgit v1.2.3-59-g8ed1b From 986f75c876dbafed98eba7cb516c5118f155db23 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:04 +0800 Subject: nvme: avoid to use blk_mq_abort_requeue_list() NVMe may add request into requeue list simply and not kick off the requeue if hw queues are stopped. Then blk_mq_abort_requeue_list() is called in both nvme_kill_queues() and nvme_ns_remove() for dealing with this issue. Unfortunately blk_mq_abort_requeue_list() is absolutely a race maker, for example, one request may be requeued during the aborting. So this patch just calls blk_mq_kick_requeue_list() in nvme_kill_queues() to handle this issue like what nvme_start_queues() does. Now all requests in requeue list when queues are stopped will be handled by blk_mq_kick_requeue_list() when queues are restarted, either in nvme_start_queues() or in nvme_kill_queues(). Cc: stable@vger.kernel.org Reported-by: Zhang Yi Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40d5e4a9e8d7..04e115834702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2098,7 +2098,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (ns->ndev) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); - blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); } @@ -2436,7 +2435,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) continue; revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); - blk_mq_abort_requeue_list(ns->queue); /* * Forcibly start all queues to avoid having stuck requests. @@ -2444,6 +2442,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * when the final removal happens. */ blk_mq_start_hw_queues(ns->queue); + + /* draining requests in requeue list */ + blk_mq_kick_requeue_list(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } -- cgit v1.2.3-59-g8ed1b From 7254a50a5db40ca6739ddf37e0a45e6912532b2c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:05 +0800 Subject: blk-mq: remove blk_mq_abort_requeue_list() No one uses it any more, so remove it. Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- block/blk-mq.c | 19 ------------------- include/linux/blk-mq.h | 1 - 2 files changed, 20 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a69ad122ed66..f2224ffd225d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -void blk_mq_abort_requeue_list(struct request_queue *q) -{ - unsigned long flags; - LIST_HEAD(rq_list); - - spin_lock_irqsave(&q->requeue_lock, flags); - list_splice_init(&q->requeue_list, &rq_list); - spin_unlock_irqrestore(&q->requeue_lock, flags); - - while (!list_empty(&rq_list)) { - struct request *rq; - - rq = list_first_entry(&rq_list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_mq_end_request(rq, -EIO); - } -} -EXPORT_SYMBOL(blk_mq_abort_requeue_list); - struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c47aa248c640..fcd641032f8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3-59-g8ed1b From 2d76b2f8b54abd16225cd80afca36ed43f113c41 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 22 May 2017 16:46:13 +0200 Subject: net: sched: cls_matchall: fix null pointer dereference Since the head is guaranteed by the check above to be null, the call_rcu would explode. Remove the previously logically dead code that was made logically very much alive and kicking. Fixes: 985538eee06f ("net/sched: remove redundant null check on head") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index dee469fed967..51859b8edd7e 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); - call_rcu(&head->rcu, mall_destroy_rcu); return 0; err_replace_hw_filter: -- cgit v1.2.3-59-g8ed1b From 0ce872bf8b5c4d425a41940a523ff1b8daa0b275 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:15 -0700 Subject: nvme_fc: get rid of local reconnect_delay Remove the local copy of reconnect_delay. Use the value in the controller options directly. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index dca7165fabcf..c3ab1043efbd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -165,7 +165,6 @@ struct nvme_fc_ctrl { struct work_struct delete_work; struct work_struct reset_work; struct delayed_work connect_work; - int reconnect_delay; int connect_attempts; struct kref ref; @@ -2615,9 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->reconnect_delay); + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->reconnect_delay * HZ); + ctrl->ctrl.opts->reconnect_delay * HZ); } else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); @@ -2695,9 +2694,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->reconnect_delay); + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->reconnect_delay * HZ); + ctrl->ctrl.opts->reconnect_delay * HZ); } else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reconnect complete\n", @@ -2755,7 +2754,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); - ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ -- cgit v1.2.3-59-g8ed1b From 5bbecdbc8e7ffaaf47ac1f02014bf3bedda3fd11 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:16 -0700 Subject: nvme_fc: Support ctrl_loss_tmo Sync with Sagi's recent addition of ctrl_loss_tmo in the core fabrics layer. Remove local connect limits and connect_attempts variable. Use fabrics new nr_connects variable and use of nvmf_should_reconnect() Refactor duplicate reconnect failure code. Addresses review comment by Sagi on controller reset support: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 116 +++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 67 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c3ab1043efbd..a0f05d5e966c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -45,8 +45,6 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ -#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 - struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -165,7 +163,6 @@ struct nvme_fc_ctrl { struct work_struct delete_work; struct work_struct reset_work; struct delayed_work connect_work; - int connect_attempts; struct kref ref; u32 flags; @@ -2305,7 +2302,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) int ret; bool changed; - ctrl->connect_attempts++; + ++ctrl->ctrl.opts->nr_reconnects; /* * Create the admin queue @@ -2402,7 +2399,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - ctrl->connect_attempts = 0; + ctrl->ctrl.opts->nr_reconnects = 0; kref_get(&ctrl->ctrl.kref); @@ -2545,16 +2542,22 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) nvme_put_ctrl(&ctrl->ctrl); } -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +static bool +__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) { if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; + return true; if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) - return -EBUSY; + return true; - return 0; + return false; +} + +static int +__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +{ + return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; } /* @@ -2579,6 +2582,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) return ret; } +static void +nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, status); + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); + WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); + } +} + static void nvme_fc_reset_ctrl_work(struct work_struct *work) { @@ -2590,34 +2622,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) nvme_fc_delete_association(ctrl); ret = nvme_fc_create_association(ctrl); - if (ret) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", - ctrl->cnum, ret); - if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", - ctrl->cnum, ctrl->connect_attempts); - - if (!nvme_change_ctrl_state(&ctrl->ctrl, - NVME_CTRL_DELETING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: failed to change state " - "to DELETING\n", ctrl->cnum); - return; - } - - WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); - return; - } - - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); - queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->ctrl.opts->reconnect_delay * HZ); - } else + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); } @@ -2670,34 +2677,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) struct nvme_fc_ctrl, connect_work); ret = nvme_fc_create_association(ctrl); - if (ret) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt failed (%d)\n", - ctrl->cnum, ret); - if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", - ctrl->cnum, ctrl->connect_attempts); - - if (!nvme_change_ctrl_state(&ctrl->ctrl, - NVME_CTRL_DELETING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: failed to change state " - "to DELETING\n", ctrl->cnum); - return; - } - - WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); - return; - } - - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); - queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->ctrl.opts->reconnect_delay * HZ); - } else + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reconnect complete\n", ctrl->cnum); @@ -2969,7 +2951,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, - .allowed_opts = NVMF_OPT_RECONNECT_DELAY, + .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, }; -- cgit v1.2.3-59-g8ed1b From a5321aa5efea05ae748dc5b3e8053584213325ca Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:18 -0700 Subject: nvme_fc: revise comment on teardown Per the recommendation by Sagi on: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html An extra reference was pointed out. There's no issue with the references, but rather a literal interpretation of what the comment is saying. Reword the comment to avoid confusion. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a0f05d5e966c..0b7f7dd2779a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2532,10 +2532,10 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) /* * tear down the controller - * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. - * From there, the transport will tear down it's logical queues and - * association. + * After the last reference on the nvme ctrl is removed, + * the transport nvme_fc_nvme_ctrl_freed() callback will be + * invoked. From there, the transport will tear down it's + * logical queues and association. */ nvme_uninit_ctrl(&ctrl->ctrl); -- cgit v1.2.3-59-g8ed1b From 589ff7753bb54edd3ee4a9399ccc3ac48d9b22d7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:19 -0700 Subject: nvme_fc: set logging level on resets/deletes Per the review by Sagi on: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html Looked at existing warn vs info vs err dev_xxx levels for the messages printed on reconnects and deletes: - Resets due to error and resets transitioned to deletes are dev_warn - Other reset/disconnect messages are dev_info - Removed chatty io queue related messages Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0b7f7dd2779a..e4817f9f4323 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1747,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport association error detected: %s\n", ctrl->cnum, errmsg); - dev_info(ctrl->ctrl.device, + dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); /* stop the queues on error, cleanup is in reset thread */ @@ -2191,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) if (!opts->nr_io_queues) return 0; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - nvme_fc_init_io_queues(ctrl); memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); @@ -2264,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->queue_count == 1) return 0; - dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", - opts->nr_io_queues); - nvme_fc_init_io_queues(ctrl); ret = blk_mq_reinit_tagset(&ctrl->tag_set); @@ -2592,7 +2586,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) return; } - dev_warn(ctrl->ctrl.device, + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", ctrl->cnum, status); @@ -2603,7 +2597,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, ctrl->ctrl.opts->reconnect_delay * HZ); } else { - dev_info(ctrl->ctrl.device, + dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Max reconnect attempts (%d) " "reached. Removing controller\n", ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); @@ -2638,7 +2632,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - dev_warn(ctrl->ctrl.device, + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) -- cgit v1.2.3-59-g8ed1b From e392e1f1f408fe8baf1046c970d05cbf1f0ec945 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:24 -0700 Subject: nvme_fc: correct nvme status set on abort correct nvme status set on abort. Patch that changed status to being actual nvme status crossed in the night with the patch that added abort values. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e4817f9f4323..775869c69df6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1372,9 +1372,9 @@ done: complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); if (!complete_rq) { if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { - status = cpu_to_le16(NVME_SC_ABORT_REQ); + status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); if (blk_queue_dying(rq->q)) - status |= cpu_to_le16(NVME_SC_DNR); + status |= cpu_to_le16(NVME_SC_DNR << 1); } nvme_end_request(rq, status, result); } else -- cgit v1.2.3-59-g8ed1b From 2cb657bc0242dfdca20869685bf179774ef1a6fb Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:22 -0700 Subject: nvme_fc: remove extra controller reference taken on reconnect fix extra controller reference taken on reconnect by moving reference to initial controller create Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 775869c69df6..14a009e43aa5 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2395,8 +2395,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.opts->nr_reconnects = 0; - kref_get(&ctrl->ctrl.kref); - if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); @@ -2793,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = NULL; /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); /* as we're past the point where we transition to the ref * counting teardown path, if we return a bad pointer here, @@ -2809,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return ERR_PTR(ret); } + kref_get(&ctrl->ctrl.kref); + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -- cgit v1.2.3-59-g8ed1b From aace34c0bb8ea3c8bdcec865b6a4be4db0a68e33 Mon Sep 17 00:00:00 2001 From: Tin Huynh Date: Mon, 22 May 2017 16:19:20 +0700 Subject: leds: pca955x: Correct I2C Functionality The driver checks an incorrect flag of functionality of adapter. When a driver requires i2c_smbus_read_byte_data and i2c_smbus_write_byte_data, it should check I2C_FUNC_SMBUS_BYTE_DATA instead I2C_FUNC_I2C. This patch fixes the problem. Signed-off-by: Tin Huynh Signed-off-by: Jacek Anaszewski --- drivers/leds/leds-pca955x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 78a7ce816a47..9a873118ea5f 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -285,7 +285,7 @@ static int pca955x_probe(struct i2c_client *client, "slave address 0x%02x\n", client->name, chip->bits, client->addr); - if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; if (pdata) { -- cgit v1.2.3-59-g8ed1b From 5b81fc3cc625e857275573cb4240bbab553f919c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 17 May 2017 13:07:24 -0700 Subject: blk-throttle: add hierarchy support for latency target and idle time For idle time, children's setting should not be bigger than parent's. For latency target, children's setting should not be smaller than parent's. The leaf nodes will adjust their settings according to the hierarchy and compare their IO with the settings and do upgrade/downgrade. parents nodes don't need to track their IO latency/idle time. Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-throttle.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b78db2e5fdff..16174f8cb0a1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -157,6 +157,7 @@ struct throtl_grp { unsigned long last_check_time; unsigned long latency_target; /* us */ + unsigned long latency_target_conf; /* us */ /* When did we start a new slice */ unsigned long slice_start[2]; unsigned long slice_end[2]; @@ -165,6 +166,7 @@ struct throtl_grp { unsigned long checked_last_finish_time; /* ns / 1024 */ unsigned long avg_idletime; /* ns / 1024 */ unsigned long idletime_threshold; /* us */ + unsigned long idletime_threshold_conf; /* us */ unsigned int bio_cnt; /* total bios */ unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ @@ -482,6 +484,7 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) /* LIMIT_LOW will have default value 0 */ tg->latency_target = DFL_LATENCY_TARGET; + tg->latency_target_conf = DFL_LATENCY_TARGET; return &tg->pd; } @@ -512,6 +515,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd) tg->td = td; tg->idletime_threshold = td->dft_idletime_threshold; + tg->idletime_threshold_conf = td->dft_idletime_threshold; } /* @@ -1367,8 +1371,25 @@ static void tg_conf_updated(struct throtl_grp *tg) * restrictions in the whole hierarchy and allows them to bypass * blk-throttle. */ - blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) - tg_update_has_rules(blkg_to_tg(blkg)); + blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) { + struct throtl_grp *this_tg = blkg_to_tg(blkg); + struct throtl_grp *parent_tg; + + tg_update_has_rules(this_tg); + /* ignore root/second level */ + if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent || + !blkg->parent->parent) + continue; + parent_tg = blkg_to_tg(blkg->parent); + /* + * make sure all children has lower idle time threshold and + * higher latency target + */ + this_tg->idletime_threshold = min(this_tg->idletime_threshold, + parent_tg->idletime_threshold); + this_tg->latency_target = max(this_tg->latency_target, + parent_tg->latency_target); + } /* * We're already holding queue_lock and know @tg is valid. Let's @@ -1497,8 +1518,8 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, tg->iops_conf[READ][off] == iops_dft && tg->iops_conf[WRITE][off] == iops_dft && (off != LIMIT_LOW || - (tg->idletime_threshold == tg->td->dft_idletime_threshold && - tg->latency_target == DFL_LATENCY_TARGET))) + (tg->idletime_threshold_conf == tg->td->dft_idletime_threshold && + tg->latency_target_conf == DFL_LATENCY_TARGET))) return 0; if (tg->bps_conf[READ][off] != bps_dft) @@ -1514,17 +1535,17 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops_conf[WRITE][off]); if (off == LIMIT_LOW) { - if (tg->idletime_threshold == ULONG_MAX) + if (tg->idletime_threshold_conf == ULONG_MAX) strcpy(idle_time, " idle=max"); else snprintf(idle_time, sizeof(idle_time), " idle=%lu", - tg->idletime_threshold); + tg->idletime_threshold_conf); - if (tg->latency_target == ULONG_MAX) + if (tg->latency_target_conf == ULONG_MAX) strcpy(latency_time, " latency=max"); else snprintf(latency_time, sizeof(latency_time), - " latency=%lu", tg->latency_target); + " latency=%lu", tg->latency_target_conf); } seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n", @@ -1563,8 +1584,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, v[2] = tg->iops_conf[READ][index]; v[3] = tg->iops_conf[WRITE][index]; - idle_time = tg->idletime_threshold; - latency_time = tg->latency_target; + idle_time = tg->idletime_threshold_conf; + latency_time = tg->latency_target_conf; while (true) { char tok[27]; /* wiops=18446744073709551616 */ char *p; @@ -1628,10 +1649,10 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, blk_throtl_update_limit_valid(tg->td); if (tg->td->limit_valid[LIMIT_LOW]) tg->td->limit_index = LIMIT_LOW; - tg->idletime_threshold = (idle_time == ULONG_MAX) ? - ULONG_MAX : idle_time; - tg->latency_target = (latency_time == ULONG_MAX) ? - ULONG_MAX : latency_time; + tg->idletime_threshold_conf = idle_time; + tg->idletime_threshold = tg->idletime_threshold_conf; + tg->latency_target_conf = latency_time; + tg->latency_target = tg->latency_target_conf; } tg_conf_updated(tg); ret = 0; @@ -2385,6 +2406,7 @@ void blk_throtl_register_queue(struct request_queue *q) struct throtl_grp *tg = blkg_to_tg(blkg); tg->idletime_threshold = td->dft_idletime_threshold; + tg->idletime_threshold_conf = td->dft_idletime_threshold; } rcu_read_unlock(); } -- cgit v1.2.3-59-g8ed1b From 4cff729f62d1bd433178f1ffe09db5718835e925 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 17 May 2017 13:07:25 -0700 Subject: blk-throttle: output some debug info in trace These info are important to understand what's happening and help debug. Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-throttle.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 16174f8cb0a1..1f8d62f5e808 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1748,12 +1748,18 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg) * - IO latency is largely below threshold */ unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice); + bool ret; time = min_t(unsigned long, MAX_IDLE_TIME, time); - return (ktime_get_ns() >> 10) - tg->last_finish_time > time || + ret = (ktime_get_ns() >> 10) - tg->last_finish_time > time || tg->avg_idletime > tg->idletime_threshold || (tg->latency_target && tg->bio_cnt && tg->bad_bio_cnt * 5 < tg->bio_cnt); + throtl_log(&tg->service_queue, + "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d", + tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt, + tg->bio_cnt, ret, tg->td->scale); + return ret; } static bool throtl_tg_can_upgrade(struct throtl_grp *tg) @@ -1849,6 +1855,7 @@ static void throtl_upgrade_state(struct throtl_data *td) struct cgroup_subsys_state *pos_css; struct blkcg_gq *blkg; + throtl_log(&td->service_queue, "upgrade to max"); td->limit_index = LIMIT_MAX; td->low_upgrade_time = jiffies; td->scale = 0; @@ -1871,6 +1878,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new) { td->scale /= 2; + throtl_log(&td->service_queue, "downgrade, scale %d", td->scale); if (td->scale) { td->low_upgrade_time = jiffies - td->scale * td->throtl_slice; return; @@ -2044,6 +2052,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td) td->avg_buckets[i].valid = true; last_latency = td->avg_buckets[i].latency; } + + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) + throtl_log(&td->service_queue, + "Latency bucket %d: latency=%ld, valid=%d", i, + td->avg_buckets[i].latency, td->avg_buckets[i].valid); } #else static inline void throtl_update_latency_buckets(struct throtl_data *td) -- cgit v1.2.3-59-g8ed1b From 9bb67aeb96784527dbc784c7a1b234461299363c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 17 May 2017 13:07:26 -0700 Subject: blk-throttle: respect 0 bps/iops settings for io.low If a cgroup with low limit 0 for both bps/iops, the cgroup's low limit is ignored and we throttle the cgroup with its max limit. In this way, other cgroups with a low limit will not get protected. To fix this, we don't do the exception any more. cgroup will be throttled to a limit 0 if it uese default setting. To avoid completed stall, we give such cgroup tiny IO resources. Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-throttle.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 1f8d62f5e808..f6a9f42a0ad7 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -27,6 +27,8 @@ static int throtl_quantum = 32; #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ /* default latency target is 0, eg, guarantee IO latency by default */ #define DFL_LATENCY_TARGET (0) +#define MIN_THROTL_BPS (320 * 1024) +#define MIN_THROTL_IOPS (10) #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) @@ -296,8 +298,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw) td = tg->td; ret = tg->bps[rw][td->limit_index]; - if (ret == 0 && td->limit_index == LIMIT_LOW) - return tg->bps[rw][LIMIT_MAX]; + if (ret == 0 && td->limit_index == LIMIT_LOW) { + /* intermediate node or iops isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->iops[rw][td->limit_index]) + return U64_MAX; + else + return MIN_THROTL_BPS; + } if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] && tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) { @@ -317,10 +325,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) return UINT_MAX; + td = tg->td; ret = tg->iops[rw][td->limit_index]; - if (ret == 0 && tg->td->limit_index == LIMIT_LOW) - return tg->iops[rw][LIMIT_MAX]; + if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { + /* intermediate node or bps isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->bps[rw][td->limit_index]) + return UINT_MAX; + else + return MIN_THROTL_IOPS; + } if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] && tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) { @@ -1353,7 +1368,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static void tg_conf_updated(struct throtl_grp *tg) +static void tg_conf_updated(struct throtl_grp *tg, bool global) { struct throtl_service_queue *sq = &tg->service_queue; struct cgroup_subsys_state *pos_css; @@ -1371,7 +1386,8 @@ static void tg_conf_updated(struct throtl_grp *tg) * restrictions in the whole hierarchy and allows them to bypass * blk-throttle. */ - blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) { + blkg_for_each_descendant_pre(blkg, pos_css, + global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { struct throtl_grp *this_tg = blkg_to_tg(blkg); struct throtl_grp *parent_tg; @@ -1434,7 +1450,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, else *(unsigned int *)((void *)tg + of_cft(of)->private) = v; - tg_conf_updated(tg); + tg_conf_updated(tg, false); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1522,16 +1538,16 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, tg->latency_target_conf == DFL_LATENCY_TARGET))) return 0; - if (tg->bps_conf[READ][off] != bps_dft) + if (tg->bps_conf[READ][off] != U64_MAX) snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps_conf[READ][off]); - if (tg->bps_conf[WRITE][off] != bps_dft) + if (tg->bps_conf[WRITE][off] != U64_MAX) snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps_conf[WRITE][off]); - if (tg->iops_conf[READ][off] != iops_dft) + if (tg->iops_conf[READ][off] != UINT_MAX) snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops_conf[READ][off]); - if (tg->iops_conf[WRITE][off] != iops_dft) + if (tg->iops_conf[WRITE][off] != UINT_MAX) snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops_conf[WRITE][off]); if (off == LIMIT_LOW) { @@ -1654,7 +1670,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg->latency_target_conf = latency_time; tg->latency_target = tg->latency_target_conf; } - tg_conf_updated(tg); + tg_conf_updated(tg, index == LIMIT_LOW && + tg->td->limit_valid[LIMIT_LOW]); ret = 0; out_finish: blkg_conf_finish(&ctx); -- cgit v1.2.3-59-g8ed1b From b4f428ef2844e9fa8154f2faaca249aa74e222a7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 17 May 2017 13:07:27 -0700 Subject: blk-throttle: force user to configure all settings for io.low Default value of io.low limit is 0. If user doesn't configure the limit, last patch makes cgroup be throttled to very tiny bps/iops, which could stall the system. A cgroup with default settings of io.low limit really means nothing, so we force user to configure all settings, otherwise io.low limit doesn't take effect. With this stragety, default setting of latency/idle isn't important, so just set them to very conservative and safe value. Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-throttle.c | 80 ++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f6a9f42a0ad7..fc13dd0c6e39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -22,13 +22,11 @@ static int throtl_quantum = 32; #define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_SSD (HZ / 50) #define MAX_THROTL_SLICE (HZ) -#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */ -#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */ #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ -/* default latency target is 0, eg, guarantee IO latency by default */ -#define DFL_LATENCY_TARGET (0) #define MIN_THROTL_BPS (320 * 1024) #define MIN_THROTL_IOPS (10) +#define DFL_LATENCY_TARGET (-1L) +#define DFL_IDLE_THRESHOLD (0) #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) @@ -205,8 +203,6 @@ struct throtl_data unsigned int limit_index; bool limit_valid[LIMIT_CNT]; - unsigned long dft_idletime_threshold; /* us */ - unsigned long low_upgrade_time; unsigned long low_downgrade_time; @@ -500,6 +496,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) tg->latency_target = DFL_LATENCY_TARGET; tg->latency_target_conf = DFL_LATENCY_TARGET; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; return &tg->pd; } @@ -528,9 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; - - tg->idletime_threshold = td->dft_idletime_threshold; - tg->idletime_threshold_conf = td->dft_idletime_threshold; } /* @@ -1534,7 +1529,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, tg->iops_conf[READ][off] == iops_dft && tg->iops_conf[WRITE][off] == iops_dft && (off != LIMIT_LOW || - (tg->idletime_threshold_conf == tg->td->dft_idletime_threshold && + (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD && tg->latency_target_conf == DFL_LATENCY_TARGET))) return 0; @@ -1660,16 +1655,31 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg->iops_conf[READ][LIMIT_MAX]); tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW], tg->iops_conf[WRITE][LIMIT_MAX]); - - if (index == LIMIT_LOW) { - blk_throtl_update_limit_valid(tg->td); - if (tg->td->limit_valid[LIMIT_LOW]) - tg->td->limit_index = LIMIT_LOW; - tg->idletime_threshold_conf = idle_time; + tg->idletime_threshold_conf = idle_time; + tg->latency_target_conf = latency_time; + + /* force user to configure all settings for low limit */ + if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] || + tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) || + tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD || + tg->latency_target_conf == DFL_LATENCY_TARGET) { + tg->bps[READ][LIMIT_LOW] = 0; + tg->bps[WRITE][LIMIT_LOW] = 0; + tg->iops[READ][LIMIT_LOW] = 0; + tg->iops[WRITE][LIMIT_LOW] = 0; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->latency_target = DFL_LATENCY_TARGET; + } else if (index == LIMIT_LOW) { tg->idletime_threshold = tg->idletime_threshold_conf; - tg->latency_target_conf = latency_time; tg->latency_target = tg->latency_target_conf; } + + blk_throtl_update_limit_valid(tg->td); + if (tg->td->limit_valid[LIMIT_LOW]) { + if (index == LIMIT_LOW) + tg->td->limit_index = LIMIT_LOW; + } else + tg->td->limit_index = LIMIT_MAX; tg_conf_updated(tg, index == LIMIT_LOW && tg->td->limit_valid[LIMIT_LOW]); ret = 0; @@ -1760,17 +1770,19 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg) /* * cgroup is idle if: * - single idle is too long, longer than a fixed value (in case user - * configure a too big threshold) or 4 times of slice + * configure a too big threshold) or 4 times of idletime threshold * - average think time is more than threshold * - IO latency is largely below threshold */ - unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice); + unsigned long time; bool ret; - time = min_t(unsigned long, MAX_IDLE_TIME, time); - ret = (ktime_get_ns() >> 10) - tg->last_finish_time > time || - tg->avg_idletime > tg->idletime_threshold || - (tg->latency_target && tg->bio_cnt && + time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold); + ret = tg->latency_target == DFL_LATENCY_TARGET || + tg->idletime_threshold == DFL_IDLE_THRESHOLD || + (ktime_get_ns() >> 10) - tg->last_finish_time > time || + tg->avg_idletime > tg->idletime_threshold || + (tg->latency_target && tg->bio_cnt && tg->bad_bio_cnt * 5 < tg->bio_cnt); throtl_log(&tg->service_queue, "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d", @@ -2405,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q) void blk_throtl_register_queue(struct request_queue *q) { struct throtl_data *td; - struct cgroup_subsys_state *pos_css; - struct blkcg_gq *blkg; td = q->td; BUG_ON(!td); - if (blk_queue_nonrot(q)) { + if (blk_queue_nonrot(q)) td->throtl_slice = DFL_THROTL_SLICE_SSD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD; - } else { + else td->throtl_slice = DFL_THROTL_SLICE_HD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD; - } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ td->throtl_slice = DFL_THROTL_SLICE_HD; @@ -2426,19 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q) td->track_bio_latency = !q->mq_ops && !q->request_fn; if (!td->track_bio_latency) blk_stat_enable_accounting(q); - - /* - * some tg are created before queue is fully initialized, eg, nonrot - * isn't initialized yet - */ - rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->idletime_threshold = td->dft_idletime_threshold; - tg->idletime_threshold_conf = td->dft_idletime_threshold; - } - rcu_read_unlock(); } #ifdef CONFIG_BLK_DEV_THROTTLING_LOW -- cgit v1.2.3-59-g8ed1b From c849e55178f559c4bbed43efb113cb7602aade89 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 16 May 2017 19:21:08 +0200 Subject: PCI: endpoint: Make PCI_ENDPOINT depend on HAS_DMA If NO_DMA=y: drivers/built-in.o: In function `__pci_epc_create': (.text+0xef4e): undefined reference to `bad_dma_ops' drivers/built-in.o: In function `pci_epc_add_epf': (.text+0xf676): undefined reference to `bad_dma_ops' drivers/built-in.o: In function `pci_epf_alloc_space': (.text+0xfa32): undefined reference to `bad_dma_ops' drivers/built-in.o: In function `pci_epf_free_space': (.text+0xfac4): undefined reference to `bad_dma_ops' Add a dependency on HAS_DMA to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Bjorn Helgaas --- drivers/pci/endpoint/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig index c23f146fb5a6..c09623ca8c3b 100644 --- a/drivers/pci/endpoint/Kconfig +++ b/drivers/pci/endpoint/Kconfig @@ -6,6 +6,7 @@ menu "PCI Endpoint" config PCI_ENDPOINT bool "PCI Endpoint Support" + depends on HAS_DMA help Enable this configuration option to support configurable PCI endpoint. This should be enabled if the platform has a PCI -- cgit v1.2.3-59-g8ed1b From e40cf640b8f632091a30ef0b030c83546f07c902 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 22 May 2017 16:52:24 -0500 Subject: switchtec: Use new cdev_device_add() helper function Convert from "cdev_add() + device_add()" to cdev_device_add(), and from "device_del() + cdev_del()" to cdev_device_del(). [bhelgaas: changelog] Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/switch/switchtec.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index cc6e085008fb..abaa227a5f34 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1291,7 +1291,6 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) cdev = &stdev->cdev; cdev_init(cdev, &switchtec_fops); cdev->owner = THIS_MODULE; - cdev->kobj.parent = &dev->kobj; return stdev; @@ -1479,11 +1478,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev, SWITCHTEC_EVENT_EN_IRQ, &stdev->mmio_part_cfg->mrpc_comp_hdr); - rc = cdev_add(&stdev->cdev, stdev->dev.devt, 1); - if (rc) - goto err_put; - - rc = device_add(&stdev->dev); + rc = cdev_device_add(&stdev->cdev, &stdev->dev); if (rc) goto err_devadd; @@ -1492,7 +1487,6 @@ static int switchtec_pci_probe(struct pci_dev *pdev, return 0; err_devadd: - cdev_del(&stdev->cdev); stdev_kill(stdev); err_put: ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); @@ -1506,8 +1500,7 @@ static void switchtec_pci_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); - device_del(&stdev->dev); - cdev_del(&stdev->cdev); + cdev_device_del(&stdev->cdev, &stdev->dev); ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); dev_info(&stdev->dev, "unregistered.\n"); -- cgit v1.2.3-59-g8ed1b From 9871e9bb5cf6ff0b51457ca74c270c5c5230b224 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 22 May 2017 16:52:30 -0500 Subject: switchtec: Fix minor bug with partition ID register When a switch endpoint is configured without NTB, the mmio_ntb registers will read all zeros. However, in corner case configurations where the partition ID is not zero and NTB is not enabled, the code will have the wrong partition ID and this causes the driver to use the wrong set of drivers. To fix this we simply take the partition ID from the system info region. Reported-by: Dingbao Chen Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/switch/switchtec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index abaa227a5f34..f6a63406c76e 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1441,12 +1441,15 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET; stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET; stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET; - stdev->partition = ioread8(&stdev->mmio_ntb->partition_id); + stdev->partition = ioread8(&stdev->mmio_sys_info->partition_id); stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count); stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET; stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition]; stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET; + if (stdev->partition_count < 1) + stdev->partition_count = 1; + init_pff(stdev); pci_set_drvdata(pdev, stdev); -- cgit v1.2.3-59-g8ed1b From 415b6185c541dc0a21457ff307cdb61950a6eb9f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 22 May 2017 17:06:30 -0500 Subject: PCI: imx6: Fix config read timeout handling Commit cc7b0d495589 ("PCI: designware: Update PCI config space remap function") made PCI configuration requests non-posted, which means we now get a synchronous abort when the CFG space read to probe for downstream devices times out. Synchronous aborts need to be handled differently from the async aborts we were getting before, in particular the PC needs to be advanced when resolving the abort. This is mostly a copy of what other PCI drivers do on ARM to handle those aborts. [bhelgaas: changelog, "Fixes"] Fixes: cc7b0d495589 ("PCI: designware: Update PCI config space remap function") Tested-by: Fabio Estevam Tested-by: Peter Senna Tschudin Signed-off-by: Lucas Stach Signed-off-by: Bjorn Helgaas Acked-by: Richard Zhu --- drivers/pci/dwc/pci-imx6.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index a98cba55c7f0..19a289b8cc94 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -252,7 +252,34 @@ static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie) static int imx6q_pcie_abort_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - return 0; + unsigned long pc = instruction_pointer(regs); + unsigned long instr = *(unsigned long *)pc; + int reg = (instr >> 12) & 15; + + /* + * If the instruction being executed was a read, + * make it look like it read all-ones. + */ + if ((instr & 0x0c100000) == 0x04100000) { + unsigned long val; + + if (instr & 0x00400000) + val = 255; + else + val = -1; + + regs->uregs[reg] = val; + regs->ARM_pc += 4; + return 0; + } + + if ((instr & 0x0e100090) == 0x00100090) { + regs->uregs[reg] = -1; + regs->ARM_pc += 4; + return 0; + } + + return 1; } static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie) @@ -819,8 +846,8 @@ static int __init imx6_pcie_init(void) * we can install the handler here without risking it * accessing some uninitialized driver state. */ - hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0, - "imprecise external abort"); + hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0, + "external abort on non-linefetch"); return platform_driver_register(&imx6_pcie_driver); } -- cgit v1.2.3-59-g8ed1b From c10e8031d5b34cde06b039ca2e8af87a33d5ba11 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 18 May 2017 13:07:49 -0700 Subject: efi-pstore: Fix write/erase id tracking Prior to the pstore interface refactoring, the "id" generated during a backend pstore_write() was only retained by the internal pstore inode tracking list. Additionally the "part" was ignored, so EFI would encode this in the id. This corrects the misunderstandings and correctly sets "id" during pstore_write(), and uses "part" directly during pstore_erase(). Reported-by: Marta Lofstedt Fixes: 76cc9580e3fb ("pstore: Replace arguments for write() API") Fixes: a61072aae693 ("pstore: Replace arguments for erase() API") Signed-off-by: Kees Cook Tested-by: Marta Lofstedt --- drivers/firmware/efi/efi-pstore.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 44148fd4c9f2..dda2e96328c0 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -53,6 +53,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, if (sscanf(name, "dump-type%u-%u-%d-%lu-%c", &record->type, &part, &cnt, &time, &data_type) == 5) { record->id = generic_id(time, part, cnt); + record->part = part; record->count = cnt; record->time.tv_sec = time; record->time.tv_nsec = 0; @@ -64,6 +65,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, } else if (sscanf(name, "dump-type%u-%u-%d-%lu", &record->type, &part, &cnt, &time) == 4) { record->id = generic_id(time, part, cnt); + record->part = part; record->count = cnt; record->time.tv_sec = time; record->time.tv_nsec = 0; @@ -77,6 +79,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, * multiple logs, remains. */ record->id = generic_id(time, part, 0); + record->part = part; record->count = 0; record->time.tv_sec = time; record->time.tv_nsec = 0; @@ -241,9 +244,15 @@ static int efi_pstore_write(struct pstore_record *record) efi_guid_t vendor = LINUX_EFI_CRASH_GUID; int i, ret = 0; + record->time.tv_sec = get_seconds(); + record->time.tv_nsec = 0; + + record->id = generic_id(record->time.tv_sec, record->part, + record->count); + snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu-%c", record->type, record->part, record->count, - get_seconds(), record->compressed ? 'C' : 'D'); + record->time.tv_sec, record->compressed ? 'C' : 'D'); for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = name[i]; @@ -255,7 +264,6 @@ static int efi_pstore_write(struct pstore_record *record) if (record->reason == KMSG_DUMP_OOPS) efivar_run_worker(); - record->id = record->part; return ret; }; @@ -287,7 +295,7 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data) * holding multiple logs, remains. */ snprintf(name_old, sizeof(name_old), "dump-type%u-%u-%lu", - ed->record->type, (unsigned int)ed->record->id, + ed->record->type, ed->record->part, ed->record->time.tv_sec); for (i = 0; i < DUMP_NAME_LEN; i++) @@ -320,10 +328,7 @@ static int efi_pstore_erase(struct pstore_record *record) char name[DUMP_NAME_LEN]; efi_char16_t efi_name[DUMP_NAME_LEN]; int found, i; - unsigned int part; - do_div(record->id, 1000); - part = do_div(record->id, 100); snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu", record->type, record->part, record->count, record->time.tv_sec); -- cgit v1.2.3-59-g8ed1b From aa3d4409b664813ceb86a24bd09458cdd29cbb8a Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Mon, 22 May 2017 17:19:45 -0700 Subject: Input: edt-ft5x06 - increase allowed data range for threshold parameter The datasheet and application note does not mention an allowed range for the M09_REGISTER_THRESHOLD parameter. One of our customers needs to set lower values than 20 and they seem to work just fine on EDT EP0xx0M09 with T5x06 touch. So, lacking a known lower limit, we increase the range for thresholds, and set the lower limit to 0. The documentation is updated accordingly. Signed-off-by: Schoefegger Stefan Signed-off-by: Manfred Schlaegl Signed-off-by: Martin Kepplinger Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt | 2 +- Documentation/input/devices/edt-ft5x06.rst | 2 +- drivers/input/touchscreen/edt-ft5x06.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt index 6db22103e2dd..025cf8c9324a 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt @@ -36,7 +36,7 @@ Optional properties: control gpios - threshold: allows setting the "click"-threshold in the range - from 20 to 80. + from 0 to 80. - gain: allows setting the sensitivity in the range from 0 to 31. Note that lower values indicate higher diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst index 2032f0b7a8fa..1ccc94b192b7 100644 --- a/Documentation/input/devices/edt-ft5x06.rst +++ b/Documentation/input/devices/edt-ft5x06.rst @@ -15,7 +15,7 @@ It has been tested with the following devices: The driver allows configuration of the touch screen via a set of sysfs files: /sys/class/input/eventX/device/device/threshold: - allows setting the "click"-threshold in the range from 20 to 80. + allows setting the "click"-threshold in the range from 0 to 80. /sys/class/input/eventX/device/device/gain: allows setting the sensitivity in the range from 0 to 31. Note that diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 8cf8d8d5d4ef..f872817e81e4 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -471,7 +471,7 @@ static EDT_ATTR(gain, S_IWUSR | S_IRUGO, WORK_REGISTER_GAIN, static EDT_ATTR(offset, S_IWUSR | S_IRUGO, WORK_REGISTER_OFFSET, M09_REGISTER_OFFSET, 0, 31); static EDT_ATTR(threshold, S_IWUSR | S_IRUGO, WORK_REGISTER_THRESHOLD, - M09_REGISTER_THRESHOLD, 20, 80); + M09_REGISTER_THRESHOLD, 0, 80); static EDT_ATTR(report_rate, S_IWUSR | S_IRUGO, WORK_REGISTER_REPORT_RATE, NO_REGISTER, 3, 14); -- cgit v1.2.3-59-g8ed1b From 089b50d95948f691589cca4d81f1f8761747dbaa Mon Sep 17 00:00:00 2001 From: Maxime Roussin-Bélanger Date: Fri, 19 May 2017 14:59:03 -0700 Subject: Input: atmel_mxt_ts - add T100 as a readable object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using the 'object' sysfs attribute, T100 is not displayed in the output. Signed-off-by: Maxime Roussin-Bélanger Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 2302aef2b2d4..dd042a9b0aaa 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -350,6 +350,7 @@ static bool mxt_object_readable(unsigned int type) case MXT_TOUCH_KEYARRAY_T15: case MXT_TOUCH_PROXIMITY_T23: case MXT_TOUCH_PROXKEY_T52: + case MXT_TOUCH_MULTITOUCHSCREEN_T100: case MXT_PROCI_GRIPFACE_T20: case MXT_PROCG_NOISE_T22: case MXT_PROCI_ONETOUCH_T24: -- cgit v1.2.3-59-g8ed1b From ad258fb918dae8b1ec79a85b4c7f518e4f902869 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 22 May 2017 07:46:55 +0200 Subject: i2c: designware: Fix bogus sda_hold_time due to uninitialized vars We need to initializes those variables to 0 for platforms that do not provide ACPI parameters. Otherwise, we set sda_hold_time to random values, breaking e.g. Galileo and IOT2000 boards. Reported-and-tested-by: Linus Torvalds Reported-by: Tobias Klausmann Fixes: 9d6408433019 ("i2c: designware: don't infer timings described by ACPI from clock rate") Signed-off-by: Jan Kiszka Reviewed-by: Ard Biesheuvel Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang Signed-off-by: Linus Torvalds --- drivers/i2c/busses/i2c-designware-platdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 6283b99d2b17..d1263b82d646 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -94,9 +94,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], static int dw_i2c_acpi_configure(struct platform_device *pdev) { struct dw_i2c_dev *dev = platform_get_drvdata(pdev); + u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0; acpi_handle handle = ACPI_HANDLE(&pdev->dev); const struct acpi_device_id *id; - u32 ss_ht, fp_ht, hs_ht, fs_ht; struct acpi_device *adev; const char *uid; -- cgit v1.2.3-59-g8ed1b From 1fc2e41f7af4572b07190f9dec28396b418e9a36 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 22 May 2017 20:58:11 +0300 Subject: ALSA: hda - apply STAC_9200_DELL_M22 quirk for Dell Latitude D430 This model is actually called 92XXM2-8 in Windows driver. But since pin configs for M22 and M28 are identical, just reuse M22 quirk. Fixes external microphone (tested) and probably docking station ports (not tested). Signed-off-by: Alexander Tsoy Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index faa3d38bac0b..6cefdf6c0b75 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = { "Dell Inspiron 1501", STAC_9200_DELL_M26), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6, "unknown Dell", STAC_9200_DELL_M26), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201, + "Dell Latitude D430", STAC_9200_DELL_M22), /* Panasonic */ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC), /* Gateway machines needs EAPD to be set on resume */ -- cgit v1.2.3-59-g8ed1b From 429030bc944ee9a8bbe5d9bb23dcda0ae2205450 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 19 May 2017 14:58:19 -0300 Subject: drm: qxl: Delay entering atomic context during cursor update qxl_release_map will enter an atomic context, but since we still need to alloc memory for BOs, we better delay that until we have everything we need, in case we need to sleep inside the allocation. This avoids the Sleep in atomic state below, which was reported by Mike. [ 43.910362] BUG: sleeping function called from invalid context at mm/slab.h:432 [ 43.910955] in_atomic(): 1, irqs_disabled(): 0, pid: 2077, name: Xorg [ 43.911472] Preemption disabled at: [ 43.911478] [] qxl_bo_kmap_atomic_page+0xa5/0x100 [qxl] [ 43.912103] CPU: 0 PID: 2077 Comm: Xorg Tainted: G E 4.12.0-master #38 [ 43.912550] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20161202_174313-build11a 04/01/2014 [ 43.913202] Call Trace: [ 43.913371] dump_stack+0x65/0x89 [ 43.913581] ? qxl_bo_kmap_atomic_page+0xa5/0x100 [qxl] [ 43.913876] ___might_sleep+0x11a/0x190 [ 43.914095] __might_sleep+0x4a/0x80 [ 43.914319] ? qxl_bo_create+0x50/0x190 [qxl] [ 43.914565] kmem_cache_alloc_trace+0x46/0x180 [ 43.914836] qxl_bo_create+0x50/0x190 [qxl] [ 43.915082] ? refcount_dec_and_test+0x11/0x20 [ 43.915332] ? ttm_mem_io_reserve+0x41/0xe0 [ttm] [ 43.915595] qxl_alloc_bo_reserved+0x37/0xb0 [qxl] [ 43.915884] qxl_cursor_atomic_update+0x8f/0x260 [qxl] [ 43.916172] ? drm_atomic_helper_update_legacy_modeset_state+0x1d6/0x210 [drm_kms_helper] [ 43.916623] drm_atomic_helper_commit_planes+0xec/0x230 [drm_kms_helper] [ 43.916995] drm_atomic_helper_commit_tail+0x2b/0x60 [drm_kms_helper] [ 43.917398] commit_tail+0x65/0x70 [drm_kms_helper] [ 43.917693] drm_atomic_helper_commit+0xa9/0x100 [drm_kms_helper] [ 43.918039] drm_atomic_commit+0x4b/0x50 [drm] [ 43.918334] drm_atomic_helper_update_plane+0xf1/0x110 [drm_kms_helper] [ 43.918902] __setplane_internal+0x19f/0x280 [drm] [ 43.919240] drm_mode_cursor_universal+0x101/0x1c0 [drm] [ 43.919541] drm_mode_cursor_common+0x15b/0x1d0 [drm] [ 43.919858] drm_mode_cursor2_ioctl+0xe/0x10 [drm] [ 43.920157] drm_ioctl+0x211/0x460 [drm] [ 43.920383] ? drm_mode_cursor_ioctl+0x50/0x50 [drm] [ 43.920664] ? handle_mm_fault+0x93/0x160 [ 43.920893] do_vfs_ioctl+0x96/0x6e0 [ 43.921117] ? __fget+0x73/0xa0 [ 43.921322] SyS_ioctl+0x41/0x70 [ 43.921545] entry_SYSCALL_64_fastpath+0x1a/0xa5 [ 43.922188] RIP: 0033:0x7f1145804bc7 [ 43.922526] RSP: 002b:00007ffcd3e50508 EFLAGS: 00003246 ORIG_RAX: 0000000000000010 [ 43.923367] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007f1145804bc7 [ 43.923852] RDX: 00007ffcd3e50540 RSI: 00000000c02464bb RDI: 000000000000000b [ 43.924299] RBP: 0000000000000040 R08: 0000000000000040 R09: 000000000000000c [ 43.924694] R10: 00007ffcd3e50340 R11: 0000000000003246 R12: 0000000000000018 [ 43.925128] R13: 00000000022bc390 R14: 0000000000000040 R15: 00007ffcd3e5062c Reported-by: Mike Galbraith Signed-off-by: Gabriel Krisman Bertazi Link: http://patchwork.freedesktop.org/patch/msgid/20170519175819.15682-1-krisman@collabora.co.uk Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/qxl/qxl_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 058340a002c2..4a340efd8ba6 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -575,8 +575,6 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, if (ret) return; - cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release); - if (fb != old_state->fb) { obj = to_qxl_framebuffer(fb)->obj; user_bo = gem_to_qxl_bo(obj); @@ -614,6 +612,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, qxl_bo_kunmap(cursor_bo); qxl_bo_kunmap(user_bo); + cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release); cmd->u.set.visible = 1; cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); @@ -624,6 +623,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, if (ret) goto out_free_release; + cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_MOVE; } -- cgit v1.2.3-59-g8ed1b From f928543404bdf6bb4e8d6a6c3ced5edebd0d6f38 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 22 May 2017 15:59:45 +0200 Subject: drm: Fix deadlock retry loop in page_flip_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I failed to properly onion-wrap the unwind code: We acquire the vblank reference before we start with the wait-wound locking dance, hence we must make sure we retry before we drop the reference. Oops. v2: The vblank_put must be after the frambuffer_put (Michel). I suck at unwrapping code that doesn't use separate labels for each stage, but checks each pointer first ... While re-reading everything I also realized that we must clean up the fb refcounts, and specifically plane->old_fb before we drop the locks, either in the final unlocking, or in the w/w retry path. Hence the correct fix is to drop the vblank_put to the very bottom. Fixes: 29dc0d1de182 ("drm: Roll out acquire context for the page_flip ioctl") Cc: Harry Wentland Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Reported-by: Tommi Rantala Cc: Tommi Rantala Cc: Michel Dänzer Tested-by: Tommi Rantala Reviewed-by: Michel Dänzer Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170522135945.28831-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_plane.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index fedd4d60d9cd..5dc8c4350602 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -948,8 +948,6 @@ retry: } out: - if (ret && crtc->funcs->page_flip_target) - drm_crtc_vblank_put(crtc); if (fb) drm_framebuffer_put(fb); if (crtc->primary->old_fb) @@ -964,5 +962,8 @@ out: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); + if (ret && crtc->funcs->page_flip_target) + drm_crtc_vblank_put(crtc); + return ret; } -- cgit v1.2.3-59-g8ed1b From c477ebe21fabe0010a2ed324ce3a1762c757d867 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Sat, 6 May 2017 11:41:30 +0200 Subject: mmc: dt: pwrseq-simple: Invent power-off-delay-us During power off, after the GPIO pin has been asserted, some devices like the Wifi chip from TI, Wl18xx, needs a delay before the host continues with clock gating and turning off regulators as to follow a graceful shutdown sequence. Therefore invent an optional power-off-delay-us DT binding for mmc-pwrseq-simple, to allow us to support this constraint. Cc: devicetree@vger.kernel.org Cc: Rob Herring Cc: linux-mmc@vger.kernel.org Signed-off-by: Ulf Hansson Acked-by: Arnd Bergmann --- Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt index e25436861867..9029b45b8a22 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt @@ -18,6 +18,8 @@ Optional properties: "ext_clock" (External clock provided to the card). - post-power-on-delay-ms : Delay in ms after powering the card and de-asserting the reset-gpios (if any) +- power-off-delay-us : Delay in us after asserting the reset-gpios (if any) + during power off of the card. Example: -- cgit v1.2.3-59-g8ed1b From e9256e142f597edf90c68cec22db4c4aebaa27de Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Sat, 6 May 2017 11:43:05 +0200 Subject: mmc: pwrseq_simple: Parse DTS for the power-off-delay-us property If the optional power-off-delay-us property is found, insert the corresponding delay after asserting the GPIO during power off. This enables a graceful shutdown sequence for some devices. Cc: linux-mmc@vger.kernel.org Signed-off-by: Ulf Hansson Acked-by: Arnd Bergmann --- drivers/mmc/core/pwrseq_simple.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c index 1304160de168..13ef162cf066 100644 --- a/drivers/mmc/core/pwrseq_simple.c +++ b/drivers/mmc/core/pwrseq_simple.c @@ -27,6 +27,7 @@ struct mmc_pwrseq_simple { struct mmc_pwrseq pwrseq; bool clk_enabled; u32 post_power_on_delay_ms; + u32 power_off_delay_us; struct clk *ext_clk; struct gpio_descs *reset_gpios; }; @@ -78,6 +79,10 @@ static void mmc_pwrseq_simple_power_off(struct mmc_host *host) mmc_pwrseq_simple_set_gpios_value(pwrseq, 1); + if (pwrseq->power_off_delay_us) + usleep_range(pwrseq->power_off_delay_us, + 2 * pwrseq->power_off_delay_us); + if (!IS_ERR(pwrseq->ext_clk) && pwrseq->clk_enabled) { clk_disable_unprepare(pwrseq->ext_clk); pwrseq->clk_enabled = false; @@ -119,6 +124,8 @@ static int mmc_pwrseq_simple_probe(struct platform_device *pdev) device_property_read_u32(dev, "post-power-on-delay-ms", &pwrseq->post_power_on_delay_ms); + device_property_read_u32(dev, "power-off-delay-us", + &pwrseq->power_off_delay_us); pwrseq->pwrseq.dev = dev; pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops; -- cgit v1.2.3-59-g8ed1b From f74ac688c981138c914f9afba50b646146e35585 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 24 Apr 2017 22:40:22 +0200 Subject: mfd: dts: hi655x: Add clock binding for the pmic The hi655x PMIC provides the regulators but also a clock. The latter is missing in the definition, so extend the documentation to include this as well. Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Acked-by: Lee Jones [Ulf: Split patch and updated changelog] Signed-off-by: Ulf Hansson Acked-by: Arnd Bergmann --- Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt index 05485699d70e..9630ac0e4b56 100644 --- a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt +++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt @@ -16,6 +16,11 @@ Required properties: - reg: Base address of PMIC on Hi6220 SoC. - interrupt-controller: Hi655x has internal IRQs (has own IRQ domain). - pmic-gpios: The GPIO used by PMIC IRQ. +- #clock-cells: From common clock binding; shall be set to 0 + +Optional properties: +- clock-output-names: From common clock binding to override the + default output clock name Example: pmic: pmic@f8000000 { @@ -24,4 +29,5 @@ Example: interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + #clock-cells = <0>; } -- cgit v1.2.3-59-g8ed1b From 307ded8968868e55343e063fbe96cff1efd77eb6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 24 Apr 2017 22:40:22 +0200 Subject: arm64: dts: hikey: Add clock for the pmic mfd The hi655x PMIC provides the regulators but also a clock. The latter is missing so let's add it. This clock is used by WiFi/Bluetooth chip, but that connection is done in a separate change on top of this one. Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Acked-by: Lee Jones [Ulf: Split patch and updated changelog] Signed-off-by: Ulf Hansson Acked-by: Arnd Bergmann --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 75bce2d0b1a8..d22eb3a646c4 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -330,6 +330,7 @@ pmic: pmic@f8000000 { compatible = "hisilicon,hi655x-pmic"; reg = <0x0 0xf8000000 0x0 0x1000>; + #clock-cells = <0>; interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; -- cgit v1.2.3-59-g8ed1b From 1b32a5ff98fbb271d2235ddcfe3b58f514f8260a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 May 2017 12:46:55 +0200 Subject: arm64: dts: hi6220: Move the fixed_5v_hub regulator to the hikey dts The regulator is a part of the hikey board, therefore let's move it from the hi6220 SoC dtsi file into the hikey dts file . Let's also rename the regulator according to the datasheet (5V_HUB) to better reflect the HW. Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Arnd Bergmann --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 10 ++++++++++ arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 12 +----------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index d22eb3a646c4..0f6cba77fc76 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -81,6 +81,16 @@ }; }; + reg_5v_hub: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "5V_HUB"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + gpio = <&gpio0 7 0>; + regulator-always-on; + }; + soc { spi0: spi@f7106000 { status = "ok"; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 1e5129b19280..951152d44c02 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -725,20 +725,10 @@ status = "disabled"; }; - fixed_5v_hub: regulator@0 { - compatible = "regulator-fixed"; - regulator-name = "fixed_5v_hub"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - regulator-boot-on; - gpio = <&gpio0 7 0>; - regulator-always-on; - }; - usb_phy: usbphy { compatible = "hisilicon,hi6220-usb-phy"; #phy-cells = <0>; - phy-supply = <&fixed_5v_hub>; + phy-supply = <®_5v_hub>; hisilicon,peripheral-syscon = <&sys_ctrl>; }; -- cgit v1.2.3-59-g8ed1b From 84f7c60b31f10e3a438153bc7408ad536f585641 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 May 2017 13:51:27 +0200 Subject: arm64: dts: hikey: Add the SYS_5V and the VDD_3V3 regulators Add these regulators to better describe the HW, but also because those is needed in following changes. Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Arnd Bergmann --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 0f6cba77fc76..802f4a4bed30 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -81,7 +81,26 @@ }; }; - reg_5v_hub: regulator@0 { + reg_sys_5v: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "SYS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + regulator-always-on; + }; + + reg_vdd_3v3: regulator@1 { + compatible = "regulator-fixed"; + regulator-name = "VDD_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + reg_5v_hub: regulator@2 { compatible = "regulator-fixed"; regulator-name = "5V_HUB"; regulator-min-microvolt = <5000000>; @@ -89,6 +108,7 @@ regulator-boot-on; gpio = <&gpio0 7 0>; regulator-always-on; + vin-supply = <®_sys_5v>; }; soc { -- cgit v1.2.3-59-g8ed1b From 76f1dfb687150e852aa74573962cfc158a9570cc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 May 2017 14:18:26 +0200 Subject: arm64: dts: hi6220: Move board data from the dwmmc nodes to hikey dts Move the board specific descriptions for the dwmmc nodes in the hi6220 SoC dtsi, into the hikey dts as it's there these belongs. While changing this, let's take the opportunity to drop the use of the "ti,non-removable" binding for one of the dwmmc device nodes, as it's not a valid binding and not used. Drop also the unnecessary use of "num-slots = <0x1>" for all of the dwmmc nodes, as there is no need to set this since when default number of slots is one. Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Arnd Bergmann --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 23 ++++++++++++++++++++++- arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 19 ------------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 802f4a4bed30..5132d8ed4664 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -286,8 +286,29 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ + dwmmc_0: dwmmc0@f723d000 { + cap-mmc-highspeed; + non-removable; + bus-width = <0x8>; + vmmc-supply = <&ldo19>; + }; + + dwmmc_1: dwmmc1@f723e000 { + card-detect-delay = <200>; + cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + vqmmc-supply = <&ldo7>; + vmmc-supply = <&ldo10>; + bus-width = <0x4>; + disable-wp; + cd-gpios = <&gpio1 0 1>; + }; + dwmmc_2: dwmmc2@f723f000 { - ti,non-removable; + broken-cd; + bus-width = <0x4>; non-removable; /* WL_EN */ vmmc-supply = <&wlan_en_reg>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 951152d44c02..5013e4b2ea71 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -756,17 +756,12 @@ dwmmc_0: dwmmc0@f723d000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - cap-mmc-highspeed; - non-removable; reg = <0x0 0xf723d000 0x0 0x1000>; interrupts = <0x0 0x48 0x4>; clocks = <&sys_ctrl 2>, <&sys_ctrl 1>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>; reset-names = "reset"; - bus-width = <0x8>; - vmmc-supply = <&ldo19>; pinctrl-names = "default"; pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func &emmc_cfg_func &emmc_rst_cfg_func>; @@ -774,13 +769,7 @@ dwmmc_1: dwmmc1@f723e000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - card-detect-delay = <200>; hisilicon,peripheral-syscon = <&ao_ctrl>; - cap-sd-highspeed; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; reg = <0x0 0xf723e000 0x0 0x1000>; interrupts = <0x0 0x49 0x4>; #address-cells = <0x1>; @@ -789,11 +778,6 @@ clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>; reset-names = "reset"; - vqmmc-supply = <&ldo7>; - vmmc-supply = <&ldo10>; - bus-width = <0x4>; - disable-wp; - cd-gpios = <&gpio1 0 1>; pinctrl-names = "default", "idle"; pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>; pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>; @@ -801,15 +785,12 @@ dwmmc_2: dwmmc2@f723f000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; reg = <0x0 0xf723f000 0x0 0x1000>; interrupts = <0x0 0x4a 0x4>; clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>; reset-names = "reset"; - bus-width = <0x4>; - broken-cd; pinctrl-names = "default", "idle"; pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>; pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>; -- cgit v1.2.3-59-g8ed1b From ea452678734eb782126f999bf5c4fb3e71d3b196 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 May 2017 16:11:33 +0200 Subject: arm64: dts: hikey: Fix WiFi support The description of the connection between the dwmmc (SDIO) controller and the Wifi chip, which is attached to the SDIO bus is wrong. Currently the SDIO card can't be detected and thus the Wifi doesn't work. Let's fix this by assigning the correct vmmc supply, which is the always on regulator VDD_3V3 and remove the WLAN enable regulator altogether. Then to properly deal with the power on/off sequence, add a mmc-pwrseq node to describe the resources needed to detect the SDIO card. Except for the WLAN enable GPIO and its corresponding assert/de-assert delays, the mmc-pwrseq node also contains a handle to a clock provided by the hi655x pmic. This clock is also needed to be able to turn on the WiFi chip. Signed-off-by: Ulf Hansson Acked-by: Arnd Bergmann --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 5132d8ed4664..49f6a6242cf9 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -111,6 +111,15 @@ vin-supply = <®_sys_5v>; }; + wl1835_pwrseq: wl1835-pwrseq { + compatible = "mmc-pwrseq-simple"; + /* WLAN_EN GPIO */ + reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; + clocks = <&pmic>; + clock-names = "ext_clock"; + power-off-delay-us = <10>; + }; + soc { spi0: spi@f7106000 { status = "ok"; @@ -307,11 +316,10 @@ }; dwmmc_2: dwmmc2@f723f000 { - broken-cd; bus-width = <0x4>; non-removable; - /* WL_EN */ - vmmc-supply = <&wlan_en_reg>; + vmmc-supply = <®_vdd_3v3>; + mmc-pwrseq = <&wl1835_pwrseq>; #address-cells = <0x1>; #size-cells = <0x0>; @@ -323,18 +331,6 @@ interrupts = <3 IRQ_TYPE_EDGE_RISING>; }; }; - - wlan_en_reg: regulator@1 { - compatible = "regulator-fixed"; - regulator-name = "wlan-en-regulator"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - /* WLAN_EN GPIO */ - gpio = <&gpio0 5 0>; - /* WLAN card specific delay */ - startup-delay-us = <70000>; - enable-active-high; - }; }; leds { -- cgit v1.2.3-59-g8ed1b From 1b57b6210f4e52904393be97c62122aae69bc8aa Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 23 May 2017 09:58:07 +0100 Subject: cfg80211: make cfg80211_sched_scan_results() work from atomic context Drivers should be able to call cfg80211_sched_scan_results() from atomic context. However, with the introduction of multiple scheduled scan feature this requirement was not taken into account resulting in regression shown below. [ 119.021594] BUG: scheduling while atomic: irq/47-iwlwifi/517/0x00000200 [ 119.021604] Modules linked in: [...] [ 119.021759] CPU: 1 PID: 517 Comm: irq/47-iwlwifi Not tainted 4.12.0-rc2-t440s-20170522+ #1 [ 119.021763] Hardware name: LENOVO 20AQS03H00/20AQS03H00, BIOS GJET91WW (2.41 ) 09/21/2016 [ 119.021766] Call Trace: [ 119.021778] ? dump_stack+0x5c/0x84 [ 119.021784] ? __schedule_bug+0x4c/0x70 [ 119.021792] ? __schedule+0x496/0x5c0 [ 119.021798] ? schedule+0x2d/0x80 [ 119.021804] ? schedule_preempt_disabled+0x5/0x10 [ 119.021810] ? __mutex_lock.isra.0+0x18e/0x4c0 [ 119.021817] ? __wake_up+0x2f/0x50 [ 119.021833] ? cfg80211_sched_scan_results+0x19/0x60 [cfg80211] [ 119.021844] ? cfg80211_sched_scan_results+0x19/0x60 [cfg80211] [ 119.021859] ? iwl_mvm_rx_lmac_scan_iter_complete_notif+0x17/0x30 [iwlmvm] [ 119.021869] ? iwl_pcie_rx_handle+0x2a9/0x7e0 [iwlwifi] [ 119.021878] ? iwl_pcie_irq_handler+0x17c/0x730 [iwlwifi] [ 119.021884] ? irq_forced_thread_fn+0x60/0x60 [ 119.021887] ? irq_thread_fn+0x16/0x40 [ 119.021892] ? irq_thread+0x109/0x180 [ 119.021896] ? wake_threads_waitq+0x30/0x30 [ 119.021901] ? kthread+0xf2/0x130 [ 119.021905] ? irq_thread_dtor+0x90/0x90 [ 119.021910] ? kthread_create_on_node+0x40/0x40 [ 119.021915] ? ret_from_fork+0x26/0x40 Fixes: b34939b98369 ("cfg80211: add request id to cfg80211_sched_scan_*() api") Reported-by: Sander Eikelenboom Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/scan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 14d5f0c8c45f..9f0901f3e42b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -322,9 +322,9 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid) { struct cfg80211_sched_scan_request *pos; - ASSERT_RTNL(); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); - list_for_each_entry(pos, &rdev->sched_scan_req_list, list) { + list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) { if (pos->reqid == reqid) return pos; } @@ -398,13 +398,13 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) trace_cfg80211_sched_scan_results(wiphy, reqid); /* ignore if we're not scanning */ - rtnl_lock(); + rcu_read_lock(); request = cfg80211_find_sched_scan_req(rdev, reqid); if (request) { request->report_results = true; queue_work(cfg80211_wq, &rdev->sched_scan_res_wk); } - rtnl_unlock(); + rcu_read_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_results); -- cgit v1.2.3-59-g8ed1b From c70d9d809fdeecedb96972457ee45c49a232d97f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 May 2017 15:40:12 -0500 Subject: ptrace: Properly initialize ptracer_cred on fork When I introduced ptracer_cred I failed to consider the weirdness of fork where the task_struct copies the old value by default. This winds up leaving ptracer_cred set even when a process forks and the child process does not wind up being ptraced. Because ptracer_cred is not set on non-ptraced processes whose parents were ptraced this has broken the ability of the enlightenment window manager to start setuid children. Fix this by properly initializing ptracer_cred in ptrace_init_task This must be done with a little bit of care to preserve the current value of ptracer_cred when ptrace carries through fork. Re-reading the ptracer_cred from the ptracing process at this point is inconsistent with how PT_PTRACE_CAP has been maintained all of these years. Tested-by: Takashi Iwai Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP") Signed-off-by: "Eric W. Biederman" --- include/linux/ptrace.h | 7 +++++-- kernel/ptrace.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a..ef3eb8bbfee4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, - struct task_struct *new_parent); + struct task_struct *new_parent, + const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 @@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); @@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) set_tsk_thread_flag(child, TIF_SIGPENDING); } + else + child->ptracer_cred = NULL; } /** diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 266ddcc1d8bb..60f356d91060 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, } +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, + const struct cred *ptracer_cred) +{ + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); + child->parent = new_parent; + child->ptracer_cred = get_cred(ptracer_cred); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. */ -void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) +static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_entry)); - list_add(&child->ptrace_entry, &new_parent->ptraced); - child->parent = new_parent; rcu_read_lock(); - child->ptracer_cred = get_cred(__task_cred(new_parent)); + __ptrace_link(child, new_parent, __task_cred(new_parent)); rcu_read_unlock(); } @@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request, flags |= PT_SEIZED; task->ptrace = flags; - __ptrace_link(task, current); + ptrace_link(task, current); /* SEIZE doesn't trap tracee on attach */ if (!seize) @@ -459,7 +465,7 @@ static int ptrace_traceme(void) */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; - __ptrace_link(current, current->real_parent); + ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); -- cgit v1.2.3-59-g8ed1b From cdc5a7f363be34287ac6c2345e5d1d3b37cf4a23 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 9 May 2017 20:45:06 +0300 Subject: net/mlx5e: Use the correct delete call on offloaded TC encap entry detach We wrongly direcly invoke hlist_del_rcu() and not hash_del_rcu() which does a slightly different call now and may change later, fix that. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 11c27e4fadf6..a90dd26ea51c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -384,7 +384,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - hlist_del_rcu(&e->encap_hlist); + hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); kfree(e); } -- cgit v1.2.3-59-g8ed1b From 3aa4266405a6c2e03eb0ff12d7c573d3d903da4c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 10 May 2017 13:48:41 +0300 Subject: net/sched: act_csum: Add accessors for offloading drivers Add the accessors for realizing if this is a csum action, and for which fields checksum is needed. Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/net/tc_act/tc_csum.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index f31fb6331a53..3248beaf16b0 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -3,6 +3,7 @@ #include #include +#include struct tcf_csum { struct tc_action common; @@ -11,4 +12,18 @@ struct tcf_csum { }; #define to_tcf_csum(a) ((struct tcf_csum *)a) +static inline bool is_tcf_csum(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_CSUM) + return true; +#endif + return false; +} + +static inline u32 tcf_csum_update_flags(const struct tc_action *a) +{ + return to_tcf_csum(a)->update_flags; +} + #endif /* __NET_TC_CSUM_H */ -- cgit v1.2.3-59-g8ed1b From 26c02749936f064abf771a0f5f49b280fcfd8b66 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 10 May 2017 13:59:54 +0300 Subject: net/mlx5e: Allow TC csum offload if applied together with pedit action When offloading header re-writes, the HW re-calculates the relevant L3/L4 checksums. Hence, when upper layers (as done by OVS) ask for TC checksum action offload together with pedit offload, don't err. This command now works: tc filter add dev ens1f0 protocol ip parent ffff: prio 20 flower skip_sw ip_proto tcp dst_port 9001 action pedit ex munge tcp dport set 0x1234 pipe action csum tcp Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 39 +++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a90dd26ea51c..9dd83c7e4c51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include "en.h" @@ -1109,6 +1110,28 @@ out_err: return err; } +static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1149,6 +1172,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, attr->action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + if (is_tcf_skbedit_mark(a)) { u32 mark = tcf_skbedit_mark(a); @@ -1651,6 +1682,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, attr->action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev, *encap_dev = NULL; -- cgit v1.2.3-59-g8ed1b From d824bf3fe2d352fc2c52b7ede05b1a0e95d946be Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 9 May 2017 19:02:42 +0300 Subject: net/mlx5e: Properly enforce disallowing of partial field re-write offload Currently we don't support partial header re-writes through TC pedit action offloading. However, the code that enforces that wasn't err-ing on cases where the first and last bits of the mask are set but there is some zero bit between them, such as in the below example, fix that! tc filter add dev enp1s0 protocol ip parent ffff: prio 10 flower ip_proto udp dst_port 2001 skip_sw action pedit munge ip src set 1.0.0.1 retain 0xff0000ff Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9dd83c7e4c51..0387c321f0a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -926,7 +926,7 @@ static int offload_pedit_fields(struct pedit_headers *masks, struct mlx5e_tc_flow_parse_attr *parse_attr) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last; + int i, action_size, nactions, max_actions, first, last, first_z; void *s_masks_p, *a_masks_p, *vals_p; u32 s_mask, a_mask, val; struct mlx5_fields *f; @@ -985,9 +985,10 @@ static int offload_pedit_fields(struct pedit_headers *masks, memcpy(&val, vals_p, f->size); field_bsize = f->size * BITS_PER_BYTE; + first_z = find_first_zero_bit(&mask, field_bsize); first = find_first_bit(&mask, field_bsize); last = find_last_bit(&mask, field_bsize); - if (first > 0 || last != (field_bsize - 1)) { + if (first > 0 || last != (field_bsize - 1) || first_z < last) { printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", mask); return -EOPNOTSUPP; -- cgit v1.2.3-59-g8ed1b From e3ca4e0583a02a04503d9c827fb5c5d50abc4ff5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 9 May 2017 13:37:26 +0300 Subject: net/mlx5e: Fix warnings around parsing of TC pedit actions The sparse tool emits these correct complaints: drivers/net/ethernet/mellanox/mlx5/core//en_tc.c:1005:25: warning: cast to restricted __be32 drivers/net/ethernet/mellanox/mlx5/core//en_tc.c:1007:25: warning: cast to restricted __be16 The value is provided from user-space in network order, but there's no way for them to realize that, avoid the warnings by casting to the appropriate type. Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format') Signed-off-by: Or Gerlitz Reported-by: Leon Romanovsky Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0387c321f0a2..ec63158ab643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -928,9 +928,9 @@ static int offload_pedit_fields(struct pedit_headers *masks, struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; int i, action_size, nactions, max_actions, first, last, first_z; void *s_masks_p, *a_masks_p, *vals_p; - u32 s_mask, a_mask, val; struct mlx5_fields *f; u8 cmd, field_bsize; + u32 s_mask, a_mask; unsigned long mask; void *action; @@ -947,7 +947,8 @@ static int offload_pedit_fields(struct pedit_headers *masks, for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; /* avoid seeing bits set from previous iterations */ - s_mask = a_mask = mask = val = 0; + s_mask = 0; + a_mask = 0; s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; @@ -982,9 +983,8 @@ static int offload_pedit_fields(struct pedit_headers *masks, memset(a_masks_p, 0, f->size); } - memcpy(&val, vals_p, f->size); - field_bsize = f->size * BITS_PER_BYTE; + first_z = find_first_zero_bit(&mask, field_bsize); first = find_first_bit(&mask, field_bsize); last = find_last_bit(&mask, field_bsize); @@ -1004,11 +1004,11 @@ static int offload_pedit_fields(struct pedit_headers *masks, } if (field_bsize == 32) - MLX5_SET(set_action_in, action, data, ntohl(val)); + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p)); else if (field_bsize == 16) - MLX5_SET(set_action_in, action, data, ntohs(val)); + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p)); else if (field_bsize == 8) - MLX5_SET(set_action_in, action, data, val); + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p); action += action_size; nactions++; -- cgit v1.2.3-59-g8ed1b From b57fe691961cc8f00541f9a435c70df45d41e514 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 27 Apr 2017 17:59:00 +0300 Subject: net/mlx5e: IPoIB, handle RX packet correctly IPoIB packet contains the pseudo header area, we need to pull it prior to reset_mac_header in order to let the GRO work well. In more details: GRO checks the mac address of the new coming packet, it does that by comparing the hard_header_len size of the current packet to the previous one in that session, the comparison is over hard_header_len size. Now, the driver prepares that area in the skb by allocating area from the reserved part and resetting the correct mac header to it. Fixes: 9d6bd752c63c ("net/mlx5e: IPoIB, RX handler") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7b1566f0ae58..66b5fec15313 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1041,6 +1041,8 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) #define MLX5_IB_GRH_BYTES 40 #define MLX5_IPOIB_ENCAP_LEN 4 #define MLX5_GID_SIZE 16 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -1048,6 +1050,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; + char *pseudo_header; u8 *dgid; u8 g; @@ -1076,8 +1079,11 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, if (likely(netdev->features & NETIF_F_RXHASH)) mlx5e_skb_set_hash(cqe, skb); + /* 20 bytes of ipoib header and 4 for encap existing */ + pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN); + memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN); skb_reset_mac_header(skb); - skb_pull(skb, MLX5_IPOIB_ENCAP_LEN); + skb_pull(skb, MLX5_IPOIB_HARD_LEN); skb->dev = netdev; -- cgit v1.2.3-59-g8ed1b From 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 23 Feb 2017 11:19:36 +0200 Subject: net/mlx5: Avoid using pending command interface slots Currently when firmware command gets stuck or it takes long time to complete, the driver command will get timeout and the command slot is freed and can be used for new commands, and if the firmware receive new command on the old busy slot its behavior is unexpected and this could be harmful. To fix this when the driver command gets timeout we return failure, but we don't free the command slot and we wait for the firmware to explicitly respond to that command. Once all the entries are busy we will stop processing new firmware commands. Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') Signed-off-by: Mohamad Haj Yahia Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 +++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- include/linux/mlx5/driver.h | 7 +++- 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5bdaf3d545b2..10d282841f5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } static void cmd_work_handler(struct work_struct *work) @@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work) } cmd->ent_arr[ent->idx] = ent; + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work) if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u8 status = 0; + u32 drv_synd; + + ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); + MLX5_SET(mbox_out, ent->out, status, status); + MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); + + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + return; + } + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } err = ent->ret; @@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); + } + continue; + } + if (ent->callback) cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) @@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } - free_ent(cmd, ent->idx); + + /* only real completion will free the entry slot */ + if (!forced) + free_ent(cmd, ent->idx); if (ent->callback) { ds = ent->ts2 - ent->ts1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea5d8d37a75c..33eae5ad2fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); break; case MLX5_EVENT_TYPE_PORT_CHANGE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d0515391d33b..44f59b1d6f0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector); + mlx5_cmd_comp_handler(dev, vector, true); return; no_trig: diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bcdf739ee41a..93273d9ea4d1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -787,7 +787,12 @@ enum { typedef void (*mlx5_cmd_cbk_t)(int status, void *context); +enum { + MLX5_CMD_ENT_STATE_PENDING_COMP, +}; + struct mlx5_cmd_work_ent { + unsigned long state; struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; void *uout; @@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, -- cgit v1.2.3-59-g8ed1b From b665d98edc9ab295169be2fc5bb4e89a46de0a1a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 18 May 2017 13:34:43 +0300 Subject: net/mlx5: Tolerate irq_set_affinity_hint() failures Add tolerance to failures of irq_set_affinity_hint(). Its role is to give hints that optimizes performance, and should not block the driver load. In non-SMP systems, functionality is not available as there is a single core, and all these calls definitely fail. Hence, do not call the function and avoid the warning prints. Fixes: db058a186f98 ("net/mlx5_core: Set irq affinity hints") Signed-off-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0c123d571b4c..fe5546bb4153 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -612,7 +612,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -622,18 +621,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); - err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); - if (err) { - mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", - irq); - goto err_clear_mask; - } +#ifdef CONFIG_SMP + if (irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); +#endif return 0; - -err_clear_mask: - free_cpumask_var(priv->irq_info[i].mask); - return err; } static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -- cgit v1.2.3-59-g8ed1b From 7bd897cfce1eb373892d35d7f73201b0f9b221c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 May 2017 17:28:36 +0300 Subject: block: fix an error code in add_partition() We don't set an error code on this path. It means that we return NULL instead of an error pointer and the caller does a NULL dereference. Fixes: 6d1d8050b4bc ("block, partition: add partition_meta_info to hd_struct") Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- block/partition-generic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/partition-generic.c b/block/partition-generic.c index ff07b9143ca4..c5ec8246e25e 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); - if (!pinfo) + if (!pinfo) { + err = -ENOMEM; goto out_free_stats; + } memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } -- cgit v1.2.3-59-g8ed1b From 7f65b1f5adc5f8496ca8bec4947de66fefe36220 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 May 2017 14:50:30 +0200 Subject: cdc-ether: divorce initialisation with a filter reset and a generic method Some devices need their multicast filter reset but others are crashed by that. So the methods need to be separated. Signed-off-by: Oliver Neukum Reported-by: "Ridgway, Keith" Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 31 ++++++++++++++++++++++++------- include/linux/usb/usbnet.h | 1 + 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f3ae88fdf332..8ab281b478f2 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -310,6 +310,26 @@ skip: return -ENODEV; } + return 0; + +bad_desc: + dev_info(&dev->udev->dev, "bad CDC descriptors\n"); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); + + +/* like usbnet_generic_cdc_bind() but handles filter initialization + * correctly + */ +int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int rv; + + rv = usbnet_generic_cdc_bind(dev, intf); + if (rv < 0) + goto bail_out; + /* Some devices don't initialise properly. In particular * the packet filter is not reset. There are devices that * don't do reset all the way. So the packet filter should @@ -317,13 +337,10 @@ skip: */ usbnet_cdc_update_filter(dev); - return 0; - -bad_desc: - dev_info(&dev->udev->dev, "bad CDC descriptors\n"); - return -ENODEV; +bail_out: + return rv; } -EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); +EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind); void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf) { @@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct cdc_state))); - status = usbnet_generic_cdc_bind(dev, intf); + status = usbnet_ether_cdc_bind(dev, intf); if (status < 0) return status; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 7dffa5624ea6..97116379db5f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -206,6 +206,7 @@ struct cdc_state { }; extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *); +extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf); extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); -- cgit v1.2.3-59-g8ed1b From 12e8b570e732eaa5eae3a2895ba3fbcf91bde2b4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 22 May 2017 20:13:07 +0200 Subject: mlx5: fix bug reading rss_hash_type from CQE Masks for extracting part of the Completion Queue Entry (CQE) field rss_hash_type was swapped, namely CQE_RSS_HTYPE_IP and CQE_RSS_HTYPE_L4. The bug resulted in setting skb->l4_hash, even-though the rss_hash_type indicated that hash was NOT computed over the L4 (UDP or TCP) part of the packet. Added comments from the datasheet, to make it more clear what these masks are selecting. Signed-off-by: Jesper Dangaard Brouer Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dd9a263ed368..a940ec6a046c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -787,8 +787,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI + */ }; enum { -- cgit v1.2.3-59-g8ed1b From 223220356d5ebc05ead9a8d697abb0c0a906fc81 Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 21 May 2017 12:27:00 -0700 Subject: partitions/msdos: FreeBSD UFS2 file systems are not recognized The code in block/partitions/msdos.c recognizes FreeBSD, OpenBSD and NetBSD partitions and does a reasonable job picking out OpenBSD and NetBSD UFS subpartitions. But for FreeBSD the subpartitions are always "bad". Kernel: Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/msdos.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 93e7c1b32edd..5610cd537da7 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state, continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); + if (memcmp(flavour, "bsd\0", 4) == 0) + bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ continue; -- cgit v1.2.3-59-g8ed1b From 6f4dbd149d2a151b89d1a5bbf7530ee5546c7908 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 11:33:16 +0200 Subject: libceph: use kbasename() and kill ceph_file_part() Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_debug.h | 6 +++--- net/ceph/ceph_common.c | 13 ------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index aa2e19182d99..51c5bd64bd00 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -3,6 +3,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include + #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG /* @@ -12,12 +14,10 @@ */ # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) -extern const char *ceph_file_part(const char *s, int len); # define dout(fmt, ...) \ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ - ceph_file_part(__FILE__, sizeof(__FILE__)), \ - __LINE__, ##__VA_ARGS__) + kbasename(__FILE__), __LINE__, ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4fd02831beed..47e94b560ba0 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -56,19 +56,6 @@ static const struct kernel_param_ops param_ops_supported_features = { module_param_cb(supported_features, ¶m_ops_supported_features, NULL, S_IRUGO); -/* - * find filename portion of a path (/foo/bar/baz -> baz) - */ -const char *ceph_file_part(const char *s, int len) -{ - const char *e = s + len; - - while (e != s && *(e-1) != '/') - e--; - return e; -} -EXPORT_SYMBOL(ceph_file_part); - const char *ceph_msg_type_name(int type) { switch (type) { -- cgit v1.2.3-59-g8ed1b From 1759f7b0e3fab1d1882d7c680af9d12c5c111b0e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 11:38:17 +0200 Subject: libceph: make ceph_msg_data_advance() return void Both callers ignore the returned bool. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/messenger.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5766a6c896c4..d7ab481b2508 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1174,8 +1174,8 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, * Returns true if the result moves the cursor on to the next piece * of the data item. */ -static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, - size_t bytes) +static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) { bool new_piece; @@ -1207,8 +1207,6 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, new_piece = true; } cursor->need_crc = new_piece; - - return new_piece; } static size_t sizeof_footer(struct ceph_connection *con) @@ -1577,7 +1575,6 @@ static int write_partial_message_data(struct ceph_connection *con) size_t page_offset; size_t length; bool last_piece; - bool need_crc; int ret; page = ceph_msg_data_next(cursor, &page_offset, &length, @@ -1592,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con) } if (do_datacrc && cursor->need_crc) crc = ceph_crc32c_page(crc, page, page_offset, length); - need_crc = ceph_msg_data_advance(cursor, (size_t)ret); + ceph_msg_data_advance(cursor, (size_t)ret); } dout("%s %p msg %p done\n", __func__, con, msg); @@ -2299,7 +2296,7 @@ static int read_partial_msg_data(struct ceph_connection *con) if (do_datacrc) crc = ceph_crc32c_page(crc, page, page_offset, ret); - (void) ceph_msg_data_advance(cursor, (size_t)ret); + ceph_msg_data_advance(cursor, (size_t)ret); } if (do_datacrc) con->in_data_crc = crc; -- cgit v1.2.3-59-g8ed1b From f3b4e55ded9b3c52831a7d2ab9e511293c99fc11 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 11:59:22 +0200 Subject: libceph: drop version variable from ceph_monmap_decode() It's set but not used: CEPH_FEATURE_MONNAMES feature bit isn't advertised, which guarantees a v1 MonMap. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/mon_client.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 29a0ef351c5e..250f11f78609 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -43,15 +43,13 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) int i, err = -EINVAL; struct ceph_fsid fsid; u32 epoch, num_mon; - u16 version; u32 len; ceph_decode_32_safe(&p, end, len, bad); ceph_decode_need(&p, end, len, bad); dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p)); - - ceph_decode_16_safe(&p, end, version, bad); + p += sizeof(u16); /* skip version */ ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); -- cgit v1.2.3-59-g8ed1b From d18a1247c4070390fc0c2d83d89a72afe921882e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 12:21:56 +0200 Subject: libceph: validate blob_struct_v in process_one_ticket() None of these are validated in userspace, but since we do validate reply_struct_v in ceph_x_proc_ticket_reply(), tkt_struct_v (first) and CephXServiceTicket struct_v (second) in process_one_ticket(), validate CephXTicketBlob struct_v as well. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/auth_x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 2034fb926670..d0126df33f1f 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -215,6 +215,9 @@ static int process_one_ticket(struct ceph_auth_client *ac, dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad); blob_struct_v = ceph_decode_8(ptp); + if (blob_struct_v != 1) + goto bad; + new_secret_id = ceph_decode_64(ptp); ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend); if (ret) -- cgit v1.2.3-59-g8ed1b From b51456a6096ebf9f4ceb2cc7e176b471d4b70af0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 14:24:36 +0200 Subject: libceph: fix error handling in process_one_ticket() Don't leak key internals after new_session_key is populated. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/auth_x.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index d0126df33f1f..8757fb87dab8 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -151,7 +151,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, struct timespec validity; void *tp, *tpend; void **ptp; - struct ceph_crypto_key new_session_key; + struct ceph_crypto_key new_session_key = { 0 }; struct ceph_buffer *new_ticket_blob; unsigned long new_expires, new_renew_after; u64 new_secret_id; @@ -237,13 +237,13 @@ static int process_one_ticket(struct ceph_auth_client *ac, type, ceph_entity_type_name(type), th->secret_id, (int)th->ticket_blob->vec.iov_len); xi->have_keys |= th->service; - -out: - return ret; + return 0; bad: ret = -EINVAL; - goto out; +out: + ceph_crypto_key_destroy(&new_session_key); + return ret; } static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, -- cgit v1.2.3-59-g8ed1b From 293dffaad8d500e1a5336eeb90d544cf40d4fbd8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 May 2017 17:25:10 +0300 Subject: libceph: NULL deref on crush_decode() error path If there is not enough space then ceph_decode_32_safe() does a goto bad. We need to return an error code in that situation. The current code returns ERR_PTR(0) which is NULL. The callers are not expecting that and it results in a NULL dereference. Fixes: f24e9980eb86 ("ceph: OSD client") Signed-off-by: Dan Carpenter Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ffe9e904d4d1..55e3a477f92d 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -317,6 +317,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) u32 yes; struct crush_rule *r; + err = -EINVAL; ceph_decode_32_safe(p, end, yes, bad); if (!yes) { dout("crush_decode NO rule %d off %x %p to %p\n", -- cgit v1.2.3-59-g8ed1b From 4d071c3238987325b9e50e33051a40d1cce311cc Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 23 May 2017 14:18:17 -0500 Subject: PCI/PM: Add needs_resume flag to avoid suspend complete optimization Some drivers - like i915 - may not support the system suspend direct complete optimization due to differences in their runtime and system suspend sequence. Add a flag that when set resumes the device before calling the driver's system suspend handlers which effectively disables the optimization. Needed by a future patch fixing suspend/resume on i915. Suggested by Rafael. Signed-off-by: Imre Deak Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org --- drivers/pci/pci.c | 3 ++- include/linux/pci.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b01bd5bba8e6..563901cd9c06 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev) if (!pm_runtime_suspended(dev) || pci_target_state(pci_dev) != pci_dev->current_state - || platform_pci_need_resume(pci_dev)) + || platform_pci_need_resume(pci_dev) + || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME)) return false; /* diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..df7dd9021646 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -183,6 +183,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), + /* + * Resume before calling the driver's system suspend hooks, disabling + * the direct_complete optimization. + */ + PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), }; enum pci_irq_reroute_variant { -- cgit v1.2.3-59-g8ed1b From 82bc9a42cf854fdf63155759c0aa790bd1f361b0 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 18 Apr 2017 13:43:32 +0200 Subject: drm/gma500/psb: Actually use VBT mode when it is found With LVDS we were incorrectly picking the pre-programmed mode instead of the prefered mode provided by VBT. Make sure we pick the VBT mode if one is provided. It is likely that the mode read-out code is still wrong but this patch fixes the immediate problem on most machines. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78562 Cc: Signed-off-by: Patrik Jakobsson Link: http://patchwork.freedesktop.org/patch/msgid/20170418114332.12183-1-patrik.r.jakobsson@gmail.com --- drivers/gpu/drm/gma500/psb_intel_lvds.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index 0066fe7e622e..be3eefec5152 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -759,20 +759,23 @@ void psb_intel_lvds_init(struct drm_device *dev, if (scan->type & DRM_MODE_TYPE_PREFERRED) { mode_dev->panel_fixed_mode = drm_mode_duplicate(dev, scan); + DRM_DEBUG_KMS("Using mode from DDC\n"); goto out; /* FIXME: check for quirks */ } } /* Failed to get EDID, what about VBT? do we need this? */ - if (mode_dev->vbt_mode) + if (dev_priv->lfp_lvds_vbt_mode) { mode_dev->panel_fixed_mode = - drm_mode_duplicate(dev, mode_dev->vbt_mode); + drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode); - if (!mode_dev->panel_fixed_mode) - if (dev_priv->lfp_lvds_vbt_mode) - mode_dev->panel_fixed_mode = - drm_mode_duplicate(dev, - dev_priv->lfp_lvds_vbt_mode); + if (mode_dev->panel_fixed_mode) { + mode_dev->panel_fixed_mode->type |= + DRM_MODE_TYPE_PREFERRED; + DRM_DEBUG_KMS("Using mode from VBT\n"); + goto out; + } + } /* * If we didn't get EDID, try checking if the panel is already turned @@ -789,6 +792,7 @@ void psb_intel_lvds_init(struct drm_device *dev, if (mode_dev->panel_fixed_mode) { mode_dev->panel_fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; + DRM_DEBUG_KMS("Using pre-programmed mode\n"); goto out; /* FIXME: check for quirks */ } } -- cgit v1.2.3-59-g8ed1b From 0a2ad541071f99eaf4589c3551176fca191c1ee2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 5 May 2017 18:47:37 +0800 Subject: libceph: cleanup old messages according to reconnect seq when reopen a connection, use 'reconnect seq' to clean up messages that have already been received by peer. Link: http://tracker.ceph.com/issues/18690 Signed-off-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d7ab481b2508..588a91930051 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2228,10 +2228,18 @@ static void process_ack(struct ceph_connection *con) struct ceph_msg *m; u64 ack = le64_to_cpu(con->in_temp_ack); u64 seq; + bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ); + struct list_head *list = reconnect ? &con->out_queue : &con->out_sent; - while (!list_empty(&con->out_sent)) { - m = list_first_entry(&con->out_sent, struct ceph_msg, - list_head); + /* + * In the reconnect case, con_fault() has requeued messages + * in out_sent. We should cleanup old messages according to + * the reconnect seq. + */ + while (!list_empty(list)) { + m = list_first_entry(list, struct ceph_msg, list_head); + if (reconnect && m->needs_out_seq) + break; seq = le64_to_cpu(m->hdr.seq); if (seq > ack) break; @@ -2240,6 +2248,7 @@ static void process_ack(struct ceph_connection *con) m->ack_stamp = jiffies; ceph_msg_remove(m); } + prepare_read_tag(con); } -- cgit v1.2.3-59-g8ed1b From 42c99fc4c7069371da7b04b9099319dd1c633ee2 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 May 2017 18:28:44 +0100 Subject: ceph: check that the new inode size is within limits in ceph_fallocate() Currently the ceph client doesn't respect the rlimit in fallocate. This means that a user can allocate a file with size > RLIMIT_FSIZE. This patch adds the call to inode_newsize_ok() to verify filesystem limits and ulimits. This should make ceph successfully run xfstest generic/228. Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3fdde0b283c9..29308a80d66f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1671,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode, } size = i_size_read(inode); - if (!(mode & FALLOC_FL_KEEP_SIZE)) + if (!(mode & FALLOC_FL_KEEP_SIZE)) { endoff = offset + length; + ret = inode_newsize_ok(inode, endoff); + if (ret) + goto unlock; + } if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; -- cgit v1.2.3-59-g8ed1b From 3ab2137915aea0ce7b3ec02e0f260ecc0f1c289d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 23 May 2017 13:28:54 +0800 Subject: sctp: fix stream update when processing dupcookie Since commit 3dbcc105d556 ("sctp: alloc stream info when initializing asoc"), stream and stream.out info are always alloced when creating an asoc. So it's not correct to check !asoc->stream before updating stream info when processing dupcookie, but would be better to check asoc state instead. Fixes: 3dbcc105d556 ("sctp: alloc stream info when initializing asoc") Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a9708da28eb5..95238284c422 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1176,7 +1176,9 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - if (!asoc->stream) { + + if (sctp_state(asoc, COOKIE_WAIT)) { + sctp_stream_free(asoc->stream); asoc->stream = new->stream; new->stream = NULL; } -- cgit v1.2.3-59-g8ed1b From 7e06297768886337707f5833942b3bd524a6d3d5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 23 May 2017 13:28:55 +0800 Subject: sctp: set new_asoc temp when processing dupcookie After sctp changed to use transport hashtable, a transport would be added into global hashtable when adding the peer to an asoc, then the asoc can be got by searching the transport in the hashtbale. The problem is when processing dupcookie in sctp_sf_do_5_2_4_dupcook, a new asoc would be created. A peer with the same addr and port as the one in the old asoc might be added into the new asoc, but fail to be added into the hashtable, as they also belong to the same sk. It causes that sctp's dupcookie processing can not really work. Since the new asoc will be freed after copying it's information to the old asoc, it's more like a temp asoc. So this patch is to fix it by setting it as a temp asoc to avoid adding it's any transport into the hashtable and also avoid allocing assoc_id. An extra thing it has to do is to also alloc stream info for any temp asoc, as sctp dupcookie process needs it to update old asoc. But I don't think it would hurt something, as a temp asoc would always be freed after finishing processing cookie echo packet. Reported-by: Jianwen Ji Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 13 ++++--------- net/sctp/sm_statefuns.c | 3 +++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8a08f13469c4..92e332e17391 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2454,16 +2454,11 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * stream sequence number shall be set to 0. */ - /* Allocate storage for the negotiated streams if it is not a temporary - * association. - */ - if (!asoc->temp) { - if (sctp_stream_init(asoc, gfp)) - goto clean_up; + if (sctp_stream_init(asoc, gfp)) + goto clean_up; - if (sctp_assoc_set_id(asoc, gfp)) - goto clean_up; - } + if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) + goto clean_up; /* ADDIP Section 4.1 ASCONF Chunk Procedures * diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4f5e6cfc7f60..f863b5573e42 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2088,6 +2088,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } } + /* Set temp so that it won't be added into hashtable */ + new_asoc->temp = 1; + /* Compare the tie_tag in cookie with the verification tag of * current association. */ -- cgit v1.2.3-59-g8ed1b From 159a07604a99bd01e7db112de08d53dc4fcad109 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 23 May 2017 11:48:08 +0200 Subject: net: fec: add post PHY reset delay DT property Some PHY require to wait for a bit after the reset GPIO has been toggled. This adds support for the DT property `phy-reset-post-delay` which gives the delay in milliseconds to wait after reset. If the DT property is not given, no delay is observed. Post reset delay greater than 1000ms are invalid. Signed-off-by: Quentin Schulz Reviewed-by: Andrew Lunn Acked-by: Fugang Duan Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/fsl-fec.txt | 4 ++++ drivers/net/ethernet/freescale/fec_main.c | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index a1e3693cca16..6f55bdd52f8a 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -15,6 +15,10 @@ Optional properties: - phy-reset-active-high : If present then the reset sequence using the GPIO specified in the "phy-reset-gpios" property is reversed (H=reset state, L=operation state). +- phy-reset-post-delay : Post reset delay in milliseconds. If present then + a delay of phy-reset-post-delay milliseconds will be observed after the + phy-reset-gpios has been toggled. Can be omitted thus no delay is + observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. - phy-supply : regulator that powers the Ethernet PHY. - phy-handle : phandle to the PHY device connected to this device. - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 56a563f90b0b..f7c8649fd28f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3192,7 +3192,7 @@ static int fec_reset_phy(struct platform_device *pdev) { int err, phy_reset; bool active_high = false; - int msec = 1; + int msec = 1, phy_post_delay = 0; struct device_node *np = pdev->dev.of_node; if (!np) @@ -3209,6 +3209,11 @@ static int fec_reset_phy(struct platform_device *pdev) else if (!gpio_is_valid(phy_reset)) return 0; + err = of_property_read_u32(np, "phy-reset-post-delay", &phy_post_delay); + /* valid reset duration should be less than 1s */ + if (!err && phy_post_delay > 1000) + return -EINVAL; + active_high = of_property_read_bool(np, "phy-reset-active-high"); err = devm_gpio_request_one(&pdev->dev, phy_reset, @@ -3226,6 +3231,15 @@ static int fec_reset_phy(struct platform_device *pdev) gpio_set_value_cansleep(phy_reset, !active_high); + if (!phy_post_delay) + return 0; + + if (phy_post_delay > 20) + msleep(phy_post_delay); + else + usleep_range(phy_post_delay * 1000, + phy_post_delay * 1000 + 1000); + return 0; } #else /* CONFIG_OF */ -- cgit v1.2.3-59-g8ed1b From 0ff50e83b5122e836ca492fefb11656b225ac29c Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 23 May 2017 13:20:28 +0200 Subject: net: rtnetlink: bail out from rtnl_fdb_dump() on parse error rtnl_fdb_dump() failed to check the result of nlmsg_parse(), which led to contents of |ifm| being uninitialized because nlh->nlmsglen was too small to accommodate |ifm|. The uninitialized data may affect some branches and result in unwanted effects, although kernel data doesn't seem to leak to the userspace directly. The bug has been detected with KMSAN and syzkaller. For the record, here is the KMSAN report: ================================================================== BUG: KMSAN: use of unitialized memory in rtnl_fdb_dump+0x5dc/0x1000 CPU: 0 PID: 1039 Comm: probe Not tainted 4.11.0-rc5+ #2727 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x143/0x1b0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:1007 __kmsan_warning_32+0x66/0xb0 mm/kmsan/kmsan_instr.c:491 rtnl_fdb_dump+0x5dc/0x1000 net/core/rtnetlink.c:3230 netlink_dump+0x84f/0x1190 net/netlink/af_netlink.c:2168 __netlink_dump_start+0xc97/0xe50 net/netlink/af_netlink.c:2258 netlink_dump_start ./include/linux/netlink.h:165 rtnetlink_rcv_msg+0xae9/0xb40 net/core/rtnetlink.c:4094 netlink_rcv_skb+0x339/0x5a0 net/netlink/af_netlink.c:2339 rtnetlink_rcv+0x83/0xa0 net/core/rtnetlink.c:4110 netlink_unicast_kernel net/netlink/af_netlink.c:1272 netlink_unicast+0x13b7/0x1480 net/netlink/af_netlink.c:1298 netlink_sendmsg+0x10b8/0x10f0 net/netlink/af_netlink.c:1844 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 ___sys_sendmsg+0xd4b/0x10f0 net/socket.c:1997 __sys_sendmsg net/socket.c:2031 SYSC_sendmsg+0x2c6/0x3f0 net/socket.c:2042 SyS_sendmsg+0x87/0xb0 net/socket.c:2038 do_syscall_64+0x102/0x150 arch/x86/entry/common.c:285 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 RIP: 0033:0x401300 RSP: 002b:00007ffc3b0e6d58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401300 RDX: 0000000000000000 RSI: 00007ffc3b0e6d80 RDI: 0000000000000003 RBP: 00007ffc3b0e6e00 R08: 000000000000000b R09: 0000000000000004 R10: 000000000000000d R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004065a0 R14: 0000000000406630 R15: 0000000000000000 origin: 000000008fe00056 save_stack_trace+0x59/0x60 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:352 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:247 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:260 slab_alloc_node mm/slub.c:2743 __kmalloc_node_track_caller+0x1f4/0x390 mm/slub.c:4349 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x2cd/0x740 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:933 netlink_alloc_large_skb net/netlink/af_netlink.c:1144 netlink_sendmsg+0x934/0x10f0 net/netlink/af_netlink.c:1819 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 ___sys_sendmsg+0xd4b/0x10f0 net/socket.c:1997 __sys_sendmsg net/socket.c:2031 SYSC_sendmsg+0x2c6/0x3f0 net/socket.c:2042 SyS_sendmsg+0x87/0xb0 net/socket.c:2038 do_syscall_64+0x102/0x150 arch/x86/entry/common.c:285 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 ================================================================== and the reproducer: ================================================================== #include #include #include #include int main() { int sock = socket(PF_NETLINK, SOCK_DGRAM | SOCK_NONBLOCK, 0); struct msghdr msg; memset(&msg, 0, sizeof(msg)); char nlmsg_buf[32]; memset(nlmsg_buf, 0, sizeof(nlmsg_buf)); struct nlmsghdr *nlmsg = nlmsg_buf; nlmsg->nlmsg_len = 0x11; nlmsg->nlmsg_type = 0x1e; // RTM_NEWROUTE = RTM_BASE + 0x0e // type = 0x0e = 1110b // kind = 2 nlmsg->nlmsg_flags = 0x101; // NLM_F_ROOT | NLM_F_REQUEST nlmsg->nlmsg_seq = 0; nlmsg->nlmsg_pid = 0; nlmsg_buf[16] = (char)7; struct iovec iov; iov.iov_base = nlmsg_buf; iov.iov_len = 17; msg.msg_iov = &iov; msg.msg_iovlen = 1; sendmsg(sock, &msg, 0); return 0; } ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49a279a7cc15..9e2c0a7cb325 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3231,8 +3231,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, NULL) == 0) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 0) { + return -EINVAL; + } else if (err == 0) { if (tb[IFLA_MASTER]) br_idx = nla_get_u32(tb[IFLA_MASTER]); } -- cgit v1.2.3-59-g8ed1b From cd47512e51190efc34a6b90d5c6b54de036ea421 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 23 May 2017 08:19:49 -0700 Subject: net/phy: fix mdio-octeon dependency and build Fix build errors by making this driver depend on OF_MDIO, like several other similar drivers do. drivers/built-in.o: In function `octeon_mdiobus_remove': mdio-octeon.c:(.text+0x196ee0): undefined reference to `mdiobus_unregister' mdio-octeon.c:(.text+0x196ee8): undefined reference to `mdiobus_free' drivers/built-in.o: In function `octeon_mdiobus_probe': mdio-octeon.c:(.text+0x196f1d): undefined reference to `devm_mdiobus_alloc_size' mdio-octeon.c:(.text+0x196ffe): undefined reference to `of_mdiobus_register' mdio-octeon.c:(.text+0x197010): undefined reference to `mdiobus_free' Signed-off-by: Randy Dunlap Cc: Andrew Lunn Cc: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 60ffc9da6a28..c360dd6ead22 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -108,7 +108,7 @@ config MDIO_MOXART config MDIO_OCTEON tristate "Octeon and some ThunderX SOCs MDIO buses" depends on 64BIT - depends on HAS_IOMEM + depends on HAS_IOMEM && OF_MDIO select MDIO_CAVIUM help This module provides a driver for the Octeon and ThunderX MDIO -- cgit v1.2.3-59-g8ed1b From f2899788353c13891412b273fdff5f02d49aa40f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 23 May 2017 17:49:13 +0200 Subject: net: phy: marvell: Limit errata to 88m1101 The 88m1101 has an errata when configuring autoneg. However, it was being applied to many other Marvell PHYs as well. Limit its scope to just the 88m1101. Fixes: 76884679c644 ("phylib: Add support for Marvell 88e1111S and 88e1145") Reported-by: Daniel Walker Signed-off-by: Andrew Lunn Acked-by: Harini Katakam Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 66 ++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 272b051a0199..9097e42bec2e 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -255,34 +255,6 @@ static int marvell_config_aneg(struct phy_device *phydev) { int err; - /* The Marvell PHY has an errata which requires - * that certain registers get written in order - * to restart autonegotiation */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); - - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x1f); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x200c); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x5); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x100); - if (err < 0) - return err; - err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; @@ -316,6 +288,42 @@ static int marvell_config_aneg(struct phy_device *phydev) return 0; } +static int m88e1101_config_aneg(struct phy_device *phydev) +{ + int err; + + /* This Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation + */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x1f); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x200c); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x5); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x100); + if (err < 0) + return err; + + return marvell_config_aneg(phydev); +} + static int m88e1111_config_aneg(struct phy_device *phydev) { int err; @@ -1892,7 +1900,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &marvell_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1101_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, -- cgit v1.2.3-59-g8ed1b From b3c85a0fb2c79f2c945fa1305b39974d0acf3105 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 10 May 2017 20:06:58 +0200 Subject: drm/amdgpu: fix fundamental suspend/resume issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reinitializing the VM manager during suspend/resume is a very very bad idea since all the VMs are still active and kicking. This can lead to random VM faults after resume when new processes become the same client ID assigned. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 15 ++------------- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 15 ++------------- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 15 ++------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++-------------- 6 files changed, 30 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 07ff3b1514f1..1bf36c3542c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -672,6 +672,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; + atomic64_set(&id->owner, 0); id->gds_base = 0; id->gds_size = 0; id->gws_base = 0; @@ -680,6 +681,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, id->oa_size = 0; } +/** + * amdgpu_vm_reset_all_id - reset VMID to zero + * + * @adev: amdgpu device structure + * + * Reset VMID to force flush on next use + */ +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vm_id_manager *id_mgr = + &adev->vm_manager.id_mgr[i]; + + for (j = 1; j < id_mgr->num_ids; ++j) + amdgpu_vm_reset_id(adev, i, j); + } +} + /** * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo * @@ -2270,7 +2291,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; - atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d97e28b4bdc4..e1d951ece433 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -204,6 +204,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid); +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a572979f186c..d860939152df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v6_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v6_0_hw_fini(adev); return 0; @@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v6_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v6_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index a9083a16a250..2750e5c23813 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v7_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v7_0_hw_fini(adev); return 0; @@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v7_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v7_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 4ac99784160a..f56b4089ee9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v8_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v8_0_hw_fini(adev); return 0; @@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v8_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v8_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index dc1e1c1d6b24..f936332a069d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -791,10 +791,6 @@ static int gmc_v9_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v9_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } gmc_v9_0_hw_fini(adev); return 0; @@ -809,17 +805,9 @@ static int gmc_v9_0_resume(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v9_0_vm_init(adev); - if (r) { - dev_err(adev->dev, - "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } + amdgpu_vm_reset_all_ids(adev); - return r; + return 0; } static bool gmc_v9_0_is_idle(void *handle) -- cgit v1.2.3-59-g8ed1b From 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:41 -0400 Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans It appears that TCP checksum offloading has been broken for Q-in-Q vlans. The behavior was execerbated by the series commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'") that that enabled accleleration features on stacked vlans. However, event without that series, it is possible to trigger this issue. It just requires a lot more specialized configuration. The root cause is the interaction between how netdev_intersect_features() works, the features actually set on the vlan devices and HW having the ability to run checksum with longer headers. The issue starts when netdev_interesect_features() replaces NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM, if the HW advertises IP|IPV6 specific checksums. This happens for tagged and multi-tagged packets. However, HW that enables IP|IPV6 checksum offloading doesn't gurantee that packets with arbitrarily long headers can be checksummed. This patch disables IP|IPV6 checksums on the packet for multi-tagged packets. CC: Toshiaki Makita CC: Michal Kubecek Signed-off-by: Vladislav Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8d5fcd6284ce..283dc2f5364d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_interesect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support. + */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } -- cgit v1.2.3-59-g8ed1b From cc6e9de62a7f84c9293a2ea41bc412b55bb46e85 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:42 -0400 Subject: be2net: Fix offload features for Q-in-Q packets At least some of the be2net cards do not seem to be capabled of performing checksum offload computions on Q-in-Q packets. In these case, the recevied checksum on the remote is invalid and TCP syn packets are dropped. This patch adds a call to check disbled acceleration features on Q-in-Q tagged traffic. CC: Sathya Perla CC: Ajit Khaparde CC: Sriharsha Basavapatna CC: Somnath Kotur Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f3a09ab55900..4eee18ce9be4 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5078,9 +5078,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb, struct be_adapter *adapter = netdev_priv(dev); u8 l4_hdr = 0; - /* The code below restricts offload features for some tunneled packets. + /* The code below restricts offload features for some tunneled and + * Q-in-Q packets. * Offload features for normal (non tunnel) packets are unchanged. */ + features = vlan_features_check(skb, features); if (!skb->encapsulation || !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)) return features; -- cgit v1.2.3-59-g8ed1b From 2836b4f224d4fd7d1a2b23c3eecaf0f0ae199a74 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:43 -0400 Subject: virtio-net: enable TSO/checksum offloads for Q-in-Q vlans Since virtio does not provide it's own ndo_features_check handler, TSO, and now checksum offload, are disabled for stacked vlans. Re-enable the support and let the host take care of it. This restores/improves Guest-to-Guest performance over Q-in-Q vlans. Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9320d96a1632..3e9246cc49c3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1989,6 +1989,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_poll_controller = virtnet_netpoll, #endif .ndo_xdp = virtnet_xdp, + .ndo_features_check = passthru_features_check, }; static void virtnet_config_changed_work(struct work_struct *work) -- cgit v1.2.3-59-g8ed1b From 0a646f331db0eb9efc8d3a95a44872036d441d58 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:10:02 -0400 Subject: drm/amdgpu/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 6dc1410b380f..ec93714e4524 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev) u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (amdgpu_dpm_get_vrefresh(adev) > 120) + return true; + if (vblank_time < switch_limit) return true; else -- cgit v1.2.3-59-g8ed1b From 58d7e3e427db1bd68f33025519a9468140280a75 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:14:14 -0400 Subject: drm/radeon/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 7ba450832e6b..ea36dc4dd5d2 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev) u32 vblank_time = r600_dpm_get_vblank_time(rdev); u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (r600_dpm_get_vrefresh(rdev) > 120) + return true; + if (vblank_time < switch_limit) return true; else -- cgit v1.2.3-59-g8ed1b From 09be4a5219610a6fae3215d4f51f948d6f5d2609 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:46:12 -0400 Subject: drm/amd/powerplay/smu7: add vblank check for mclk switching (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check to make sure the vblank period is long enough to support mclk switching. v2: drop needless initial assignment (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 31 +++++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index a74a3db3056c..1445c51b6d05 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2655,6 +2655,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr *hwmgr) return sizeof(struct smu7_power_state); } +static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, + uint32_t vblank_time_us) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t switch_limit_us; + + switch (hwmgr->chip_id) { + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + switch_limit_us = data->is_memory_gddr5 ? 190 : 150; + break; + default: + switch_limit_us = data->is_memory_gddr5 ? 450 : 150; + break; + } + + if (vblank_time_us < switch_limit_us) + return true; + else + return false; +} static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *request_ps, @@ -2669,6 +2691,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, bool disable_mclk_switching; bool disable_mclk_switching_for_frame_lock; struct cgs_display_info info = {0}; + struct cgs_mode_info mode_info = {0}; const struct phm_clock_and_voltage_limits *max_limits; uint32_t i; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -2677,6 +2700,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, int32_t count; int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0; + info.mode_info = &mode_info; data->battery_state = (PP_StateUILabel_Battery == request_ps->classification.ui_label); @@ -2703,8 +2727,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, cgs_get_active_displays_info(hwmgr->device, &info); - /*TO DO result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/ - minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; @@ -2769,8 +2791,9 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - disable_mclk_switching = (1 < info.display_count) || - disable_mclk_switching_for_frame_lock; + disable_mclk_switching = ((1 < info.display_count) || + disable_mclk_switching_for_frame_lock || + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; -- cgit v1.2.3-59-g8ed1b From 2275a3a2fe9914ba6d76c8ea490da3c08342bd19 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:57:41 -0400 Subject: drm/amd/powerplay/smu7: disable mclk switching for high refresh rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the vblank period would allow it, it still seems to be problematic on some cards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Cc: stable@vger.kernel.org Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 1445c51b6d05..102eb6d029fa 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2793,7 +2793,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, disable_mclk_switching = ((1 < info.display_count) || disable_mclk_switching_for_frame_lock || - smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) || + (mode_info.refresh_rate > 120)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; -- cgit v1.2.3-59-g8ed1b From 3d18e33735a02b1a90aecf14410bf3edbfd4d3dc Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 11 May 2017 19:31:12 -0400 Subject: drm/radeon: Unbreak HPD handling for r600+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We end up reading the interrupt register for HPD5, and then writing it to HPD6 which on systems without anything using HPD5 results in permanently disabling hotplug on one of the display outputs after the first time we acknowledge a hotplug interrupt from the GPU. This code is really bad. But for now, let's just fix this. I will hopefully have a large patch series to refactor all of this soon. Reviewed-by: Christian König Signed-off-by: Lyude Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/evergreen.c | 4 ++-- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/si.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ccebe0f8d2e1..008c145b7f29 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7401,7 +7401,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -7431,7 +7431,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f130ec41ee4b..0bf103536404 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4927,7 +4927,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -4958,7 +4958,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 0a085176e79b..e06e2d8feab3 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3988,7 +3988,7 @@ static void r600_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index ceee87f029d9..76d1888528e6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6317,7 +6317,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -6348,7 +6348,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } -- cgit v1.2.3-59-g8ed1b From 7c4378f4523d4af05b5941ea906e7032631eb753 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 11 May 2017 18:22:17 +0800 Subject: drm/amdgpu: fix NULL pointer panic of emit_gds_switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ 338.384770] BUG: unable to handle kernel NULL pointer dereference at (null) [ 338.384817] IP: [< (null)>] (null) [ 338.385505] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 338.385950] Call Trace: [ 338.385993] [] ? amdgpu_vm_flush+0x283/0x400 [amdgpu] [ 338.386025] [] ? printk+0x4d/0x4f [ 338.386074] [] amdgpu_ib_schedule+0x4a6/0x4d0 [amdgpu] [ 338.386140] [] amdgpu_job_run+0x64/0x180 [amdgpu] [ 338.386203] [] amd_sched_main+0x2e9/0x4a0 [amdgpu] [ 338.386232] [] ? prepare_to_wait_event+0x110/0x110 [ 338.386295] [] ? amd_sched_select_entity+0xe0/0xe0 [amdgpu] [ 338.386327] [] kthread+0xd3/0xf0 [ 338.386349] [] ? kthread_park+0x60/0x60 [ 338.386376] [] ret_from_fork+0x25/0x30 [ 338.386401] Code: Bad RIP value. [ 338.386420] RIP [< (null)>] (null) [ 338.386443] RSP [ 338.386458] CR2: 0000000000000000 [ 338.398508] ---[ end trace 4c66fcdc74b9a0a2 ]--- Signed-off-by: Chunming Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1bf36c3542c1..8ecf82c5fe74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -634,7 +634,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) mutex_unlock(&id_mgr->lock); } - if (gds_switch_needed) { + if (ring->funcs->emit_gds_switch && gds_switch_needed) { id->gds_base = job->gds_base; id->gds_size = job->gds_size; id->gws_base = job->gws_base; -- cgit v1.2.3-59-g8ed1b From 3083696a1ee68f4845f8e9a21b91e343ff25eff3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 May 2017 13:13:45 +0800 Subject: drm/amd/powerplay: fix a signedness bugs Smatch complains about a signedness bug here: vega10_hwmgr.c:4202 vega10_force_clock_level() warn: always true condition '(i >= 0) => (0-u32max >= 0)' Fixes: 7b52db39a4c2 ("drm/amd/powerplay: fix bug sclk/mclk level can't be set on vega10.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index ad30f5d3a10d..2614af2f553f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4186,7 +4186,7 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); - uint32_t i; + int i; if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From a4d768e702de224cc85e0c8eac9311763403b368 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 22 May 2017 19:54:10 -0700 Subject: xfs: fix unaligned access in xfs_btree_visit_blocks This structure copy was throwing unaligned access warnings on sparc64: Kernel unaligned access at TPC[1043c088] xfs_btree_visit_blocks+0x88/0xe0 [xfs] xfs_btree_copy_ptrs does a memcpy, which avoids it. Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5392674bf893..3a673ba201aa 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ -- cgit v1.2.3-59-g8ed1b From 8affebe16d79ebefb1d9d6d56a46dc89716f9453 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 23 May 2017 08:30:46 -0700 Subject: xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff() xfs_find_get_desired_pgoff() is used to search for offset of hole or data in page range [index, end] (both inclusive), and the max number of pages to search should be at least one, if end == index. Otherwise the only page is missed and no hole or data is found, which is not correct. When block size is smaller than page size, this can be demonstrated by preallocating a file with size smaller than page size and writing data to the last block. E.g. run this xfs_io command on a 1k block size XFS on x86_64 host. # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \ -c "seek -d 0" /mnt/xfs/testfile wrote 1024/1024 bytes at offset 2048 1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec) Whence Result DATA EOF Data at offset 2k was missed, and lseek(2) returned ENXIO. This is uncovered by generic/285 subtest 07 and 08 on ppc64 host, where pagesize is 64k. Because a recent change to generic/285 reduced the preallocated file size to smaller than 64k. Cc: stable@vger.kernel.org # v3.7+ Signed-off-by: Eryu Guan Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 35703a801372..aefa2134a8cb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1049,7 +1049,7 @@ xfs_find_get_desired_pgoff( unsigned nr_pages; unsigned int i; - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); /* -- cgit v1.2.3-59-g8ed1b From 5375023ae1266553a7baa0845e82917d8803f48c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:22 -0700 Subject: xfs: Fix missed holes in SEEK_HOLE implementation XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as can be seen by the following command: xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k" -c "seek -h 0" file wrote 57344/57344 bytes at offset 0 56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec) wrote 8192/8192 bytes at offset 131072 8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec) Whence Result HOLE 139264 Where we can see that hole at offset 56k was just ignored by SEEK_HOLE implementation. The bug is in xfs_find_get_desired_pgoff() which does not properly detect the case when pages are not contiguous. Fix the problem by properly detecting when found page has larger offset than expected. CC: stable@vger.kernel.org Fixes: d126d43f631f996daeee5006714fed914be32368 Signed-off-by: Jan Kara Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index aefa2134a8cb..f1517e9928c7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1076,17 +1076,6 @@ xfs_find_get_desired_pgoff( break; } - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; - break; - } - for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; loff_t b_offset; @@ -1098,18 +1087,18 @@ xfs_find_get_desired_pgoff( * file mapping. However, page->index will not change * because we have a reference on the page. * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. + * If current page offset is beyond where we've ended, + * we've found a hole. */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } + if (type == HOLE_OFF && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { + found = true; + *offset = lastoff; goto out; } + /* Searching done if the page index is out of range. */ + if (page->index > end) + goto out; lock_page(page); /* -- cgit v1.2.3-59-g8ed1b From d7fd24257aa60316bf81093f7f909dc9475ae974 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:23 -0700 Subject: xfs: Fix off-by-in in loop termination in xfs_find_get_desired_pgoff() There is an off-by-one error in loop termination conditions in xfs_find_get_desired_pgoff() since 'end' may index a page beyond end of desired range if 'endoff' is page aligned. It doesn't have any visible effects but still it is good to fix it. Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f1517e9928c7..dc0e4cb7029b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1043,7 +1043,7 @@ xfs_find_get_desired_pgoff( index = startoff >> PAGE_SHIFT; endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_SHIFT; + end = (endoff - 1) >> PAGE_SHIFT; do { int want; unsigned nr_pages; -- cgit v1.2.3-59-g8ed1b From a54fba8f5a0dc36161cacdf2aa90f007f702ec1a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:24 -0700 Subject: xfs: Move handling of missing page into one place in xfs_find_get_desired_pgoff() Currently several places in xfs_find_get_desired_pgoff() handle the case of a missing page. Make them all handled in one place after the loop has terminated. Signed-off-by: Jan Kara Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index dc0e4cb7029b..5fb5a0958a14 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1052,29 +1052,8 @@ xfs_find_get_desired_pgoff( want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); - /* - * No page mapped into given range. If we are searching holes - * and if this is the first time we got into the loop, it means - * that the given offset is landed in a hole, return it. - * - * If we have already stepped through some block buffers to find - * holes but they all contains data. In this case, the last - * offset is already updated and pointed to the end of the last - * mapped page, if it does not reach the endpoint to search, - * that means there should be a hole between them. - */ - if (nr_pages == 0) { - /* Data search found nothing */ - if (type == DATA_OFF) - break; - - ASSERT(type == HOLE_OFF); - if (lastoff == startoff || lastoff < endoff) { - found = true; - *offset = lastoff; - } + if (nr_pages == 0) break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1140,21 +1119,20 @@ xfs_find_get_desired_pgoff( /* * The number of returned pages less than our desired, search - * done. In this case, nothing was found for searching data, - * but we found a hole behind the last offset. + * done. */ - if (nr_pages < want) { - if (type == HOLE_OFF) { - *offset = lastoff; - found = true; - } + if (nr_pages < want) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + /* No page at lastoff and we are not done - we found a hole. */ + if (type == HOLE_OFF && lastoff < endoff) { + *offset = lastoff; + found = true; + } out: pagevec_release(&pvec); return found; -- cgit v1.2.3-59-g8ed1b From 11387fe4a98f75d1f4cdb3efe3b42b19205c9df5 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Tue, 23 May 2017 18:37:27 -0400 Subject: geneve: fix fill_info when using collect_metadata Since 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") fill_info does not return UDP_ZERO_CSUM6_RX when using COLLECT_METADATA. This is because it uses ip_tunnel_info_af() with the device level info, which is not valid for COLLECT_METADATA. Fix by checking for the presence of the actual sockets. Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/geneve.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index dec5d563ab19..959fd12d2e67 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; - if (ip_tunnel_info_af(info) == AF_INET) { + if (rtnl_dereference(geneve->sock4)) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; @@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(info->key.tun_flags & TUNNEL_CSUM))) goto nla_put_failure; + } + #if IS_ENABLED(CONFIG_IPV6) - } else { + if (rtnl_dereference(geneve->sock6)) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; @@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, !geneve->use_udp6_rx_checksums)) goto nla_put_failure; -#endif } +#endif if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || -- cgit v1.2.3-59-g8ed1b From b62ce397675502325d4282924bf70cfb6a005c3a Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 22 May 2017 13:11:41 +0800 Subject: drm/amdgpu: fix null point error when rmmod amdgpu. this bug happened when amdgpu load failed. [ 75.740951] BUG: unable to handle kernel paging request at 00000000000031c0 [ 75.748167] IP: [] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu] [ 75.755774] PGD 0 [ 75.759185] Oops: 0000 [#1] SMP [ 75.762408] Modules linked in: amdgpu(OE-) ttm(OE) drm_kms_helper(OE) drm(OE) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) eeepc_wmi(E) asus_wmi(E) sparse_keymap(E) intel_rapl(E) snd_hda_codec_hdmi(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_intel(E) snd_hda_codec(E) snd_hda_core(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) snd_hwdep(E) snd_pcm(E) snd_seq_midi(E) coretemp(E) kvm_intel(E) snd_seq_midi_event(E) snd_rawmidi(E) kvm(E) snd_seq(E) joydev(E) snd_seq_device(E) snd_timer(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) mei_me(E) ghash_clmulni_intel(E) snd(E) aesni_intel(E) mei(E) soundcore(E) aes_x86_64(E) shpchp(E) serio_raw(E) lrw(E) acpi_pad(E) gf128mul(E) glue_helper(E) ablk_helper(E) mac_hid(E) [ 75.835574] cryptd(E) parport_pc(E) ppdev(E) lp(E) nfsd(E) parport(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) mxm_wmi(E) psmouse(E) e1000e(E) ptp(E) pps_core(E) ahci(E) libahci(E) wmi(E) video(E) i2c_hid(E) hid(E) [ 75.858489] CPU: 5 PID: 1603 Comm: rmmod Tainted: G OE 4.9.0-custom #2 [ 75.866183] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 0901 08/31/2015 [ 75.875050] task: ffff88045d1bbb80 task.stack: ffffc90002de4000 [ 75.881094] RIP: 0010:[] [] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu] [ 75.891238] RSP: 0018:ffffc90002de7d48 EFLAGS: 00010286 [ 75.896648] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001 [ 75.903933] RDX: 0000000000000000 RSI: ffff88045d1bbb80 RDI: 0000000000000286 [ 75.911183] RBP: ffffc90002de7d50 R08: 0000000000000502 R09: 0000000000000004 [ 75.918449] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880464bf0000 [ 75.925675] R13: ffffffffa0853000 R14: 0000000000000000 R15: 0000564e44f88210 [ 75.932980] FS: 00007f13d5400700(0000) GS:ffff880476540000(0000) knlGS:0000000000000000 [ 75.941238] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 75.947088] CR2: 00000000000031c0 CR3: 000000045fd0b000 CR4: 00000000003406e0 [ 75.954332] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 75.961566] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 75.968834] Stack: [ 75.970881] ffff880464bf0000 ffffc90002de7d60 ffffffffa0636592 ffffc90002de7d80 [ 75.978454] ffffffffa059015f ffff880464bf0000 ffff880464bf0000 ffffc90002de7da8 [ 75.986076] ffffffffa0595216 ffff880464bf0000 ffff880460f4d000 ffffffffa0853000 [ 75.993692] Call Trace: [ 75.996177] [] amdgpu_driver_lastclose_kms+0x12/0x20 [amdgpu] [ 76.003700] [] drm_lastclose+0x2f/0xd0 [drm] [ 76.009777] [] drm_dev_unregister+0x16/0xd0 [drm] [ 76.016255] [] drm_put_dev+0x34/0x70 [drm] [ 76.022139] [] amdgpu_pci_remove+0x15/0x20 [amdgpu] [ 76.028800] [] pci_device_remove+0x39/0xc0 [ 76.034661] [] __device_release_driver+0x9a/0x140 [ 76.041121] [] driver_detach+0xb8/0xc0 [ 76.046575] [] bus_remove_driver+0x55/0xd0 [ 76.052401] [] driver_unregister+0x2c/0x50 [ 76.058244] [] pci_unregister_driver+0x29/0x90 [ 76.064466] [] drm_pci_exit+0x9e/0xb0 [drm] [ 76.070507] [] amdgpu_exit+0x1c/0x32 [amdgpu] [ 76.076609] [] SyS_delete_module+0x1a0/0x200 [ 76.082627] [] ? rcu_eqs_enter.isra.36+0x4a/0x50 [ 76.089001] [] do_syscall_64+0x6e/0x180 [ 76.094583] [] entry_SYSCALL64_slow_path+0x25/0x25 [ 76.101114] Code: 94 c0 c3 31 c0 5d c3 0f 1f 40 00 0f 1f 44 00 00 55 31 c0 48 89 e5 53 48 89 fb 48 c7 c7 1d 21 84 a0 e8 ab 77 b3 e0 e8 fc 8b d7 e0 <48> 8b bb c0 31 00 00 48 85 ff 74 09 e8 ff eb fc ff 85 c0 75 03 [ 76.121432] RIP [] amdgpu_fbdev_restore_mode+0x20/0x60 [amdgpu] Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 236d9950221b..c0d8c6ff6380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev) { - struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev; + struct amdgpu_fbdev *afbdev; struct drm_fb_helper *fb_helper; int ret; + if (!adev) + return; + + afbdev = adev->mode_info.rfbdev; + if (!afbdev) return; -- cgit v1.2.3-59-g8ed1b From 65d786c21bf8140dac83563306f46fe0b13a9aaa Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 23 May 2017 18:18:37 -0500 Subject: net: fix potential null pointer dereference Add null check to avoid a potential null pointer dereference. Addresses-Coverity-ID: 1408831 Signed-off-by: Gustavo A. R. Silva Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 4fea1b3dfbb4..7b652bb7ebe4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -873,7 +873,7 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[]) /* Check if there's an existing gtpX device to configure */ dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK])); - if (dev->netdev_ops == >p_netdev_ops) + if (dev && dev->netdev_ops == >p_netdev_ops) gtp = netdev_priv(dev); put_net(net); -- cgit v1.2.3-59-g8ed1b From 4b3c7dbbfff0673e8a89575414b864d8b001d3bb Mon Sep 17 00:00:00 2001 From: KT Liao Date: Thu, 25 May 2017 10:06:21 -0700 Subject: Input: elan_i2c - clear INT before resetting controller Some old touchpad FWs need to have interrupt cleared before issuing reset command after updating firmware. We clear interrupt by attempting to read full report from the controller, and discarding any data read. Signed-off-by: KT Liao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_i2c.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index a679e56c44cd..765879dcaf85 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -557,7 +557,14 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, long ret; int error; int len; - u8 buffer[ETP_I2C_INF_LENGTH]; + u8 buffer[ETP_I2C_REPORT_LEN]; + + len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN); + if (len != ETP_I2C_REPORT_LEN) { + error = len < 0 ? len : -EIO; + dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n", + error, len); + } reinit_completion(completion); enable_irq(client->irq); -- cgit v1.2.3-59-g8ed1b From a04f144059ac09f2c3da50b5707df589044aad66 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Tue, 23 May 2017 13:41:47 -0700 Subject: Input: elan_i2c - ignore signals when finishing updating firmware Use wait_for_completion_timeout() instead of wait_for_completion_interruptible_timeout() to avoid stray signals ruining firmware update. Our timeout is only 300 msec so we are fine simply letting it expire in case device misbehaves. Signed-off-by: KT Liao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_i2c.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index 765879dcaf85..f431da07f861 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -554,7 +554,6 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, struct completion *completion) { struct device *dev = &client->dev; - long ret; int error; int len; u8 buffer[ETP_I2C_REPORT_LEN]; @@ -570,23 +569,19 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, enable_irq(client->irq); error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET); - if (!error) - ret = wait_for_completion_interruptible_timeout(completion, - msecs_to_jiffies(300)); - disable_irq(client->irq); - if (error) { dev_err(dev, "device reset failed: %d\n", error); - return error; - } else if (ret == 0) { + } else if (!wait_for_completion_timeout(completion, + msecs_to_jiffies(300))) { dev_err(dev, "timeout waiting for device reset\n"); - return -ETIMEDOUT; - } else if (ret < 0) { - error = ret; - dev_err(dev, "error waiting for device reset: %d\n", error); - return error; + error = -ETIMEDOUT; } + disable_irq(client->irq); + + if (error) + return error; + len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH); if (len != ETP_I2C_INF_LENGTH) { error = len < 0 ? len : -EIO; -- cgit v1.2.3-59-g8ed1b From 7c3f1875c66fbc19762760097cabc91849ea0bbb Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Wed, 24 May 2017 10:22:22 +0200 Subject: net: move somaxconn init from sysctl code The default value for somaxconn is set in sysctl_core_net_init(), but this function is not called when kernel is configured without CONFIG_SYSCTL. This results in the kernel not being able to accept TCP connections, because the backlog has zero size. Usually, the user ends up with: "TCP: request_sock_TCP: Possible SYN flooding on port 7. Dropping request. Check SNMP counters." If SYN cookies are not enabled the connection is rejected. Before ef547f2ac16 (tcp: remove max_qlen_log), the effects were less severe, because the backlog was always at least eight slots long. Signed-off-by: Roman Kapl Signed-off-by: David S. Miller --- net/core/net_namespace.c | 19 +++++++++++++++++++ net/core/sysctl_net_core.c | 2 -- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1934efd4a9d4..26bbfababff2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -315,6 +315,25 @@ out_undo: goto out; } +static int __net_init net_defaults_init_net(struct net *net) +{ + net->core.sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations net_defaults_ops = { + .init = net_defaults_init_net, +}; + +static __init int net_defaults_init(void) +{ + if (register_pernet_subsys(&net_defaults_ops)) + panic("Cannot initialize net default settings"); + + return 0; +} + +core_initcall(net_defaults_init); #ifdef CONFIG_NET_NS static struct ucounts *inc_net_namespaces(struct user_namespace *ns) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ea23254b2457..b7cd9aafe99e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl; - net->core.sysctl_somaxconn = SOMAXCONN; - tbl = netns_core_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From ba615f675281d76fd19aa03558777f81fb6b6084 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 24 May 2017 09:59:31 -0700 Subject: tcp: avoid fastopen API to be used on AF_UNSPEC Fastopen API should be used to perform fastopen operations on the TCP socket. It does not make sense to use fastopen API to perform disconnect by calling it with AF_UNSPEC. The fastopen data path is also prone to race conditions and bugs when using with AF_UNSPEC. One issue reported and analyzed by Vegard Nossum is as follows: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Thread A: Thread B: ------------------------------------------------------------------------ sendto() - tcp_sendmsg() - sk_stream_memory_free() = 0 - goto wait_for_sndbuf - sk_stream_wait_memory() - sk_wait_event() // sleep | sendto(flags=MSG_FASTOPEN, dest_addr=AF_UNSPEC) | - tcp_sendmsg() | - tcp_sendmsg_fastopen() | - __inet_stream_connect() | - tcp_disconnect() //because of AF_UNSPEC | - tcp_transmit_skb()// send RST | - return 0; // no reconnect! | - sk_stream_wait_connect() | - sock_error() | - xchg(&sk->sk_err, 0) | - return -ECONNRESET - ... // wake up, see sk->sk_err == 0 - skb_entail() on TCP_CLOSE socket If the connection is reopened then we will send a brand new SYN packet after thread A has already queued a buffer. At this point I think the socket internal state (sequence numbers etc.) becomes messed up. When the new connection is closed, the FIN-ACK is rejected because the sequence number is outside the window. The other side tries to retransmit, but __tcp_retransmit_skb() calls tcp_trim_head() on an empty skb which corrupts the skb data length and hits a BUG() in copy_and_csum_bits(). +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Hence, this patch adds a check for AF_UNSPEC in the fastopen data path and return EOPNOTSUPP to user if such case happens. Fixes: cf60af03ca4e7 ("tcp: Fast Open client - sendmsg(MSG_FASTOPEN)") Reported-by: Vegard Nossum Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 842b575f8fdd..59792d283ff8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); + struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && + uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ @@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags, 1); /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst -- cgit v1.2.3-59-g8ed1b From 5990baaa6d7b437dfcf58b7021ca56b1d6b35869 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 24 May 2017 15:19:35 -0700 Subject: arp: fixed -Wuninitialized compiler warning Commit 7d472a59c0e5ec117220a05de6b370447fb6cb66 ("arp: always override existing neigh entries with gratuitous ARP") introduced a compiler warning: net/ipv4/arp.c:880:35: warning: 'addr_type' may be used uninitialized in this function [-Wmaybe-uninitialized] While the code logic seems to be correct and doesn't allow the variable to be used uninitialized, and the warning is not consistently reproducible, it's still worth fixing it for other people not to waste time looking at the warning in case it pops up in the build environment. Yes, compiler is probably at fault, but we will need to accommodate. Fixes: 7d472a59c0e5 ("arp: always override existing neigh entries with gratuitous ARP") Signed-off-by: Ihar Hrachyshka Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ae96e6f3e0cb..e9f3386a528b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -863,8 +863,8 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); + addr_type = -1; if (n || IN_DEV_ARP_ACCEPT(in_dev)) { - addr_type = -1; is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, sip, tip, sha, tha); } -- cgit v1.2.3-59-g8ed1b From 1ad2f5838d345e1c102bd1cd27c4f4c1349b0dc8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:05 +0200 Subject: bpf: fix incorrect pruning decision when alignment must be tracked Currently, when we enforce alignment tracking on direct packet access, the verifier lets the following program pass despite doing a packet write with unaligned access: 0: (61) r2 = *(u32 *)(r1 +76) 1: (61) r3 = *(u32 *)(r1 +80) 2: (61) r7 = *(u32 *)(r1 +8) 3: (bf) r0 = r2 4: (07) r0 += 14 5: (25) if r7 > 0x1 goto pc+4 R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 6: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 7: (63) *(u32 *)(r0 -4) = r0 8: (b7) r0 = 0 9: (95) exit from 6 to 8: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 8: (b7) r0 = 0 9: (95) exit from 5 to 10: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=2 R10=fp 10: (07) r0 += 1 11: (05) goto pc-6 6: safe <----- here, wrongly found safe processed 15 insns However, if we enforce a pruning mismatch by adding state into r8 which is then being mismatched in states_equal(), we find that for the otherwise same program, the verifier detects a misaligned packet access when actually walking that path: 0: (61) r2 = *(u32 *)(r1 +76) 1: (61) r3 = *(u32 *)(r1 +80) 2: (61) r7 = *(u32 *)(r1 +8) 3: (b7) r8 = 1 4: (bf) r0 = r2 5: (07) r0 += 14 6: (25) if r7 > 0x1 goto pc+4 R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 7: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 8: (63) *(u32 *)(r0 -4) = r0 9: (b7) r0 = 0 10: (95) exit from 7 to 9: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 9: (b7) r0 = 0 10: (95) exit from 6 to 11: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=2 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 11: (07) r0 += 1 12: (b7) r8 = 0 13: (05) goto pc-7 <----- mismatch due to r8 7: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=15,r=15) R1=ctx R2=pkt(id=0,off=0,r=15) R3=pkt_end R7=inv,min_value=2 R8=imm0,min_value=0,max_value=0,min_align=2147483648 R10=fp 8: (63) *(u32 *)(r0 -4) = r0 misaligned packet access off 2+15+-4 size 4 The reason why we fail to see it in states_equal() is that the third test in compare_ptrs_to_packet() ... if (old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; ... will let the above pass. The situation we run into is that old->off <= cur->off (14 <= 15), meaning that prior walked paths went with smaller offset, which was later used in the packet access after successful packet range check and found to be safe already. For example: Given is R0=pkt(id=0,off=0,r=0). Adding offset 14 as in above program to it, results in R0=pkt(id=0,off=14,r=0) before the packet range test. Now, testing this against R3=pkt_end with 'if r0 > r3 goto out' will transform R0 into R0=pkt(id=0,off=14,r=14) for the case when we're within bounds. A write into the packet at offset *(u32 *)(r0 -4), that is, 2 + 14 -4, is valid and aligned (2 is for NET_IP_ALIGN). After processing this with all fall-through paths, we later on check paths from branches. When the above skb->mark test is true, then we jump near the end of the program, perform r0 += 1, and jump back to the 'if r0 > r3 goto out' test we've visited earlier already. This time, R0 is of type R0=pkt(id=0,off=15,r=0), and we'll prune that part because this time we'll have a larger safe packet range, and we already found that with off=14 all further insn were already safe, so it's safe as well with a larger off. However, the problem is that the subsequent write into the packet with 2 + 15 -4 is then unaligned, and not caught by the alignment tracking. Note that min_align, aux_off, and aux_off_align were all 0 in this example. Since we cannot tell at this time what kind of packet access was performed in the prior walk and what minimal requirements it has (we might do so in the future, but that requires more complexity), fix it to disable this pruning case for strict alignment for now, and let the verifier do check such paths instead. With that applied, the test cases pass and reject the program due to misalignment. Fixes: d1174416747d ("bpf: Track alignment of register values in the verifier.") Reference: http://patchwork.ozlabs.org/patch/761909/ Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c72cd41f5b8b..e37e06b1229d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -843,9 +843,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, { bool strict = env->strict_alignment; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - strict = true; - switch (reg->type) { case PTR_TO_PACKET: return check_pkt_ptr_alignment(reg, off, size, strict); @@ -2696,7 +2693,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct bpf_reg_state *old, +static bool compare_ptrs_to_packet(struct bpf_verifier_env *env, + struct bpf_reg_state *old, struct bpf_reg_state *cur) { if (old->id != cur->id) @@ -2739,7 +2737,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * 'if (R4 > data_end)' and all further insn were already good with r=20, * so they will be good with r=30 and we can prune the search. */ - if (old->off <= cur->off && + if (!env->strict_alignment && old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; @@ -2810,7 +2808,7 @@ static bool states_equal(struct bpf_verifier_env *env, continue; if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && - compare_ptrs_to_packet(rold, rcur)) + compare_ptrs_to_packet(env, rold, rcur)) continue; return false; @@ -3588,10 +3586,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) } else { log_level = 0; } - if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT) + + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; - else - env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) @@ -3697,7 +3695,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, mutex_lock(&bpf_verifier_lock); log_level = 0; + env->strict_alignment = false; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + env->strict_alignment = true; env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), -- cgit v1.2.3-59-g8ed1b From a9789ef9afcb4fb0193f8dd94f2665ba3ad71e79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:06 +0200 Subject: bpf: properly reset caller saved regs after helper call and ld_abs/ind Currently, after performing helper calls, we clear all caller saved registers, that is r0 - r5 and fill r0 depending on struct bpf_func_proto specification. The way we reset these regs can affect pruning decisions in later paths, since we only reset register's imm to 0 and type to NOT_INIT. However, we leave out clearing of other variables such as id, min_value, max_value, etc, which can later on lead to pruning mismatches due to stale data. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e37e06b1229d..339c8a1371de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; +static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +{ + BUG_ON(regno >= MAX_BPF_REG); + + memset(®s[regno], 0, sizeof(regs[regno])); + regs[regno].type = NOT_INIT; + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + static void init_reg_state(struct bpf_reg_state *regs) { int i; - for (i = 0; i < MAX_BPF_REG; i++) { - regs[i].type = NOT_INIT; - regs[i].imm = 0; - regs[i].min_value = BPF_REGISTER_MIN_RANGE; - regs[i].max_value = BPF_REGISTER_MAX_RANGE; - regs[i].min_align = 0; - regs[i].aux_off = 0; - regs[i].aux_off_align = 0; - } + for (i = 0; i < MAX_BPF_REG; i++) + mark_reg_not_init(regs, i); /* frame pointer */ regs[BPF_REG_FP].type = FRAME_PTR; @@ -1346,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs = state->regs; - struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1413,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) } /* reset caller saved regs */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* update return register */ if (fn->ret_type == RET_INTEGER) { @@ -2445,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -2478,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* reset caller saved regs to unreadable */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* mark destination R0 register as readable, since it contains * the value fetched from the packet -- cgit v1.2.3-59-g8ed1b From 41703a731066fde79c3e5ccf3391cf77a98aeda5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:07 +0200 Subject: bpf: add bpf_clone_redirect to bpf_helper_changes_pkt_data The bpf_clone_redirect() still needs to be listed in bpf_helper_changes_pkt_data() since we call into bpf_try_make_head_writable() from there, thus we need to invalidate prior pkt regs as well. Fixes: 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index a253a6197e6b..a6bb95fa87b2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_change_head || func == bpf_skb_change_tail || func == bpf_skb_pull_data || + func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head) -- cgit v1.2.3-59-g8ed1b From a316338cb71a3260201490e615f2f6d5c0d8fb2c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:08 +0200 Subject: bpf: fix wrong exposure of map_flags into fdinfo for lpm trie_alloc() always needs to have BPF_F_NO_PREALLOC passed in via attr->map_flags, since it does not support preallocation yet. We check the flag, but we never copy the flag into trie->map.map_flags, which is later on exposed into fdinfo and used by loaders such as iproute2. Latter uses this in bpf_map_selfcheck_pinned() to test whether a pinned map has the same spec as the one from the BPF obj file and if not, bails out, which is currently the case for lpm since it exposes always 0 as flags. Also copy over flags in array_map_alloc() and stack_map_alloc(). They always have to be 0 right now, but we should make sure to not miss to copy them over at a later point in time when we add actual flags for them to use. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Jarno Rajahalme Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 1 + kernel/bpf/lpm_trie.c | 1 + kernel/bpf/stackmap.c | 1 + 3 files changed, 3 insertions(+) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 5e00b2333c26..172dc8ee0e3b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; + array->map.map_flags = attr->map_flags; array->elem_size = elem_size; if (!percpu) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 39cfafd895b8..b09185f0f17d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) trie->map.key_size = attr->key_size; trie->map.value_size = attr->value_size; trie->map.max_entries = attr->max_entries; + trie->map.map_flags = attr->map_flags; trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 4dfd6f2ec2f9..31147d730abf 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->map.key_size = attr->key_size; smap->map.value_size = value_size; smap->map.max_entries = attr->max_entries; + smap->map.map_flags = attr->map_flags; smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; -- cgit v1.2.3-59-g8ed1b From 614d0d77b49a9b131e58b77473698ab5b2c525b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:09 +0200 Subject: bpf: add various verifier test cases This patch adds various verifier test cases: 1) A test case for the pruning issue when tracking alignment is used. 2) Various PTR_TO_MAP_VALUE_OR_NULL tests to make sure pointer arithmetic turns such register into UNKNOWN_VALUE type. 3) Test cases for the special treatment of LD_ABS/LD_IND to make sure verifier doesn't break calling convention here. Latter is needed, since f.e. arm64 JIT uses r1 - r5 for storing temporary data, so they really must be marked as NOT_INIT. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 10 ++ tools/include/linux/filter.h | 10 ++ tools/testing/selftests/bpf/test_verifier.c | 239 +++++++++++++++++++++++++++- 3 files changed, 255 insertions(+), 4 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 56197f82af45..62d948f80730 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -272,6 +272,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 390d7c9685fd..4ce25d43e8e3 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -208,6 +208,16 @@ .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3773562056da..cabb19b1e371 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -49,6 +49,7 @@ #define MAX_NR_MAPS 4 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) +#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) struct bpf_test { const char *descr; @@ -2614,6 +2615,30 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test17 (pruning, alignment)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_A(-6), + }, + .errstr = "misaligned packet access off 2+15+-4 size 4", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -3340,6 +3365,70 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, + { + "alu ops on ptr_to_map_value_or_null, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, { "invalid memory access with multiple map_lookup_elem calls", .insns = { @@ -4937,7 +5026,149 @@ static struct bpf_test tests[] = { .fixup_map_in_map = { 3 }, .errstr = "R1 type=map_value_or_null expected=map_ptr", .result = REJECT, - } + }, + { + "ld_abs: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "ld_ind: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 1), + BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -5059,9 +5290,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog, map_fds); - fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, "GPL", 0, bpf_vlog, - sizeof(bpf_vlog)); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog)); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; -- cgit v1.2.3-59-g8ed1b From 791caeb084c57e3a4d648cf1ee799d1f70c0ef4e Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 24 May 2017 16:35:49 -0700 Subject: test_bpf: Add a couple of tests for BPF_JSGE. Some JITs can optimize comparisons with zero. Add a couple of BPF_JSGE tests against immediate zero. Signed-off-by: David Daney Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/test_bpf.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 889bc31785be..be88cbaadde3 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4504,6 +4504,44 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "JMP_JSGE_K: Signed jump: value walk 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -3), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 6), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 4), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 2), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: value walk 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -3), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 4), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 2), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGT | BPF_K */ { "JMP_JGT_K: if (3 > 2) return 1", -- cgit v1.2.3-59-g8ed1b From 797a93647a48d6cb8a20641a86a71713a947f786 Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Wed, 24 May 2017 19:45:17 -0700 Subject: bonding: Don't update slave->link until ready to commit In the loadbalance arp monitoring scheme, when a slave link change is detected, the slave->link is immediately updated and slave_state_changed is set. Later down the function, the rtnl_lock is acquired and the changes are committed, updating the bond link state. However, the acquisition of the rtnl_lock can fail. The next time the monitor runs, since slave->link is already updated, it determines that link is unchanged. This results in the bond link state permanently out of sync with the slave link. This patch modifies bond_loadbalance_arp_mon() to handle link changes identical to bond_ab_arp_{inspect/commit}(). The new link state is maintained in slave->new_link until we're ready to commit at which point it's copied into slave->link. NOTE: miimon_{inspect/commit}() has a more complex state machine requiring the use of the bond_{propose,commit}_link_state() functions which maintains the intermediate state in slave->link_new_state. The arp monitors don't require that. Testing: This bug is very easy to reproduce with the following steps. 1. In a loop, toggle a slave link of a bond slave interface. 2. In a separate loop, do ifconfig up/down of an unrelated interface to create contention for rtnl_lock. Within a few iterations, the bond link goes out of sync with the slave link. Signed-off-by: Nithin Nayak Sujir Cc: Mahesh Bandewar Cc: Jay Vosburgh Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 73313318399c..2359478b977f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2612,11 +2612,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); + slave->new_link = BOND_LINK_NOCHANGE; + if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->link = BOND_LINK_UP; + slave->new_link = BOND_LINK_UP; slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2643,7 +2645,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->link = BOND_LINK_DOWN; + slave->new_link = BOND_LINK_DOWN; slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2674,6 +2676,11 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!rtnl_trylock()) goto re_arm; + bond_for_each_slave(bond, slave, iter) { + if (slave->new_link != BOND_LINK_NOCHANGE) + slave->link = slave->new_link; + } + if (slave_state_changed) { bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) -- cgit v1.2.3-59-g8ed1b From 9bdcfb10f221e796c9619fe48655e0f1272f1d92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 15:14:43 +0200 Subject: nvme-pci: consistencly use ctrl->device for logging This is what most of the code already does and gives much more useful prefixes than the device embedded in the pci_dev. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4c2ff2bb26bc..bf8bec39c017 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { - dev_warn(dev->dev, "unable to set dbbuf\n"); + dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); } @@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS, &pci_status); if (result == PCIBIOS_SUCCESSFUL) - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n", csts, pci_status); else - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", csts, result); } @@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) */ if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) { dev->q_depth = 2; - dev_warn(dev->dev, "detected Apple NVMe controller, set " - "queue depth=%u to work around controller resets\n", + dev_warn(dev->ctrl.device, "detected Apple NVMe controller, " + "set queue depth=%u to work around controller resets\n", dev->q_depth); } @@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (dev->cmbsz) { if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, &dev_attr_cmb.attr, NULL)) - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "failed to add sysfs attribute for CMB\n"); } } -- cgit v1.2.3-59-g8ed1b From d3d5b87ddde09bade512526f6df90e8c06c28230 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 15:14:44 +0200 Subject: nvme: replace is_flags field in nvme_ctrl_ops with a flags field So that we can have more flags for transport-specific behavior. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 3 ++- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/loop.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 04e115834702..228f7c73e2f1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1605,7 +1605,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); - if (ctrl->ops->is_fabrics) { + if (ctrl->ops->flags & NVME_F_FABRICS) { ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->ioccsz = le32_to_cpu(id->ioccsz); ctrl->iorcsz = le32_to_cpu(id->iorcsz); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 14a009e43aa5..5b14cbefb724 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2647,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 29c708ca9621..7c4b0f6636c5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -208,7 +208,8 @@ struct nvme_ns { struct nvme_ctrl_ops { const char *name; struct module *module; - bool is_fabrics; + unsigned int flags; +#define NVME_F_FABRICS (1 << 0) int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e2c18f3d9dcf..28bd255c144d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1792,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index feb497134aee..e503cfff0337 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .name = "loop", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, -- cgit v1.2.3-59-g8ed1b From c81bfba9983fc44210d3eb5971e0faac597bf50d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 15:14:45 +0200 Subject: nvme: only setup block integrity if supported by the driver Currently only the PCIe driver supports metadata, so we should not claim integrity support for the other drivers. This prevents nasty crashes with targets that advertise metadata support on fabrics. Also use the opportunity to factor out some code into a separate helper that isn't even compiled if CONFIG_BLK_DEV_INTEGRITY is disabled. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 50 ++++++++++++++++++++++++++++++------------------ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 1 + 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 228f7c73e2f1..a60926410438 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, + u16 bs) +{ + struct nvme_ns *ns = disk->private_data; + u16 old_ms = ns->ms; + u8 pi_type = 0; + + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + + /* PI implementation requires metadata equal t10 pi tuple size */ + if (ns->ms == sizeof(struct t10_pi_tuple)) + pi_type = id->dps & NVME_NS_DPS_PI_MASK; + + if (blk_get_integrity(disk) && + (ns->pi_type != pi_type || ns->ms != old_ms || + bs != queue_logical_block_size(disk->queue) || + (ns->ms && ns->ext))) + blk_integrity_unregister(disk); + + ns->pi_type = pi_type; +} + static void nvme_init_integrity(struct nvme_ns *ns) { struct blk_integrity integrity; @@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns) blk_queue_max_integrity_segments(ns->queue, 1); } #else +static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, + u16 bs) +{ +} static void nvme_init_integrity(struct nvme_ns *ns) { } @@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; - u8 lbaf, pi_type; - u16 old_ms; - unsigned short bs; - - old_ms = ns->ms; - lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; - ns->lba_shift = id->lbaf[lbaf].ds; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + u16 bs; /* * If identify namespace failed, use default 512 byte block size so * block layer can use before failing read/write for 0 capacity. */ + ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; bs = 1 << ns->lba_shift; - /* XXX: PI implementation requires metadata equal t10 pi tuple size */ - pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? - id->dps & NVME_NS_DPS_PI_MASK : 0; blk_mq_freeze_queue(disk->queue); - if (blk_get_integrity(disk) && (ns->pi_type != pi_type || - ns->ms != old_ms || - bs != queue_logical_block_size(disk->queue) || - (ns->ms && ns->ext))) - blk_integrity_unregister(disk); - ns->pi_type = pi_type; + if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) + nvme_prep_integrity(disk, id, bs); blk_queue_logical_block_size(ns->queue, bs); - if (ns->ms && !blk_get_integrity(disk) && !ns->ext) nvme_init_integrity(ns); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7c4b0f6636c5..9d6a070d4391 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -210,6 +210,7 @@ struct nvme_ctrl_ops { struct module *module; unsigned int flags; #define NVME_F_FABRICS (1 << 0) +#define NVME_F_METADATA_SUPPORTED (1 << 1) int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bf8bec39c017..6103b178e43a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, + .flags = NVME_F_METADATA_SUPPORTED, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, -- cgit v1.2.3-59-g8ed1b From 50af47d04ca530544b27affffb0722f158e2bb9c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 24 May 2017 15:06:31 -0700 Subject: nvme: Quirk APST on Intel 600P/P3100 devices They have known firmware bugs. A fix is apparently in the works -- once fixed firmware is available, someone from Intel (Hi, Keith!) can adjust the quirk accordingly. Cc: stable@vger.kernel.org # v4.11 Cc: Kai-Heng Feng Cc: Mario Limonciello Signed-off-by: Andy Lutomirski Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6103b178e43a..d52701df7245 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2294,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, + { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ -- cgit v1.2.3-59-g8ed1b From a8ecdd7117ee68fe27009acc8021423870c1dcd7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 May 2017 16:38:06 -0700 Subject: blk-mq: Only register debugfs attributes for blk-mq queues The code in blk-mq-debugfs.c assumes that it is working on a blk-mq queue and is not intended to work on a blk-sq queue. Hence only register blk-mq debugfs attributes for blk-mq queues. Fixes: commit 9c1051aacde8 ("blk-mq: untangle debugfs and sysfs") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Ming Lei Reviewed-by: Omar Sandoval Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 504fee940052..712b018e9f54 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -887,10 +887,10 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } - if (q->mq_ops) + if (q->mq_ops) { __blk_mq_register_dev(dev, q); - - blk_mq_debugfs_register(q); + blk_mq_debugfs_register(q); + } kobject_uevent(&q->kobj, KOBJ_ADD); -- cgit v1.2.3-59-g8ed1b From 83b4605b0c16cde5b00c8cf192408d51eab75402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 18:59:54 +0200 Subject: PCI/msi: fix the pci_alloc_irq_vectors_affinity stub We need to return an error for any call that asks for MSI / MSI-X vectors only, so that non-trivial fallback logic can work properly. Also valid dev->irq and use the "correct" errno value based on feedback from Linus. Signed-off-by: Christoph Hellwig Reported-by: Steven Rostedt Fixes: aff17164 ("PCI: Provide sensible IRQ vector alloc/free routines") Signed-off-by: Linus Torvalds --- include/linux/pci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..fc2e832d7b9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1342,9 +1342,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, const struct irq_affinity *aff_desc) { - if (min_vecs > 1) - return -EINVAL; - return 1; + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) + return 1; + return -ENOSPC; } static inline void pci_free_irq_vectors(struct pci_dev *dev) -- cgit v1.2.3-59-g8ed1b From 0908cf4dfef35fc6ac12329007052ebe93ff1081 Mon Sep 17 00:00:00 2001 From: linzhang Date: Thu, 25 May 2017 14:07:18 +0800 Subject: net: llc: add lock_sock in llc_ui_bind to avoid a race condition There is a race condition in llc_ui_bind if two or more processes/threads try to bind a same socket. If more processes/threads bind a same socket success that will lead to two problems, one is this action is not what we expected, another is will lead to kernel in unstable status or oops(in my simple test case, cause llc2.ko can't unload). The current code is test SOCK_ZAPPED bit to avoid a process to bind a same socket twice but that is can't avoid more processes/threads try to bind a same socket at the same time. So, add lock_sock in llc_ui_bind like others, such as llc_ui_connect. Signed-off-by: Lin Zhang Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8364fe5b59e4..c38d16f22d2a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -311,6 +311,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); + + lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; @@ -382,6 +384,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + release_sock(sk); return rc; } -- cgit v1.2.3-59-g8ed1b From 804ec7ebe8ea003999ca8d1bfc499edc6a9e07df Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 25 May 2017 19:14:56 +0200 Subject: sctp: fix ICMP processing if skb is non-linear sometimes ICMP replies to INIT chunks are ignored by the client, even if the encapsulated SCTP headers match an open socket. This happens when the ICMP packet is carried by a paged skb: use skb_header_pointer() to read packet contents beyond the SCTP header, so that chunk header and initiate tag are validated correctly. v2: - don't use skb_header_pointer() to read the transport header, since icmp_socket_deliver() already puts these 8 bytes in the linear area. - change commit message to make specific reference to INIT chunks. Signed-off-by: Davide Caratti Acked-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 0e06a278d2a9..ba9ad32fc447 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association **app, struct sctp_transport **tpp) { + struct sctp_init_chunk *chunkhdr, _chunkhdr; union sctp_addr saddr; union sctp_addr daddr; struct sctp_af *af; struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; - struct sctp_init_chunk *chunkhdr; __u32 vtag = ntohl(sctphdr->vtag); - int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); - if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) - + sizeof(__be32) || + /* chunk header + first 4 octects of init header */ + chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr), + sizeof(struct sctp_chunkhdr) + + sizeof(__be32), &_chunkhdr); + if (!chunkhdr || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || - ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) goto out; - } + } else if (vtag != asoc->c.peer_vtag) { goto out; } -- cgit v1.2.3-59-g8ed1b From 0e9a709560dbcfbace8bf4019dc5298619235891 Mon Sep 17 00:00:00 2001 From: Peter Dawson Date: Fri, 26 May 2017 06:35:18 +1000 Subject: ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets This fix addresses two problems in the way the DSCP field is formulated on the encapsulating header of IPv6 tunnels. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195661 1) The IPv6 tunneling code was manipulating the DSCP field of the encapsulating packet using the 32b flowlabel. Since the flowlabel is only the lower 20b it was incorrect to assume that the upper 12b containing the DSCP and ECN fields would remain intact when formulating the encapsulating header. This fix handles the 'inherit' and 'fixed-value' DSCP cases explicitly using the extant dsfield u8 variable. 2) The use of INET_ECN_encapsulate(0, dsfield) in ip6_tnl_xmit was incorrect and resulted in the DSCP value always being set to 0. Commit 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0") caused the regression by masking out the flowlabel which exposed the incorrect handling of the DSCP portion of the flowlabel in ip6_tunnel and ip6_gre. Fixes: 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0") Signed-off-by: Peter Dawson Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 13 +++++++------ net/ipv6/ip6_tunnel.c | 21 +++++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8d128ba79b66..0c5b4caa1949 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv4_get_dsfield(iph); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else @@ -598,9 +597,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6eb2ae507500..7ae6c503f1ca 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1196,7 +1196,7 @@ route_lookup: skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_flow_hdr(ipv6h, dsfield, ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; @@ -1231,8 +1231,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - dsfield = ipv4_get_dsfield(iph); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1246,6 +1244,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -1254,8 +1253,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else @@ -1267,6 +1267,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); + skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1300,8 +1302,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1315,6 +1315,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ @@ -1337,7 +1338,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) @@ -1351,6 +1354,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, -- cgit v1.2.3-59-g8ed1b From 82533ad9a1ce3a7a6863849a552c2cc041b55e0d Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 25 May 2017 22:54:53 +0200 Subject: net: ethernet: ax88796: don't call free_irq without request_irq first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ax_init_dev (which is called only from the driver's .probe function) calls free_irq in the error path without having requested the irq in the first place. So drop the free_irq call in the error path. Fixes: 825a2ff1896e ("AX88796 network driver") Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/ax88796.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index b0a3b85fc6f8..db02bc2fb4b2 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -748,13 +748,13 @@ static int ax_init_dev(struct net_device *dev) ret = ax_mii_init(dev); if (ret) - goto out_irq; + goto err_out; ax_NS8390_init(dev, 0); ret = register_netdev(dev); if (ret) - goto out_irq; + goto err_out; netdev_info(dev, "%dbit, irq %d, %lx, MAC: %pM\n", ei_local->word16 ? 16 : 8, dev->irq, dev->base_addr, @@ -762,9 +762,6 @@ static int ax_init_dev(struct net_device *dev) return 0; - out_irq: - /* cleanup irq */ - free_irq(dev->irq, dev); err_out: return ret; } -- cgit v1.2.3-59-g8ed1b From 3fb07daff8e99243366a081e5129560734de4ada Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 May 2017 14:27:35 -0700 Subject: ipv4: add reference counting to metrics Andrey Konovalov reported crashes in ipv4_mtu() I could reproduce the issue with KASAN kernels, between 10.246.7.151 and 10.246.7.152 : 1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 & 2) At the same time run following loop : while : do ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 done Cong Wang attempted to add back rt->fi in commit 82486aa6f1b9 ("ipv4: restore rt->fi for reference counting") but this proved to add some issues that were complex to solve. Instead, I suggested to add a refcount to the metrics themselves, being a standalone object (in particular, no reference to other objects) I tried to make this patch as small as possible to ease its backport, instead of being super clean. Note that we believe that only ipv4 dst need to take care of the metric refcount. But if this is wrong, this patch adds the basic infrastructure to extend this to other families. Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang for his efforts on this problem. Fixes: 2860583fe840 ("ipv4: Kill rt->fi") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Reviewed-by: Julian Anastasov Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/dst.h | 8 +++++++- include/net/ip_fib.h | 10 +++++----- net/core/dst.c | 23 ++++++++++++++--------- net/ipv4/fib_semantics.c | 17 ++++++++++------- net/ipv4/route.c | 10 +++++++++- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 049af33da3b6..cfc043784166 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -107,10 +107,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6692c5758b33..f7f6aa789c61 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -114,11 +114,11 @@ struct fib_info { __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; diff --git a/net/core/dst.c b/net/core/dst.c index 960e503b5a52..6192f11beec9 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard_out); -const u32 dst_default_metrics[RTAX_MAX + 1] = { +const struct dst_metrics dst_default_metrics = { /* This initializer is needed to force linker to place this variable * into const section. Otherwise it might end into bss section. * We really want to avoid false sharing on this variable, and catch * any writes on it. */ - [RTAX_MAX] = 0xdeadbeef, + .refcnt = ATOMIC_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, if (dev) dev_hold(dev); dst->ops = ops; - dst_init_metrics(dst, dst_default_metrics, true); + dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; dst->path = dst; dst->from = NULL; @@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { - u32 *old_p = __DST_METRICS_PTR(old); + struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + atomic_set(&p->refcnt, 1); + memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); - p = __DST_METRICS_PTR(prev); + p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; + } else if (prev & DST_METRICS_REFCOUNTED) { + if (atomic_dec_and_test(&old_p->refcnt)) + kfree(old_p); } } - return p; + BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); + return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); @@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; - new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; + new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da449ddb8cc1..ad9ad4aab5da 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - if (fi->fib_metrics != (u32 *) dst_default_metrics) - kfree(fi->fib_metrics); + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); kfree(fi); } @@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; - fi->fib_metrics[type - 1] = val; + fi->fib_metrics->metrics[type - 1] = val; } if (ecn_ca) - fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; return 0; } @@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); if (!fi->fib_metrics) goto failure; + atomic_set(&fi->fib_metrics->refcnt, 1); } else - fi->fib_metrics = (u32 *) dst_default_metrics; + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) + if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 655d9eebe43e..6883b3d4ba8f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; + if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + kfree(p); + if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); + if (fi->fib_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + atomic_inc(&fi->fib_metrics->refcnt); + } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif -- cgit v1.2.3-59-g8ed1b From 80b79dd0e2f29f06a6a54a5755c718f1c7ebb136 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 27 May 2017 06:07:18 -0400 Subject: fs: locks: Fix some troubles at kernel-doc comments There are a few syntax violations that cause outputs of a few comments to not be properly parsed in ReST format. No functional changes. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jeff Layton --- fs/locks.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index af2031a1fcff..4a4543a7f9c1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1858,8 +1858,8 @@ EXPORT_SYMBOL(generic_setlease); * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter - * an existing lease, the (*lease)->fl_lmops->lm_break operation must be set; - * if not, this function will return -ENOLCK (and generate a scary-looking + * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be + * set; if not, this function will return -ENOLCK (and generate a scary-looking * stack trace). * * The "priv" pointer is passed directly to the lm_setup function as-is. It @@ -1972,15 +1972,13 @@ EXPORT_SYMBOL(locks_lock_inode_wait); * @cmd: the type of lock to apply. * * Apply a %FL_FLOCK style lock to an open file descriptor. - * The @cmd can be one of + * The @cmd can be one of: * - * %LOCK_SH -- a shared lock. - * - * %LOCK_EX -- an exclusive lock. - * - * %LOCK_UN -- remove an existing lock. - * - * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes. + * - %LOCK_SH -- a shared lock. + * - %LOCK_EX -- an exclusive lock. + * - %LOCK_UN -- remove an existing lock. + * - %LOCK_MAND -- a 'mandatory' flock. + * This exists to emulate Windows Share Modes. * * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other * processes read and write access respectively. -- cgit v1.2.3-59-g8ed1b From a75d30c772078546ac00399a94ecdc82df1a4d72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 27 May 2017 06:07:19 -0400 Subject: fs/locks: pass kernel struct flock to fcntl_getlk/setlk This will make it easier to implement a sane compat fcntl syscall. [ jlayton: fix undeclared identifiers in 32-bit fcntl64 syscall handler ] Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Jeff Layton --- fs/fcntl.c | 27 +++++++++++++++---- fs/locks.c | 79 +++++++++++++++--------------------------------------- include/linux/fs.h | 8 +++--- 3 files changed, 48 insertions(+), 66 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index f4e7267d117f..ed4283d500a3 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -246,6 +246,8 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { + void __user *argp = (void __user *)arg; + struct flock flock; long err = -EINVAL; switch (cmd) { @@ -273,7 +275,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_OFD_GETLK: #endif case F_GETLK: - err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); + if (copy_from_user(&flock, argp, sizeof(flock))) + return -EFAULT; + err = fcntl_getlk(filp, cmd, &flock); + if (!err && copy_to_user(argp, &flock, sizeof(flock))) + return -EFAULT; break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ @@ -283,7 +289,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, /* Fallthrough */ case F_SETLK: case F_SETLKW: - err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); + if (copy_from_user(&flock, argp, sizeof(flock))) + return -EFAULT; + err = fcntl_setlk(fd, filp, cmd, &flock); break; case F_GETOWN: /* @@ -383,7 +391,9 @@ out: SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { + void __user *argp = (void __user *)arg; struct fd f = fdget_raw(fd); + struct flock64 flock; long err = -EBADF; if (!f.file) @@ -401,14 +411,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, switch (cmd) { case F_GETLK64: case F_OFD_GETLK: - err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); + err = -EFAULT; + if (copy_from_user(&flock, argp, sizeof(flock))) + break; + err = fcntl_getlk64(f.file, cmd, &flock); + if (!err && copy_to_user(argp, &flock, sizeof(flock))) + err = -EFAULT; break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: - err = fcntl_setlk64(fd, f.file, cmd, - (struct flock64 __user *) arg); + err = -EFAULT; + if (copy_from_user(&flock, argp, sizeof(flock))) + break; + err = fcntl_setlk64(fd, f.file, cmd, &flock); break; default: err = do_fcntl(fd, cmd, arg, f.file); diff --git a/fs/locks.c b/fs/locks.c index 4a4543a7f9c1..afefeb4ad6de 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2084,26 +2084,22 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) +int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock file_lock; - struct flock flock; int error; - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; error = -EINVAL; - if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) + if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; - error = flock_to_posix_lock(filp, &file_lock, &flock); + error = flock_to_posix_lock(filp, &file_lock, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; - if (flock.l_pid != 0) + if (flock->l_pid != 0) goto out; cmd = F_GETLK; @@ -2115,15 +2111,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) if (error) goto out; - flock.l_type = file_lock.fl_type; + flock->l_type = file_lock.fl_type; if (file_lock.fl_type != F_UNLCK) { - error = posix_lock_to_flock(&flock, &file_lock); + error = posix_lock_to_flock(flock, &file_lock); if (error) goto rel_priv; } - error = -EFAULT; - if (!copy_to_user(l, &flock, sizeof(flock))) - error = 0; rel_priv: locks_release_private(&file_lock); out: @@ -2216,26 +2209,16 @@ check_fmode_for_setlk(struct file_lock *fl) * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, - struct flock __user *l) + struct flock *flock) { struct file_lock *file_lock = locks_alloc_lock(); - struct flock flock; - struct inode *inode; + struct inode *inode = locks_inode(filp); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; - inode = locks_inode(filp); - - /* - * This might block, so we do it before checking the inode. - */ - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ @@ -2244,7 +2227,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } - error = flock_to_posix_lock(filp, file_lock, &flock); + error = flock_to_posix_lock(filp, file_lock, flock); if (error) goto out; @@ -2259,7 +2242,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, switch (cmd) { case F_OFD_SETLK: error = -EINVAL; - if (flock.l_pid != 0) + if (flock->l_pid != 0) goto out; cmd = F_SETLK; @@ -2268,7 +2251,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, break; case F_OFD_SETLKW: error = -EINVAL; - if (flock.l_pid != 0) + if (flock->l_pid != 0) goto out; cmd = F_SETLKW; @@ -2313,26 +2296,22 @@ out: /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) +int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock file_lock; - struct flock64 flock; int error; - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; error = -EINVAL; - if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) + if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; - error = flock64_to_posix_lock(filp, &file_lock, &flock); + error = flock64_to_posix_lock(filp, &file_lock, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; - if (flock.l_pid != 0) + if (flock->l_pid != 0) goto out; cmd = F_GETLK64; @@ -2344,13 +2323,9 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) if (error) goto out; - flock.l_type = file_lock.fl_type; + flock->l_type = file_lock.fl_type; if (file_lock.fl_type != F_UNLCK) - posix_lock_to_flock64(&flock, &file_lock); - - error = -EFAULT; - if (!copy_to_user(l, &flock, sizeof(flock))) - error = 0; + posix_lock_to_flock64(flock, &file_lock); locks_release_private(&file_lock); out: @@ -2361,26 +2336,16 @@ out: * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, - struct flock64 __user *l) + struct flock64 *flock) { struct file_lock *file_lock = locks_alloc_lock(); - struct flock64 flock; - struct inode *inode; + struct inode *inode = locks_inode(filp); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; - /* - * This might block, so we do it before checking the inode. - */ - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - - inode = locks_inode(filp); - /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ @@ -2389,7 +2354,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } - error = flock64_to_posix_lock(filp, file_lock, &flock); + error = flock64_to_posix_lock(filp, file_lock, flock); if (error) goto out; @@ -2404,7 +2369,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, switch (cmd) { case F_OFD_SETLK: error = -EINVAL; - if (flock.l_pid != 0) + if (flock->l_pid != 0) goto out; cmd = F_SETLK64; @@ -2413,7 +2378,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, break; case F_OFD_SETLKW: error = -EINVAL; - if (flock.l_pid != 0) + if (flock->l_pid != 0) goto out; cmd = F_SETLKW64; diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..aa4affb38c39 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1038,14 +1038,14 @@ static inline struct inode *locks_inode(const struct file *f) } #ifdef CONFIG_FILE_LOCKING -extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); +extern int fcntl_getlk(struct file *, unsigned int, struct flock *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, - struct flock __user *); + struct flock *); #if BITS_PER_LONG == 32 -extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); +extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, - struct flock64 __user *); + struct flock64 *); #endif extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); -- cgit v1.2.3-59-g8ed1b From 94073ad77fff221b5e66b8b9863a546ba212d6a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 27 May 2017 06:07:20 -0400 Subject: fs/locks: don't mess with the address limit in compat_fcntl64 Instead write a proper compat syscall that calls common helpers. [ jlayton: fix pointer dereferencing in fixup_compat_flock ] Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Jeff Layton --- fs/fcntl.c | 118 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 67 insertions(+), 51 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index ed4283d500a3..bbf80344c125 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -506,76 +506,92 @@ convert_fcntl_cmd(unsigned int cmd) return cmd; } +/* + * GETLK was successful and we need to return the data, but it needs to fit in + * the compat structure. + * l_start shouldn't be too big, unless the original start + end is greater than + * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return + * -EOVERFLOW in that case. l_len could be too big, in which case we just + * truncate it, and only allow the app to see that part of the conflicting lock + * that might make sense to it anyway + */ +static int fixup_compat_flock(struct flock *flock) +{ + if (flock->l_start > COMPAT_OFF_T_MAX) + return -EOVERFLOW; + if (flock->l_len > COMPAT_OFF_T_MAX) + flock->l_len = COMPAT_OFF_T_MAX; + return 0; +} + COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { - mm_segment_t old_fs; - struct flock f; - long ret; - unsigned int conv_cmd; + struct fd f = fdget_raw(fd); + struct flock flock; + long err = -EBADF; + + if (!f.file) + return err; + + if (unlikely(f.file->f_mode & FMODE_PATH)) { + if (!check_fcntl_cmd(cmd)) + goto out_put; + } + + err = security_file_fcntl(f.file, cmd, arg); + if (err) + goto out_put; switch (cmd) { case F_GETLK: + err = get_compat_flock(&flock, compat_ptr(arg)); + if (err) + break; + err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); + if (err) + break; + err = fixup_compat_flock(&flock); + if (err) + return err; + err = put_compat_flock(&flock, compat_ptr(arg)); + break; + case F_GETLK64: + case F_OFD_GETLK: + err = get_compat_flock64(&flock, compat_ptr(arg)); + if (err) + break; + err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); + if (err) + break; + err = fixup_compat_flock(&flock); + if (err) + return err; + err = put_compat_flock64(&flock, compat_ptr(arg)); + break; case F_SETLK: case F_SETLKW: - ret = get_compat_flock(&f, compat_ptr(arg)); - if (ret != 0) + err = get_compat_flock(&flock, compat_ptr(arg)); + if (err) break; - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_fcntl(fd, cmd, (unsigned long)&f); - set_fs(old_fs); - if (cmd == F_GETLK && ret == 0) { - /* GETLK was successful and we need to return the data... - * but it needs to fit in the compat structure. - * l_start shouldn't be too big, unless the original - * start + end is greater than COMPAT_OFF_T_MAX, in which - * case the app was asking for trouble, so we return - * -EOVERFLOW in that case. - * l_len could be too big, in which case we just truncate it, - * and only allow the app to see that part of the conflicting - * lock that might make sense to it anyway - */ - - if (f.l_start > COMPAT_OFF_T_MAX) - ret = -EOVERFLOW; - if (f.l_len > COMPAT_OFF_T_MAX) - f.l_len = COMPAT_OFF_T_MAX; - if (ret == 0) - ret = put_compat_flock(&f, compat_ptr(arg)); - } + err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; - - case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_OFD_GETLK: case F_OFD_SETLK: case F_OFD_SETLKW: - ret = get_compat_flock64(&f, compat_ptr(arg)); - if (ret != 0) + err = get_compat_flock64(&flock, compat_ptr(arg)); + if (err) break; - old_fs = get_fs(); - set_fs(KERNEL_DS); - conv_cmd = convert_fcntl_cmd(cmd); - ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); - set_fs(old_fs); - if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { - /* need to return lock information - see above for commentary */ - if (f.l_start > COMPAT_LOFF_T_MAX) - ret = -EOVERFLOW; - if (f.l_len > COMPAT_LOFF_T_MAX) - f.l_len = COMPAT_LOFF_T_MAX; - if (ret == 0) - ret = put_compat_flock64(&f, compat_ptr(arg)); - } + err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; - default: - ret = sys_fcntl(fd, cmd, arg); + err = do_fcntl(fd, cmd, arg, f.file); break; } - return ret; +out_put: + fdput(f); + return err; } COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, -- cgit v1.2.3-59-g8ed1b From 393cc3f51135ea2520521f776ef3afdf3395c797 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 13 Jun 2017 13:35:50 +0200 Subject: fs/fcntl: f_setown, allow returning error Allow f_setown to return an error value. We will fail in the next patch with EINVAL for bad input to f_setown, so tile the path for the later patch. Signed-off-by: Jiri Slaby Reviewed-by: Jeff Layton Cc: Jeff Layton Cc: "J. Bruce Fields" Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Jeff Layton --- fs/fcntl.c | 7 ++++--- include/linux/fs.h | 2 +- net/socket.c | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index bbf80344c125..313eba860346 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -109,7 +109,7 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, } EXPORT_SYMBOL(__f_setown); -void f_setown(struct file *filp, unsigned long arg, int force) +int f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; struct pid *pid; @@ -123,6 +123,8 @@ void f_setown(struct file *filp, unsigned long arg, int force) pid = find_vpid(who); __f_setown(filp, pid, type, force); rcu_read_unlock(); + + return 0; } EXPORT_SYMBOL(f_setown); @@ -305,8 +307,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, force_successful_syscall_return(); break; case F_SETOWN: - f_setown(filp, arg, 1); - err = 0; + err = f_setown(filp, arg, 1); break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); diff --git a/include/linux/fs.h b/include/linux/fs.h index aa4affb38c39..25ee1ff6d45b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1249,7 +1249,7 @@ extern void fasync_free(struct fasync_struct *); extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern void f_setown(struct file *filp, unsigned long arg, int force); +extern int f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); diff --git a/net/socket.c b/net/socket.c index c2564eb25c6b..a30a1e324390 100644 --- a/net/socket.c +++ b/net/socket.c @@ -950,8 +950,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - f_setown(sock->file, pid, 1); - err = 0; + err = f_setown(sock->file, pid, 1); break; case FIOGETOWN: case SIOCGPGRP: -- cgit v1.2.3-59-g8ed1b From fc3dc67471461c0efcb1ed22fb7595121d65fad9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 13 Jun 2017 13:35:51 +0200 Subject: fs/fcntl: f_setown, avoid undefined behaviour fcntl(0, F_SETOWN, 0x80000000) triggers: UBSAN: Undefined behaviour in fs/fcntl.c:118:7 negation of -2147483648 cannot be represented in type 'int': CPU: 1 PID: 18261 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1 ... Call Trace: ... [] ? f_setown+0x1d8/0x200 [] ? SyS_fcntl+0x999/0xf30 [] ? entry_SYSCALL_64_fastpath+0x23/0xc1 Fix that by checking the arg parameter properly (against INT_MAX) before "who = -who". And return immediatelly with -EINVAL in case it is wrong. Note that according to POSIX we can return EINVAL: http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html [EINVAL] The cmd argument is F_SETOWN and the value of the argument is not valid as a process or process group identifier. [v2] returns an error, v1 used to fail silently [v3] implement proper check for the bad value INT_MIN Signed-off-by: Jiri Slaby Cc: Jeff Layton Cc: "J. Bruce Fields" Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Jeff Layton --- fs/fcntl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fcntl.c b/fs/fcntl.c index 313eba860346..693322e28751 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -116,6 +116,10 @@ int f_setown(struct file *filp, unsigned long arg, int force) int who = arg; type = PIDTYPE_PID; if (who < 0) { + /* avoid overflow below */ + if (who == INT_MIN) + return -EINVAL; + type = PIDTYPE_PGID; who = -who; } -- cgit v1.2.3-59-g8ed1b From f73127356f344483c82632accda2e72b7e0e5f25 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 Jun 2017 09:11:54 -0400 Subject: fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be found The current implementation of F_SETOWN doesn't properly vet the argument passed in and only returns an error if INT_MIN is passed in. If the argument doesn't specify a valid pid/pgid, then we just end up cleaning out the file->f_owner structure. What we really want is to only clean that out only in the case where userland passed in an argument of 0. For anything else, we want to return ESRCH if it doesn't refer to a valid pid. The relevant POSIX spec page is here: http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html Cc: Jiri Slaby Cc: zhong jiang Signed-off-by: Jeff Layton --- fs/fcntl.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 693322e28751..afed3b364979 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -112,8 +112,9 @@ EXPORT_SYMBOL(__f_setown); int f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; - struct pid *pid; - int who = arg; + struct pid *pid = NULL; + int who = arg, ret = 0; + type = PIDTYPE_PID; if (who < 0) { /* avoid overflow below */ @@ -123,12 +124,19 @@ int f_setown(struct file *filp, unsigned long arg, int force) type = PIDTYPE_PGID; who = -who; } + rcu_read_lock(); - pid = find_vpid(who); - __f_setown(filp, pid, type, force); + if (who) { + pid = find_vpid(who); + if (!pid) + ret = -ESRCH; + } + + if (!ret) + __f_setown(filp, pid, type, force); rcu_read_unlock(); - return 0; + return ret; } EXPORT_SYMBOL(f_setown); -- cgit v1.2.3-59-g8ed1b